[PATCH] WIP: regexp matching in subjects
David Bremner
david at tethera.net
Mon Jun 6 19:05:49 PDT 2016
The idea is that you can run
% notmuch search 'subject:rx:<your-favourite-regexp>'
or
% notmuch search subject:"your usual phrase search"
This should also work with bindings.
---
Here is Austin's "hack", crammed into the field processor framework.
I seem to have broken one of the existing subject search tests with my
recursive query parsing. I didn't have time to figure out why, yet.
lib/Makefile.local | 2 ++
lib/database-private.h | 1 +
lib/database.cc | 5 +++
lib/regexp-ps.cc | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
lib/regexp-ps.h | 37 ++++++++++++++++++++
lib/subject-fp.cc | 41 ++++++++++++++++++++++
lib/subject-fp.h | 43 +++++++++++++++++++++++
7 files changed, 221 insertions(+)
create mode 100644 lib/regexp-ps.cc
create mode 100644 lib/regexp-ps.h
create mode 100644 lib/subject-fp.cc
create mode 100644 lib/subject-fp.h
diff --git a/lib/Makefile.local b/lib/Makefile.local
index beb9635..0e7311f 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -51,6 +51,8 @@ libnotmuch_cxx_srcs = \
$(dir)/query.cc \
$(dir)/query-fp.cc \
$(dir)/config.cc \
+ $(dir)/regexp-ps.cc \
+ $(dir)/subject-fp.cc \
$(dir)/thread.cc
libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
diff --git a/lib/database-private.h b/lib/database-private.h
index ca71a92..5de0b81 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -186,6 +186,7 @@ struct _notmuch_database {
#if HAVE_XAPIAN_FIELD_PROCESSOR
Xapian::FieldProcessor *date_field_processor;
Xapian::FieldProcessor *query_field_processor;
+ Xapian::FieldProcessor *subject_field_processor;
#endif
Xapian::ValueRangeProcessor *last_mod_range_processor;
};
diff --git a/lib/database.cc b/lib/database.cc
index 86bf261..adfbb81 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -21,6 +21,7 @@
#include "database-private.h"
#include "parse-time-vrp.h"
#include "query-fp.h"
+#include "subject-fp.h"
#include "string-util.h"
#include <iostream>
@@ -1008,6 +1009,8 @@ notmuch_database_open_verbose (const char *path,
notmuch->query_parser->add_boolean_prefix("date", notmuch->date_field_processor);
notmuch->query_field_processor = new QueryFieldProcessor (*notmuch->query_parser, notmuch);
notmuch->query_parser->add_boolean_prefix("query", notmuch->query_field_processor);
+ notmuch->subject_field_processor = new SubjectFieldProcessor (*notmuch->query_parser, notmuch);
+ notmuch->query_parser->add_boolean_prefix("subject", notmuch->subject_field_processor);
#endif
notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:");
@@ -1027,6 +1030,8 @@ notmuch_database_open_verbose (const char *path,
for (i = 0; i < ARRAY_SIZE (PROBABILISTIC_PREFIX); i++) {
prefix_t *prefix = &PROBABILISTIC_PREFIX[i];
+ if (strcmp (prefix->name, "subject") == 0)
+ continue;
notmuch->query_parser->add_prefix (prefix->name, prefix->prefix);
}
} catch (const Xapian::Error &error) {
diff --git a/lib/regexp-ps.cc b/lib/regexp-ps.cc
new file mode 100644
index 0000000..540c7d6
--- /dev/null
+++ b/lib/regexp-ps.cc
@@ -0,0 +1,92 @@
+/* regexp-ps.cc - posting source matching stored values against a regexp
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: Austin Clements <aclements at csail.mit.edu>
+ * David Bremner <david at tethera.net>
+ */
+
+#include "regexp-ps.h"
+
+/* Compile REGEXP with regcomp (REG_EXTENDED | REG_NOSUB, i.e. extended
+ * syntax, match/no-match only) for later use against the values
+ * stored in SLOT.
+ *
+ * Throws Xapian::QueryParserError carrying the regerror(3) text when
+ * the expression does not compile, so that the failure travels through
+ * the same Xapian::Error handling as other query syntax errors.
+ */
+RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const std::string &regexp)
+    : slot_ (slot)
+{
+    int r = regcomp (&regexp_, regexp.c_str (), REG_EXTENDED | REG_NOSUB);
+
+    if (r != 0) {
+        /* A first call with an empty buffer yields the size needed for
+         * the message, terminating NUL included. */
+        size_t len = regerror (r, &regexp_, NULL, 0);
+        std::string detail (len, '\0');
+
+        if (len > 0) {
+            (void) regerror (r, &regexp_, &detail[0], len);
+            /* Drop the terminating NUL written by regerror. */
+            detail.resize (len - 1);
+        }
+
+        throw Xapian::QueryParserError ("Regexp error: " + detail);
+    }
+}
+
+/* Free the compiled expression held in regexp_. */
+RegexpPostingSource::~RegexpPostingSource ()
+{
+    regfree (&regexp_);
+}
+
+/* Attach this source to DB: keep a copy of the database handle, point
+ * it_/end_ at the start and end of the value stream for slot_, and
+ * mark iteration as not yet started.
+ */
+void
+RegexpPostingSource::init (const Xapian::Database &db)
+{
+    started_ = false;
+    db_ = db;
+    it_ = db_.valuestream_begin (slot_);
+    end_ = db_.valuestream_end (slot_);
+}
+
+/* Minimum term frequency reported by this source: always 0. */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_min () const
+{
+    const Xapian::doccount no_matches = 0;
+
+    return no_matches;
+}
+
+/* Estimated term frequency: half of get_termfreq_max (). */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_est () const
+{
+    const Xapian::doccount upper = get_termfreq_max ();
+
+    return upper / 2;
+}
+
+/* Maximum term frequency: whatever the database returns from
+ * get_value_freq for slot_.
+ */
+Xapian::doccount
+RegexpPostingSource::get_termfreq_max () const
+{
+    const Xapian::doccount with_value = db_.get_value_freq (slot_);
+
+    return with_value;
+}
+
+/* Document id at the current position of the value iterator. */
+Xapian::docid
+RegexpPostingSource::get_docid () const
+{
+    const Xapian::docid current = it_.get_docid ();
+
+    return current;
+}
+
+/* True once the value iterator has reached end_. */
+bool
+RegexpPostingSource::at_end () const
+{
+    const bool exhausted = (it_ == end_);
+
+    return exhausted;
+}
+
+/* Advance to the next document whose value in slot_ matches the
+ * compiled regexp, or to the end of the value stream if none is left.
+ * The min_wt argument is not used.
+ */
+void
+RegexpPostingSource::next (unused (double min_wt))
+{
+    /* On the first call the entry the iterator already points at still
+     * has to be examined; on later calls step past the previous match. */
+    if (started_ && ! at_end ())
+        ++it_;
+    started_ = true;
+
+    while (! at_end ()) {
+        const std::string value = *it_;
+
+        /* regexec returns 0 on a match. */
+        if (regexec (&regexp_, value.c_str (), 0, NULL, 0) == 0)
+            break;
+        ++it_;
+    }
+}
diff --git a/lib/regexp-ps.h b/lib/regexp-ps.h
new file mode 100644
index 0000000..a4553a7
--- /dev/null
+++ b/lib/regexp-ps.h
@@ -0,0 +1,37 @@
+#ifndef NOTMUCH_REGEX_PS_H
+#define NOTMUCH_REGEX_PS_H
+
+#include <sys/types.h>
+#include <regex.h>
+#include <xapian.h>
+#include "notmuch-private.h"
+
+/* A posting source that returns documents where a value matches a
+ * regexp.
+ *
+ * The expression is compiled once in the constructor; next () then
+ * walks the value stream of one slot and stops at each value the
+ * expression matches.
+ */
+class RegexpPostingSource : public Xapian::PostingSource
+{
+protected:
+    /* Value slot whose contents are matched. */
+    const Xapian::valueno slot_;
+    /* Compiled expression; released by the destructor. */
+    regex_t regexp_;
+    /* Database handle stored by init (). */
+    Xapian::Database db_;
+    /* False between init () and the first next () call. */
+    bool started_;
+    /* Current position in, and end of, the value stream for slot_. */
+    Xapian::ValueIterator it_, end_;
+
+    /* No copying */
+    RegexpPostingSource (const RegexpPostingSource &);
+    RegexpPostingSource &operator= (const RegexpPostingSource &);
+
+public:
+    RegexpPostingSource (Xapian::valueno slot, const std::string &regexp);
+    ~RegexpPostingSource ();
+    void init (const Xapian::Database &db);
+    Xapian::doccount get_termfreq_min () const;
+    Xapian::doccount get_termfreq_est () const;
+    Xapian::doccount get_termfreq_max () const;
+    Xapian::docid get_docid () const;
+    bool at_end () const;
+    void next (unused (double min_wt));
+};
+
+#endif
diff --git a/lib/subject-fp.cc b/lib/subject-fp.cc
new file mode 100644
index 0000000..1627721
--- /dev/null
+++ b/lib/subject-fp.cc
@@ -0,0 +1,41 @@
+/* subject-fp.cc - "subject:" field processor glue
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david at tethera.net>
+ */
+
+#include "database-private.h"
+#include "subject-fp.h"
+#include <iostream>
+
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+
+/* Build the query for the text following a "subject:" prefix.
+ *
+ * If STR starts with "rx:", the remainder is treated as a regexp and
+ * matched against the NOTMUCH_VALUE_SUBJECT value slot through a
+ * RegexpPostingSource. Otherwise STR is handed back to the query
+ * parser with the result of _find_prefix ("subject") as its prefix
+ * argument.
+ *
+ * Exceptions raised while constructing the RegexpPostingSource are
+ * not caught here.
+ */
+Xapian::Query
+SubjectFieldProcessor::operator() (const std::string & str)
+{
+    const std::string prefix = "rx:";
+
+    if (str.compare (0, prefix.size (), prefix) == 0) {
+        RegexpPostingSource *source =
+            new RegexpPostingSource (NOTMUCH_VALUE_SUBJECT, str.substr (prefix.size ()));
+
+        /* release () hands ownership to Xapian, which deletes the
+         * source once no query refers to it. Keeping the pointer in a
+         * member instead would leak the previous source on every
+         * call. */
+        return Xapian::Query (source->release ());
+    }
+
+    return parser.parse_query (str, NOTMUCH_QUERY_PARSER_FLAGS, _find_prefix ("subject"));
+}
+#endif
diff --git a/lib/subject-fp.h b/lib/subject-fp.h
new file mode 100644
index 0000000..ca622ba
--- /dev/null
+++ b/lib/subject-fp.h
@@ -0,0 +1,43 @@
+/* subject-fp.h - subject field processor glue
+ *
+ * This file is part of notmuch.
+ *
+ * Copyright © 2016 David Bremner
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see https://www.gnu.org/licenses/ .
+ *
+ * Author: David Bremner <david at tethera.net>
+ */
+
+#ifndef NOTMUCH_SUBJECT_FP_H
+#define NOTMUCH_SUBJECT_FP_H
+
+#include <xapian.h>
+#include "notmuch.h"
+#include "regexp-ps.h"
+
+#if HAVE_XAPIAN_FIELD_PROCESSOR
+/* Field processor for "subject:" terms. It holds a reference to the
+ * query parser and a pointer to the notmuch database it was created
+ * for; operator() turns the text after the prefix into a query.
+ */
+class SubjectFieldProcessor : public Xapian::FieldProcessor
+{
+protected:
+    Xapian::QueryParser &parser;
+    notmuch_database_t *notmuch;
+    /* Regexp posting source pointer; starts out null. */
+    RegexpPostingSource *postings = nullptr;
+
+public:
+    SubjectFieldProcessor (Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
+        : parser (parser_),
+          notmuch (notmuch_)
+    {
+    }
+
+    Xapian::Query operator() (const std::string & str);
+};
+#endif
+#endif /* NOTMUCH_SUBJECT_FP_H */
--
2.8.1
More information about the notmuch
mailing list