[patch v3 2/2] lib: Add regexp expansion for tags and paths
David Bremner
david at tethera.net
Thu Mar 30 03:30:13 PDT 2017
From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a
term-based query equivalent to the passed regular expression.
---
lib/database.cc | 12 ++++++----
lib/regexp-fields.cc | 32 +++++++++++++++++++++-----
test/T650-regexp-query.sh | 58 ++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 89 insertions(+), 13 deletions(-)
diff --git a/lib/database.cc b/lib/database.cc
index 49b3849c..5b13f541 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
{ "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
{ "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
{ "thread", "G", NOTMUCH_FIELD_EXTERNAL },
- { "tag", "K", NOTMUCH_FIELD_EXTERNAL },
- { "is", "K", NOTMUCH_FIELD_EXTERNAL },
+ { "tag", "K", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
+ { "is", "K", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
{ "id", "Q", NOTMUCH_FIELD_EXTERNAL },
{ "mid", "Q", NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROCESSOR },
- { "path", "P", NOTMUCH_FIELD_EXTERNAL },
+ { "path", "P", NOTMUCH_FIELD_EXTERNAL|
+ NOTMUCH_FIELD_PROCESSOR },
{ "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL },
/*
* Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
* letters. See Xapian document termprefixes.html for related
* discussion.
*/
- { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL },
+ { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
#if HAVE_XAPIAN_FIELD_PROCESSOR
{ "date", NULL, NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROCESSOR },
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 7ae55e70..084bc8c0 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix)
else if (prefix == "mid")
return NOTMUCH_VALUE_MESSAGE_ID;
else
- throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+ return Xapian::BAD_VALUENO;
}
RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -156,15 +156,35 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
Xapian::Query
RegexpFieldProcessor::operator() (const std::string & str)
{
- if (str.size () == 0)
- return Xapian::Query(Xapian::Query::OP_AND_NOT,
+ if (str.empty ()) {
+ if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+ return Xapian::Query(Xapian::Query::OP_AND_NOT,
Xapian::Query::MatchAll,
Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
+ } else {
+ return Xapian::Query (term_prefix);
+ }
+ }
if (str.at (0) == '/') {
- if (str.at (str.size () - 1) == '/'){
- RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
- return Xapian::Query (postings->release ());
+ if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+ std::string regexp_str = str.substr(1,str.size () - 2);
+ if (slot != Xapian::BAD_VALUENO) {
+ RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+ return Xapian::Query (postings->release ());
+ } else {
+ std::vector<std::string> terms;
+ regex_t regexp;
+
+ compile_regex(regexp, regexp_str.c_str ());
+ for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+ it != notmuch->xapian_db->allterms_end (); ++it) {
+ if (regexec (&regexp, (*it).c_str () + term_prefix.size(),
+ 0, NULL, 0) == 0)
+ terms.push_back(*it);
+ }
+ return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end());
+ }
} else {
throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
}
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 27fc9ab9..b7bdda11 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -2,13 +2,54 @@
test_description='regular expression searches'
. ./test-lib.sh || exit 1
-add_email_corpus
-
-
if [ $NOTMUCH_HAVE_XAPIAN_FIELD_PROCESSOR -eq 0 ]; then
test_done
fi
+add_message '[dir]=bad' '[subject]="To the bone"'
+add_message '[dir]=.' '[subject]="Top level"'
+add_message '[dir]=bad/news' '[subject]="Bears"'
+mkdir -p "${MAIL_DIR}/duplicate/bad/news"
+cp "$gen_msg_filename" "${MAIL_DIR}/duplicate/bad/news"
+
+add_message '[dir]=things' '[subject]="These are a few"'
+add_message '[dir]=things/favorite' '[subject]="Raindrops, whiskers, kettles"'
+add_message '[dir]=things/bad' '[subject]="Bites, stings, sad feelings"'
+
+test_begin_subtest "empty path:// search"
+notmuch search 'path:""' > EXPECTED
+notmuch search 'path:/^$/' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty folder:// search"
+notmuch search 'folder:""' > EXPECTED
+notmuch search 'folder:/^$/' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "unanchored folder:// specification"
+output=$(notmuch search folder:/bad/ | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "anchored folder:// search"
+output=$(notmuch search 'folder:/^bad$/' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)"
+
+test_begin_subtest "unanchored path:// specification"
+output=$(notmuch search path:/bad/ | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bears (inbox unread)
+thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; Bites, stings, sad feelings (inbox unread)"
+
+test_begin_subtest "anchored path:// search"
+output=$(notmuch search 'path:/^bad$/' | notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX 2001-01-05 [1/1] Notmuch Test Suite; To the bone (inbox unread)"
+
+# Use "standard" corpus from here on.
+rm -rf $MAIL_DIR
+add_email_corpus
+
notmuch search --output=messages from:cworth > cworth.msg-ids
# these headers will generate no document terms
@@ -120,4 +161,15 @@ thread:XXX 2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older ver
EOF
test_expect_equal_file EXPECTED OUTPUT
+test_begin_subtest "unanchored tag search"
+notmuch search tag:signed or tag:inbox > EXPECTED
+notmuch search tag:/i/ > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+notmuch tag +testsi '*'
+test_begin_subtest "anchored tag search"
+notmuch search tag:signed > EXPECTED
+notmuch search tag:/^si/ > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
test_done
--
2.11.0
More information about the notmuch
mailing list