[WIP2 5/5] lib: special case parenthesized subqueries in regex fields

David Bremner david at tethera.net
Sat Aug 31 18:37:48 PDT 2019


Parenthesized subqueries in regex fields should not be treated as
phrase searches. Due to the Xapian problem outlined in [1], it is not
possible to use a query string of the form

         subject:(foo bar)

but at least with this fix

         subject:"(foo bar)"

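expands to subject:foo OP subject:bar (where OP is the explicit or
implicit operator joining the terms inside the parentheses), which is
more useful than merely being another way to type a phrase search.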

[1]: https://trac.xapian.org/ticket/795
---
 lib/regexp-fields.cc            | 19 +++++++++++++++++++
 test/T650-regexp-query.sh       |  4 ----
 test/T760-implicit-operators.sh |  7 +++----
 3 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 198eb32f..3f92b2ab 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -189,6 +189,25 @@ RegexpFieldProcessor::operator() (const std::string & str)
 	} else {
 	    throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
 	}
+    }	else if (str.at (0) == '(') {
+	if (str.length () > 1 && str.at (str.size () - 1) == ')') {
+	    std::string subexp_str = str.substr (1, str.size () - 2);
+	    Xapian::Query query = parser.parse_query (subexp_str,
+						      NOTMUCH_QUERY_PARSER_FLAGS,
+						      term_prefix);
+
+	    if (_notmuch_query_debug ()) {
+		fprintf (stderr, "subquery string [prefix=%s]:\n%s\n",
+			 term_prefix.c_str (),
+			 subexp_str.c_str ());
+		fprintf (stderr, "parsed subquery:\n%s\n",
+		     query.get_description ().c_str ());
+	    }
+
+	    return query;
+	} else {
+	    throw Xapian::QueryParserError ("unmatched '(' in '" + str + "'");
+	}
     } else {
 	if (options & NOTMUCH_FIELD_PROBABILISTIC) {
 	    /* TODO replace this with a nicer API level triggering of
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 05a04791..d9ffd2e7 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -183,25 +183,21 @@ notmuch search tag:/^si/ > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "parenthesized subqueries are not phrases"
-test_subtest_known_broken
 notmuch search subject:notmuch AND subject:outputs > EXPECTED
 notmuch search 'subject:"(notmuch outputs)"' > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "parenthesized subqueries work with OR"
-test_subtest_known_broken
 notmuch search subject:notmuch OR subject:outputs > EXPECTED
 notmuch search 'subject:"(notmuch OR outputs)"' > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "parenthesized subqueries work with AND"
-test_subtest_known_broken
 notmuch search subject:notmuch AND subject:outputs > EXPECTED
 notmuch search 'subject:"(notmuch AND outputs)"' > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "parenthesized subqueries work with overidden prefix"
-test_subtest_known_broken
 notmuch search subject:help AND subject:outputs AND to:notmuch > EXPECTED
 notmuch search 'subject:"(outputs help to:notmuch)"' > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
diff --git a/test/T760-implicit-operators.sh b/test/T760-implicit-operators.sh
index 118a9de2..1eab4ec9 100755
--- a/test/T760-implicit-operators.sh
+++ b/test/T760-implicit-operators.sh
@@ -27,7 +27,7 @@ test_prob() {
 # regex search
 
 test_prob_regex() {
-    test_prob $1 $2
+    test_prob $1
 
     test_begin_subtest "regex for '$1': matches"
     notmuch search --output=messages id:$1@one > EXPECTED
@@ -44,7 +44,6 @@ test_prob_regex() {
     test_expect_equal_file EXPECTED OUTPUT
 
     test_begin_subtest "subquery for '$1' implicit AND"
-    $2
     notmuch search --output=messages id:$1@one > EXPECTED
     notmuch search --output=messages "$1:\"(agent alice)\"" > OUTPUT
     test_expect_equal_file EXPECTED OUTPUT
@@ -84,8 +83,8 @@ add_email_corpus
 
 test_prob_not_regex to
 test_prob_not_regex body
-test_prob_regex from test_subtest_known_broken
-test_prob_regex subject test_subtest_known_broken
+test_prob_regex from
+test_prob_regex subject
 
 test_bool_exclusive id id
 test_bool_exclusive_regex mid id
-- 
2.23.0.rc1



More information about the notmuch mailing list