[PATCH 08/11] search: Add stable queries to thread search results

Jani Nikula jani at nikula.org
Wed Oct 9 00:41:17 PDT 2013


On Tue, 08 Oct 2013, Austin Clements <amdragon at MIT.EDU> wrote:
> These queries will match exactly the set of messages currently in the
> thread, even if more messages later arrive.  Two queries are provided:
> one for matched messages and one for unmatched messages.
>
> This can be used to fix race conditions with tagging threads from
> search results.  While tagging based on a thread: query can affect
> messages that arrived after the search, tagging based on stable
> queries affects only the messages the user was shown in the search UI.
>
> Since we want clients to be able to depend on the presence of these
> queries, this ushers in schema version 2.
> ---
>  devel/schemata       | 22 +++++++++++++++++--
>  notmuch-client.h     |  2 +-
>  notmuch-search.c     | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  test/json            |  2 ++
>  test/missing-headers |  6 ++++--
>  test/sexp            |  4 ++--
>  6 files changed, 89 insertions(+), 7 deletions(-)
>
> diff --git a/devel/schemata b/devel/schemata
> index cdd0e43..41dc4a6 100644
> --- a/devel/schemata
> +++ b/devel/schemata
> @@ -14,7 +14,17 @@ are interleaved. Keys are printed as keywords (symbols preceded by a
>  colon), e.g. (:id "123" :time 54321 :from "foobar"). Null is printed as
>  nil, true as t and false as nil.
>  
> -This is version 1 of the structured output format.
> +This is version 2 of the structured output format.
> +
> +Version history
> +---------------
> +
> +v1
> +- First versioned schema release.
> +- Added part.content-length and part.content-transfer-encoding fields.
> +
> +v2
> +- Added the thread_summary.query field.
>  
>  Common non-terminals
>  --------------------
> @@ -145,7 +155,15 @@ thread_summary = {
>      authors:        string,   # comma-separated names with | between
>                                # matched and unmatched
>      subject:        string,
> -    tags:           [string*]
> +    tags:           [string*],
> +
> +    # Two stable query strings identifying exactly the matched and
> +    # unmatched messages currently in this thread.  The messages
> +    # matched by these queries will not change even if more messages
> +    # arrive in the thread.  If there are no matched or unmatched
> +    # messages, the corresponding query will be null (there is no
> +    # query that matches nothing).  (Added in schema version 2.)
> +    query:          [string|null, string|null],
>  }
>  
>  notmuch reply schema
> diff --git a/notmuch-client.h b/notmuch-client.h
> index 8d986f4..1b14910 100644
> --- a/notmuch-client.h
> +++ b/notmuch-client.h
> @@ -138,7 +138,7 @@ chomp_newline (char *str)
>   * this.  New (required) map fields can be added without increasing
>   * this.
>   */
> -#define NOTMUCH_FORMAT_CUR 1
> +#define NOTMUCH_FORMAT_CUR 2
>  /* The minimum supported structured output format version.  Requests
>   * for format versions below this will return an error. */
>  #define NOTMUCH_FORMAT_MIN 1
> diff --git a/notmuch-search.c b/notmuch-search.c
> index d9d39ec..1d14651 100644
> --- a/notmuch-search.c
> +++ b/notmuch-search.c
> @@ -20,6 +20,7 @@
>  
>  #include "notmuch-client.h"
>  #include "sprinter.h"
> +#include "string-util.h"
>  
>  typedef enum {
>      OUTPUT_SUMMARY,
> @@ -46,6 +47,46 @@ sanitize_string (const void *ctx, const char *str)
>      return out;
>  }
>  
> +/* Return two stable query strings that identify exactly the matched
> + * and unmatched messages currently in thread.  If there are no
> + * matched or unmatched messages, the returned buffers will be
> + * NULL. */
> +static int
> +get_thread_query (notmuch_thread_t *thread,
> +		  char **matched_out, char **unmached_out)
> +{
> +    notmuch_messages_t *messages;
> +    char *escaped = NULL;
> +    size_t escaped_len = 0;
> +
> +    *matched_out = *unmached_out = NULL;
> +
> +    for (messages = notmuch_thread_get_messages (thread);
> +	 notmuch_messages_valid (messages);
> +	 notmuch_messages_move_to_next (messages))
> +    {
> +	notmuch_message_t *message = notmuch_messages_get (messages);
> +	const char *mid = notmuch_message_get_message_id (message);
> +	/* Determine which query buffer to extend */
> +	char **buf = notmuch_message_get_flag (
> +	    message, NOTMUCH_MESSAGE_FLAG_MATCH) ? matched_out : unmached_out;
> +	/* Allocate the query buffer if this is the first message */
> +	if (!*buf && (*buf = talloc_strdup (thread, "")) == NULL)
> +	    return -1;

I think it would improve clarity if you dropped the above...

> +	/* Add this message's id: query.  Since "id" is an exclusive
> +	 * prefix, it is implicitly 'or'd together, so we only need to
> +	 * join queries with a space. */
> +	if (make_boolean_term (thread, "id", mid, &escaped, &escaped_len) < 0)
> +	    return -1;
> +	*buf = talloc_asprintf_append_buffer (
> +	    *buf, "%s%s", **buf ? " " : "", escaped);

...and turned this into:

	if (*buf)
	    *buf = talloc_asprintf_append_buffer (*buf, " %s", escaped);
	else
	    *buf = talloc_strdup (thread, escaped);

This also saves one talloc call. Which brings me to my main worry:
performance. What's the impact?

BR,
Jani.


> +	if (!*buf)
> +	    return -1;
> +    }
> +    talloc_free (escaped);
> +    return 0;
> +}
> +
>  static int
>  do_search_threads (sprinter_t *format,
>  		   notmuch_query_t *query,
> @@ -131,6 +172,25 @@ do_search_threads (sprinter_t *format,
>  		format->string (format, authors);
>  		format->map_key (format, "subject");
>  		format->string (format, subject);
> +		if (notmuch_format_version >= 2) {
> +		    char *matched_query, *unmatched_query;
> +		    if (get_thread_query (thread, &matched_query,
> +					  &unmatched_query) < 0) {
> +			fprintf (stderr, "Out of memory\n");
> +			return 1;
> +		    }
> +		    format->map_key (format, "query");
> +		    format->begin_list (format);
> +		    if (matched_query)
> +			format->string (format, matched_query);
> +		    else
> +			format->null (format);
> +		    if (unmatched_query)
> +			format->string (format, unmatched_query);
> +		    else
> +			format->null (format);
> +		    format->end (format);
> +		}
>  	    }
>  
>  	    talloc_free (ctx_quote);
> diff --git a/test/json b/test/json
> index b87b7f6..e07a290 100755
> --- a/test/json
> +++ b/test/json
> @@ -26,6 +26,7 @@ test_expect_equal_json "$output" "[{\"thread\": \"XXX\",
>   \"total\": 1,
>   \"authors\": \"Notmuch Test Suite\",
>   \"subject\": \"json-search-subject\",
> + \"query\": [\"id:$gen_msg_id\", null],
>   \"tags\": [\"inbox\",
>   \"unread\"]}]"
>  
> @@ -59,6 +60,7 @@ test_expect_equal_json "$output" "[{\"thread\": \"XXX\",
>   \"total\": 1,
>   \"authors\": \"Notmuch Test Suite\",
>   \"subject\": \"json-search-utf8-body-sübjéct\",
> + \"query\": [\"id:$gen_msg_id\", null],
>   \"tags\": [\"inbox\",
>   \"unread\"]}]"
>  
> diff --git a/test/missing-headers b/test/missing-headers
> index f14b878..43e861b 100755
> --- a/test/missing-headers
> +++ b/test/missing-headers
> @@ -43,7 +43,8 @@ test_expect_equal_json "$output" '
>          ],
>          "thread": "XXX",
>          "timestamp": 978709437,
> -        "total": 1
> +        "total": 1,
> +        "query": ["id:notmuch-sha1-7a6e4eac383ef958fcd3ebf2143db71b8ff01161", null]
>      },
>      {
>          "authors": "Notmuch Test Suite",
> @@ -56,7 +57,8 @@ test_expect_equal_json "$output" '
>          ],
>          "thread": "XXX",
>          "timestamp": 0,
> -        "total": 1
> +        "total": 1,
> +        "query": ["id:notmuch-sha1-ca55943aff7a72baf2ab21fa74fab3d632401334", null]
>      }
>  ]'
>  
> diff --git a/test/sexp b/test/sexp
> index 492a82f..be815e1 100755
> --- a/test/sexp
> +++ b/test/sexp
> @@ -19,7 +19,7 @@ test_expect_equal "$output" "((((:id \"${gen_msg_id}\" :match t :excluded nil :f
>  test_begin_subtest "Search message: sexp"
>  add_message "[subject]=\"sexp-search-subject\"" "[date]=\"Sat, 01 Jan 2000 12:00:00 -0000\"" "[body]=\"sexp-search-message\""
>  output=$(notmuch search --format=sexp "sexp-search-message" | notmuch_search_sanitize)
> -test_expect_equal "$output" "((:thread \"0000000000000002\" :timestamp 946728000 :date_relative \"2000-01-01\" :matched 1 :total 1 :authors \"Notmuch Test Suite\" :subject \"sexp-search-subject\" :tags (\"inbox\" \"unread\")))"
> +test_expect_equal "$output" "((:thread \"0000000000000002\" :timestamp 946728000 :date_relative \"2000-01-01\" :matched 1 :total 1 :authors \"Notmuch Test Suite\" :subject \"sexp-search-subject\" :query (\"id:$gen_msg_id\" nil) :tags (\"inbox\" \"unread\")))"
>  
>  test_begin_subtest "Show message: sexp, utf-8"
>  add_message "[subject]=\"sexp-show-utf8-body-sübjéct\"" "[date]=\"Sat, 01 Jan 2000 12:00:00 -0000\"" "[body]=\"jsön-show-méssage\""
> @@ -44,7 +44,7 @@ test_expect_equal "$output" "((((:id \"$id\" :match t :excluded nil :filename \"
>  test_begin_subtest "Search message: sexp, utf-8"
>  add_message "[subject]=\"sexp-search-utf8-body-sübjéct\"" "[date]=\"Sat, 01 Jan 2000 12:00:00 -0000\"" "[body]=\"jsön-search-méssage\""
>  output=$(notmuch search --format=sexp "jsön-search-méssage" | notmuch_search_sanitize)
> -test_expect_equal "$output" "((:thread \"0000000000000005\" :timestamp 946728000 :date_relative \"2000-01-01\" :matched 1 :total 1 :authors \"Notmuch Test Suite\" :subject \"sexp-search-utf8-body-sübjéct\" :tags (\"inbox\" \"unread\")))"
> +test_expect_equal "$output" "((:thread \"0000000000000005\" :timestamp 946728000 :date_relative \"2000-01-01\" :matched 1 :total 1 :authors \"Notmuch Test Suite\" :subject \"sexp-search-utf8-body-sübjéct\" :query (\"id:$gen_msg_id\" nil) :tags (\"inbox\" \"unread\")))"
>  
>  
>  test_done
> -- 
> 1.8.4.rc3
>
> _______________________________________________
> notmuch mailing list
> notmuch at notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch


More information about the notmuch mailing list