[PATCH 2/2] lib/database.cc: change how the parent of a message is calculated

Jani Nikula jani at nikula.org
Sat May 4 09:24:59 PDT 2013


LGTM

On Wed, 06 Mar 2013, Aaron Ecay <aaronecay at gmail.com> wrote:
> Presently, the code which finds the parent of a message as it is being
> added to the database assumes that the first Message-ID-like substring
> of the In-Reply-To header is the parent Message ID.  Some mail clients,
> however, put stuff other than the Message-ID of the parent in the
> In-Reply-To header, such as the email address of the sender of the
> parent.  This can fool notmuch.
>
> The updated algorithm prefers the last Message ID in the References
> header.  The References header lists messages oldest-first, so the last
> Message ID is the parent (RFC2822, p. 24).  The References header is
> also less likely to be in a non-standard
> syntax (http://cr.yp.to/immhf/thread.html,
> http://www.jwz.org/doc/threading.html).  In case the References header
> is not to be found, fall back to the old behavior.
>
> V2 of this patch, incorporating feedback from Jani and (indirectly)
> Austin.
> ---
>  lib/database.cc     | 48 +++++++++++++++++++++++++++++++++---------------
>  test/thread-replies |  4 ----
>  2 files changed, 33 insertions(+), 19 deletions(-)
>
> diff --git a/lib/database.cc b/lib/database.cc
> index 91d4329..52ed618 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -501,8 +501,10 @@ _parse_message_id (void *ctx, const char *message_id, const char **next)
>   * 'message_id' in the result (to avoid mass confusion when a single
>   * message references itself cyclically---and yes, mail messages are
>   * not infrequent in the wild that do this---don't ask me why).
> -*/
> -static void
> + *
> + * Return the last reference parsed, if it is not equal to message_id.
> + */
> +static char *
>  parse_references (void *ctx,
>  		  const char *message_id,
>  		  GHashTable *hash,
> @@ -511,7 +513,7 @@ parse_references (void *ctx,
>      char *ref;
>  
>      if (refs == NULL || *refs == '\0')
> -	return;
> +	return NULL;
>  
>      while (*refs) {
>  	ref = _parse_message_id (ctx, refs, &refs);
> @@ -519,6 +521,17 @@ parse_references (void *ctx,
>  	if (ref && strcmp (ref, message_id))
>  	    g_hash_table_insert (hash, ref, NULL);
>      }
> +
> +    /* The return value of this function is used to add a parent
> +     * reference to the database.  We should avoid making a message
> +     * its own parent, thus the following check.
> +     */
> +
> +    if (ref && strcmp(ref, message_id)) {
> +	return ref;
> +    } else {
> +	return NULL;
> +    }
>  }
>  
>  notmuch_status_t
> @@ -1510,28 +1523,33 @@ _notmuch_database_link_message_to_parents (notmuch_database_t *notmuch,
>  {
>      GHashTable *parents = NULL;
>      const char *refs, *in_reply_to, *in_reply_to_message_id;
> +    const char *last_ref_message_id, *this_message_id;
>      GList *l, *keys = NULL;
>      notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
>  
>      parents = g_hash_table_new_full (g_str_hash, g_str_equal,
>  				     _my_talloc_free_for_g_hash, NULL);
> +    this_message_id = notmuch_message_get_message_id (message);
>  
>      refs = notmuch_message_file_get_header (message_file, "references");
> -    parse_references (message, notmuch_message_get_message_id (message),
> -		      parents, refs);
> +    last_ref_message_id = parse_references (message,
> +					    this_message_id,
> +					    parents, refs);
>  
>      in_reply_to = notmuch_message_file_get_header (message_file, "in-reply-to");
> -    parse_references (message, notmuch_message_get_message_id (message),
> -		      parents, in_reply_to);
> -
> -    /* Carefully avoid adding any self-referential in-reply-to term. */
> -    in_reply_to_message_id = _parse_message_id (message, in_reply_to, NULL);
> -    if (in_reply_to_message_id &&
> -	strcmp (in_reply_to_message_id,
> -		notmuch_message_get_message_id (message)))
> -    {
> +    in_reply_to_message_id = parse_references (message,
> +					       this_message_id,
> +					       parents, in_reply_to);
> +
> +    /* For the parent of this message, use the last message ID of the
> +     * References header, if available.  If not, fall back to the
> +     * first message ID in the In-Reply-To header. */
> +    if (last_ref_message_id) {
> +        _notmuch_message_add_term (message, "replyto",
> +                                   last_ref_message_id);
> +    } else if (in_reply_to_message_id) {
>  	_notmuch_message_add_term (message, "replyto",
> -			     _parse_message_id (message, in_reply_to, NULL));
> +			     in_reply_to_message_id);
>      }
>  
>      keys = g_hash_table_get_keys (parents);
> diff --git a/test/thread-replies b/test/thread-replies
> index a902691..28c2b1f 100755
> --- a/test/thread-replies
> +++ b/test/thread-replies
> @@ -11,7 +11,6 @@ constructed properly, even in the presence of non-RFC-compliant headers'
>  . ./test-lib.sh
>  
>  test_begin_subtest "Use References when In-Reply-To is broken"
> -test_subtest_known_broken
>  add_message '[id]="foo at one.com"' \
>      '[subject]=one'
>  add_message '[in-reply-to]="mumble"' \
> @@ -46,7 +45,6 @@ expected=`echo "$expected" | notmuch_json_show_sanitize`
>  test_expect_equal_json "$output" "$expected"
>  
>  test_begin_subtest "Prefer References to In-Reply-To"
> -test_subtest_known_broken
>  add_message '[id]="foo at two.com"' \
>      '[subject]=two'
>  add_message '[in-reply-to]="<bar at baz.com>"' \
> @@ -77,7 +75,6 @@ expected=`echo "$expected" | notmuch_json_show_sanitize`
>  test_expect_equal_json "$output" "$expected"
>  
>  test_begin_subtest "Use In-Reply-To when no References"
> -test_subtest_known_broken
>  add_message '[id]="foo at three.com"' \
>      '[subject]="three"'
>  add_message '[in-reply-to]="<foo at three.com>"' \
> @@ -104,7 +101,6 @@ expected=`echo "$expected" | notmuch_json_show_sanitize`
>  test_expect_equal_json "$output" "$expected"
>  
>  test_begin_subtest "Use last Reference"
> -test_subtest_known_broken
>  add_message '[id]="foo at four.com"' \
>      '[subject]="four"'
>  add_message '[id]="bar at four.com"' \
> -- 
> 1.8.1.5
>
> _______________________________________________
> notmuch mailing list
> notmuch at notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch


More information about the notmuch mailing list