[PATCH v2 08/12] lib: Implement ghost-based thread linking

Austin Clements aclements at csail.mit.edu
Tue Oct 21 18:49:13 PDT 2014


Quoth Mark Walters on Oct 22 at 12:10 am:
> On Tue, 07 Oct 2014, Austin Clements <aclements at csail.mit.edu> wrote:
> > From: Austin Clements <amdragon at mit.edu>
> >
> > This updates the thread linking code to use ghost messages instead of
> > user metadata to link messages into threads.
> >
> > In contrast with the old approach, this is actually correct.
> > Previously, thread merging updated only the thread IDs of message
> > documents, not thread IDs stored in user metadata.  As originally
> > diagnosed by Mark Walters [1] and as demonstrated by the broken
> > T260-thread-order test, this can cause notmuch to fail to link
> > messages even though they're in the same thread.  In principle the old
> > approach could have been fixed by updating the user metadata thread
> > IDs as well, but these are not indexed and hence this would have
> > required a full scan of all stored thread IDs.  Ghost messages solve
> > this problem naturally by reusing the exact same thread ID and message
> > ID representation and indexing as regular messages.
> >
> > Furthermore, thanks to this greater symmetry, ghost messages are also
> > algorithmically simpler.  We continue to support the old user metadata
> > format, so this patch can't delete any code, but when we do remove
> > support for the old format, several functions can simply be deleted.
> >
> > [1] id:8738h7kv2q.fsf at qmul.ac.uk
> > ---
> >  lib/database.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++++--------
> >  1 file changed, 75 insertions(+), 11 deletions(-)
> >
> > diff --git a/lib/database.cc b/lib/database.cc
> > index c641bcd..fdcc526 100644
> > --- a/lib/database.cc
> > +++ b/lib/database.cc
> > @@ -1752,6 +1752,12 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
> >  			    message_id);
> >  }
> >  
> > +static notmuch_status_t
> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> > +				      void *ctx,
> > +				      const char *message_id,
> > +				      const char **thread_id_ret);
> > +
> >  /* Find the thread ID to which the message with 'message_id' belongs.
> >   *
> >   * Note: 'thread_id_ret' must not be NULL!
> > @@ -1760,9 +1766,9 @@ _get_metadata_thread_id_key (void *ctx, const char *message_id)
> >   *
> >   * Note: If there is no message in the database with the given
> >   * 'message_id' then a new thread_id will be allocated for this
> > - * message and stored in the database metadata, (where this same
> > + * message ID and stored in the database metadata so that the
> >   * thread ID can be looked up if the message is added to the database
> > - * later).
> > + * later.
> >   */
> >  static notmuch_status_t
> >  _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
> > @@ -1770,6 +1776,49 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
> >  				  const char *message_id,
> >  				  const char **thread_id_ret)
> >  {
> > +    notmuch_private_status_t status;
> > +    notmuch_message_t *message;
> > +
> > +    if (! (notmuch->features & NOTMUCH_FEATURE_GHOSTS))
> > +	return _resolve_message_id_to_thread_id_old (notmuch, ctx, message_id,
> > +						     thread_id_ret);
> > +
> > +    /* Look for this message (regular or ghost) */
> > +    message = _notmuch_message_create_for_message_id (
> > +	notmuch, message_id, &status);
> > +    if (status == NOTMUCH_PRIVATE_STATUS_SUCCESS) {
> > +	/* Message exists */
> > +	*thread_id_ret = talloc_steal (
> > +	    ctx, notmuch_message_get_thread_id (message));
> > +    } else if (status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> > +	/* Message did not exist.  Give it a fresh thread ID and
> > +	 * populate this message as a ghost message. */
> > +	*thread_id_ret = talloc_strdup (
> > +	    ctx, _notmuch_database_generate_thread_id (notmuch));
> > +	if (! *thread_id_ret) {
> > +	    status = NOTMUCH_PRIVATE_STATUS_OUT_OF_MEMORY;
> > +	} else {
> > +	    status = _notmuch_message_initialize_ghost (message, *thread_id_ret);
> > +	    if (status == 0)
> > +		/* Commit the new ghost message */
> > +		_notmuch_message_sync (message);
> > +	}
> > +    } else {
> > +	/* Create failed. Fall through. */
> > +    }
> > +
> > +    notmuch_message_destroy (message);
> > +
> > +    return COERCE_STATUS (status, "Error creating ghost message");
> > +}
> > +
> > +/* Pre-ghost messages _resolve_message_id_to_thread_id */
> > +static notmuch_status_t
> > +_resolve_message_id_to_thread_id_old (notmuch_database_t *notmuch,
> > +				      void *ctx,
> > +				      const char *message_id,
> > +				      const char **thread_id_ret)
> > +{
> >      notmuch_status_t status;
> >      notmuch_message_t *message;
> >      string thread_id_string;
> > @@ -2007,7 +2056,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
> >      }
> >  }
> >  
> > -/* Given a (mostly empty) 'message' and its corresponding
> > +/* Given a blank or ghost 'message' and its corresponding
> >   * 'message_file' link it to existing threads in the database.
> >   *
> >   * The first check is in the metadata of the database to see if we
> 
> There is quite a lot of comment below this and it is not clear to me how
> much of it applies to the is_ghost case. In particular does the 
> 
>   * Finally, we look in the database for existing message that
>   * reference 'message'.
> 
> still apply? I would have thought that we would already have done that
> in the is_ghost case. 

Good point.  The comment isn't really wrong, but it isn't right
either.  How about

diff --git a/lib/database.cc b/lib/database.cc
index 6e51a72..d063ec8 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -2121,10 +2121,13 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
 /* Given a blank or ghost 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
- * The first check is in the metadata of the database to see if we
- * have pre-allocated a thread_id in advance for this message, (which
- * would have happened if a message was previously added that
- * referenced this one).
+ * First, if is_ghost, this retrieves the thread ID already stored in
+ * the message (which will be the case if a message was previously
+ * added that referenced this one).  If the message is blank
+ * (!is_ghost), it doesn't have a thread ID yet (we'll generate one
+ * later in this function).  If the database does not support ghost
+ * messages, this checks for a thread ID stored in database metadata
+ * for this message ID.
  *
  * Second, we look at 'message_file' and its link-relevant headers
  * (References and In-Reply-To) for message IDs.
@@ -2132,7 +2135,7 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
  * Finally, we look in the database for existing message that
  * reference 'message'.
  *
- * In all cases, we assign to the current message the first thread_id
+ * In all cases, we assign to the current message the first thread ID
  * found (through either parent or child). We will also merge any
  * existing, distinct threads where this message belongs to both,
  * (which is not uncommon when messages are processed out of order).

?

> Of course this also applies to the actual code which seems to call
> _notmuch_database_link_message_to_children unconditionally. It might be
> worth a comment in any case.
> 
> Best wishes
> 
> Mark
> 
> 
> > @@ -2035,16 +2084,22 @@ _consume_metadata_thread_id (void *ctx, notmuch_database_t *notmuch,
> >  static notmuch_status_t
> >  _notmuch_database_link_message (notmuch_database_t *notmuch,
> >  				notmuch_message_t *message,
> > -				notmuch_message_file_t *message_file)
> > +				notmuch_message_file_t *message_file,
> > +				notmuch_bool_t is_ghost)
> >  {
> >      void *local = talloc_new (NULL);
> >      notmuch_status_t status;
> > -    const char *thread_id;
> > +    const char *thread_id = NULL;
> >  
> >      /* Check if the message already had a thread ID */
> > -    thread_id = _consume_metadata_thread_id (local, notmuch, message);
> > -    if (thread_id)
> > -	_notmuch_message_add_term (message, "thread", thread_id);
> > +    if (notmuch->features & NOTMUCH_FEATURE_GHOSTS) {
> > +	if (is_ghost)
> > +	    thread_id = notmuch_message_get_thread_id (message);
> > +    } else {
> > +	thread_id = _consume_metadata_thread_id (local, notmuch, message);
> > +	if (thread_id)
> > +	    _notmuch_message_add_term (message, "thread", thread_id);
> > +    }
> >  
> >      status = _notmuch_database_link_message_to_parents (notmuch, message,
> >  							message_file,
> > @@ -2079,6 +2134,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
> >      notmuch_message_t *message = NULL;
> >      notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, ret2;
> >      notmuch_private_status_t private_status;
> > +    notmuch_bool_t is_ghost = false;
> >  
> >      const char *date, *header;
> >      const char *from, *to, *subject;
> > @@ -2171,12 +2227,20 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
> >  
> >  	_notmuch_message_add_filename (message, filename);
> >  
> > -	/* Is this a newly created message object? */
> > -	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND) {
> > +	/* Is this a newly created message object or a ghost
> > +	 * message?  We have to be slightly careful: if this is a
> > +	 * blank message, it's not safe to call
> > +	 * notmuch_message_get_flag yet. */
> > +	if (private_status == NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND ||
> > +	    (is_ghost = notmuch_message_get_flag (
> > +		message, NOTMUCH_MESSAGE_FLAG_GHOST))) {
> >  	    _notmuch_message_add_term (message, "type", "mail");
> > +	    if (is_ghost)
> > +		/* Convert ghost message to a regular message */
> > +		_notmuch_message_remove_term (message, "type", "ghost");
> >  
> >  	    ret = _notmuch_database_link_message (notmuch, message,
> > -						  message_file);
> > +						  message_file, is_ghost);
> >  	    if (ret)
> >  		goto DONE;
> >  
> >
> > _______________________________________________
> > notmuch mailing list
> > notmuch at notmuchmail.org
> > http://notmuchmail.org/mailman/listinfo/notmuch


More information about the notmuch mailing list