[notmuch] [PATCH] Store thread ids for messages that we haven't seen yet

James Westby jw+debian at jameswestby.net
Sat Mar 13 13:27:57 PST 2010


This allows us to thread messages even when we receive them out of
order, or never receive the root.

The thread ids for messages that aren't present but are referred to are
stored as metadata in the database and then retrieved if we ever get
that message.

When determining the thread id for a message we also check for this
metadata so that we can thread descendants of a message together before
we receive it.
---
 lib/database.cc   |   78 ++++++++++++++++++++++++++++++++++++++--------------
 test/notmuch-test |   32 +++++++++++++++++++--
 2 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index c91e97c..92234ff 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1111,6 +1111,31 @@ notmuch_database_get_directory (notmuch_database_t *notmuch,
     return _notmuch_directory_create (notmuch, path, &status);
 }
 
+static const char *
+_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
+{
+    /* Bump and persist the thread counter; return it as 16 hex digits
+     * (+ NUL) in a static buffer that the next call overwrites. */
+    static char thread_id[17];
+    Xapian::WritableDatabase *db;
+
+    db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+    notmuch->last_thread_id++;
+
+    sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id);
+
+    db->set_metadata ("last_thread_id", thread_id);
+
+    return thread_id;
+}
+
+static char *
+_get_metadata_thread_id_key (void *ctx, const char *message_id)
+{   /* Returns "thread_id_<message_id>", talloc-allocated on 'ctx'. */
+    return talloc_asprintf (ctx, "thread_id_%s", message_id);
+}
+
 /* Find the thread ID to which the message with 'message_id' belongs.
  *
  * Returns NULL if no message with message ID 'message_id' is in the
@@ -1127,8 +1152,25 @@ _resolve_message_id_to_thread_id (notmuch_database_t *notmuch,
     const char *ret = NULL;
 
     message = notmuch_database_find_message (notmuch, message_id);
-    if (message == NULL)
-	goto DONE;
+    /* Message not seen yet: reuse the placeholder thread id stored in the
+     * metadata for it, or generate and record a new one, so that replies
+     * can be threaded before (or without) their parent.  The id is copied
+     * onto 'ctx' because neither the local std::string nor the generator's
+     * static buffer stays valid for the caller. */
+    if (message == NULL) {
+        Xapian::WritableDatabase *db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+        char * metadata_key = _get_metadata_thread_id_key (ctx, message_id);
+        string thread_id = notmuch->xapian_db->get_metadata(metadata_key);
+        if (thread_id.empty()) {
+            ret = _notmuch_database_generate_thread_id(notmuch);
+            db->set_metadata(metadata_key, ret);
+        } else {
+            ret = thread_id.c_str();
+        }
+        ret = talloc_strdup (ctx, ret);
+        talloc_free (metadata_key);
+        goto DONE;
+    }
 
     ret = talloc_steal (ctx, notmuch_message_get_thread_id (message));
 
@@ -1295,25 +1337,6 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
     return ret;
 }
 
-static const char *
-_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
-{
-    /* 16 bytes (+ terminator) for hexadecimal representation of
-     * a 64-bit integer. */
-    static char thread_id[17];
-    Xapian::WritableDatabase *db;
-
-    db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
-
-    notmuch->last_thread_id++;
-
-    sprintf (thread_id, "%016" PRIx64, notmuch->last_thread_id);
-
-    db->set_metadata ("last_thread_id", thread_id);
-
-    return thread_id;
-}
-
 /* Given a (mostly empty) 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
@@ -1337,6 +1360,19 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
 {
     notmuch_status_t status;
     const char *thread_id = NULL;
+    char *metadata_key = _get_metadata_thread_id_key (message,
+            notmuch_message_get_message_id (message));
+    /* If earlier messages referred to this one, a thread id was stored
+     * in the metadata under this key: adopt it as this message's thread
+     * and blank the entry now that the message itself is being added. */
+    string stored_id = notmuch->xapian_db->get_metadata (metadata_key);
+    if (!stored_id.empty()) {
+        Xapian::WritableDatabase *db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+        db->set_metadata (metadata_key, "");
+        thread_id = stored_id.c_str(); /* valid while stored_id is in scope */
+        _notmuch_message_add_term (message, "thread", thread_id);
+    }
+    talloc_free (metadata_key);
 
     status = _notmuch_database_link_message_to_parents (notmuch, message,
 							message_file,
diff --git a/test/notmuch-test b/test/notmuch-test
index 7bc53ec..9264fb0 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -64,6 +64,10 @@ increment_mtime ()
 #	Additional values for email headers. If these are not provided
 #	then the relevant headers will simply not appear in the
 #	message.
+#
+#  '[id]=<message-id>'
+#
+#	Controls the message-id of the created message.
 gen_msg_cnt=0
 gen_msg_filename=""
 gen_msg_id=""
@@ -73,9 +77,14 @@ generate_message ()
     local -A template="($@)"
     local additional_headers
 
-    gen_msg_cnt=$((gen_msg_cnt + 1))
-    gen_msg_name=msg-$(printf "%03d" $gen_msg_cnt)
-    gen_msg_id="${gen_msg_name}@notmuch-test-suite"
+    if [ -z "${template[id]}" ]; then  # no explicit id: number the message
+	gen_msg_cnt=$((gen_msg_cnt + 1))
+	gen_msg_name=msg-$(printf "%03d" $gen_msg_cnt)
+	gen_msg_id="${gen_msg_name}@notmuch-test-suite"
+    else  # caller-supplied id: use it as given, counter left untouched
+	gen_msg_name="msg-${template[id]}"
+	gen_msg_id="${template[id]}"
+    fi
 
     if [ -z "${template[dir]}" ]; then
 	gen_msg_filename="${MAIL_DIR}/$gen_msg_name"
@@ -534,6 +543,23 @@ printf " Restore with nothing to do...\t"
 $NOTMUCH restore dump.expected
 echo "	PASS"
 
+printf "\nTesting threading when messages received out of order:\n"  # children first, parent last
+printf " Adding initial child message...\t\t"
+generate_message [body]=foo '[in-reply-to]=\<parent-id\>' [subject]=brokenthreadtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+execute_expecting new "Added 1 new message to the database."
+printf " Searching returns the message...\t\t"
+execute_expecting "search foo" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; brokenthreadtest (inbox unread)"
+printf " Adding second child message...\t\t"  # same in-reply-to as the first child
+generate_message [body]=foo '[in-reply-to]=\<parent-id\>' [subject]=brokenthreadtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+execute_expecting new "Added 1 new message to the database."
+printf " Searching returns both messages in one thread...\t\t"
+execute_expecting "search foo" "thread:XXX   2000-01-01 [2/2] Notmuch Test Suite; brokenthreadtest (inbox unread)"
+printf " Adding parent message...\t\t"  # [id]=parent-id matches the children's in-reply-to
+generate_message [body]=foo [id]=parent-id [subject]=brokenthreadtest '[date]="Sat, 01 Jan 2000 12:00:00 -0000"'
+execute_expecting new "Added 1 new message to the database."
+printf " Searching returns all three messages in one thread...\t\t"
+execute_expecting "search foo" "thread:XXX   2000-01-01 [3/3] Notmuch Test Suite; brokenthreadtest (inbox unread)"
+
 cat <<EOF
 Notmuch test suite complete.
 
-- 
1.7.0



More information about the notmuch mailing list