[notmuch] [PATCH] Switch from random to sequential thread identifiers.

Carl Worth cworth at cworth.org
Mon Feb 8 11:42:04 PST 2010


The sequential identifiers have the advantage of being guaranteed to
be unique (until we overflow a 64-bit unsigned integer), and also take
up slightly less space in the "notmuch search" output (20 columns
rather than 32).

This change also has the side effect of fixing a bug where notmuch
could block on /dev/random at startup (waiting for some entropy to
appear). This bug was hit hard by the test suite, (which could easily
exhaust the available entropy on common systems---resulting in large
delays of the test suite).

---

I'm sending this patch to the mailing-list rather than pushing it
directly so that any authors of user interfaces can ensure that they are
ready for the length of thread identifiers in "notmuch search" output to
change.

I tested that the emacs client doesn't need any change in this regard.

And the change doesn't introduce any compatibility with an existing
database, so no rebuild required (hurrah!).

-Carl

 lib/database-private.h |    7 +++++-
 lib/database.cc        |   52 ++++++++++++++++++++++++++++++++++++++++++++---
 lib/message.cc         |   46 ------------------------------------------
 test/notmuch-test      |    2 +-
 4 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 5891584..5bb6e86 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -27,14 +27,19 @@
 
 struct _notmuch_database {
     notmuch_bool_t exception_reported;
+
     char *path;
+
+    notmuch_bool_t needs_upgrade;
     notmuch_database_mode_t mode;
     Xapian::Database *xapian_db;
+
+    uint64_t last_thread_id;
+
     Xapian::QueryParser *query_parser;
     Xapian::TermGenerator *term_gen;
     Xapian::ValueRangeProcessor *value_range_processor;
 
-    notmuch_bool_t needs_upgrade;
 };
 
 /* Convert tags from Xapian internal format to notmuch format.
diff --git a/lib/database.cc b/lib/database.cc
index cce7847..65ff12e 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -533,6 +533,8 @@ notmuch_database_open (const char *path,
     notmuch->needs_upgrade = FALSE;
     notmuch->mode = mode;
     try {
+	string last_thread_id;
+
 	if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) {
 	    notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
 							       Xapian::DB_CREATE_OR_OPEN);
@@ -567,6 +569,20 @@ notmuch_database_open (const char *path,
 			 notmuch_path, version, NOTMUCH_DATABASE_VERSION);
 	    }
 	}
+
+	last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id");
+	if (last_thread_id.empty ()) {
+	    notmuch->last_thread_id = 0;
+	} else {
+	    const char *str;
+	    char *end;
+
+	    str = last_thread_id.c_str ();
+	    notmuch->last_thread_id = strtoull (str, &end, 10);
+	    if (*end != '\0')
+		INTERNAL_ERROR ("Malformed database last_thread_id: %s", str);
+	}
+
 	notmuch->query_parser = new Xapian::QueryParser;
 	notmuch->term_gen = new Xapian::TermGenerator;
 	notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
@@ -1278,14 +1294,38 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
     return ret;
 }
 
+static const char *
+_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
+{
+    /* 20 bytes (+ terminator) for ASCII decimal representation of
+     * a 64-bit integer. */
+    static char thread_id[21];
+    Xapian::WritableDatabase *db;
+
+    db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+    notmuch->last_thread_id++;
+
+    sprintf (thread_id, "%020llu", notmuch->last_thread_id);
+
+    db->set_metadata ("last_thread_id", thread_id);
+
+    return thread_id;
+}
+
 /* Given a (mostly empty) 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
  * We first look at 'message_file' and its link-relevant headers
  * (References and In-Reply-To) for message IDs. We also look in the
- * database for existing message that reference 'message'.
+ * database for existing message that reference 'message'. In either
+ * case, we will assign to the current message the first thread_id
+ * found (through either parent or child). We will also merge any
+ * existing, distinct threads where this message belongs to both,
+ * (which is not uncommon when mesages are processed out of order).
  *
- * The end result is to call _notmuch_message_ensure_thread_id which
+ * Finally, if not thread ID has been found through parent or child,
+ * we call _notmuch_message_generate_thread_id to generate a new
  * generates a new thread ID if the message doesn't connect to any
  * existing threads.
  */
@@ -1308,8 +1348,12 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
     if (status)
 	return status;
 
-    if (thread_id == NULL)
-	_notmuch_message_ensure_thread_id (message);
+    /* If not part of any existing thread, generate a new thread ID. */
+    if (thread_id == NULL) {
+	thread_id = _notmuch_database_generate_thread_id (notmuch);
+
+	_notmuch_message_add_term (message, "thread", thread_id);
+    }
 
     return NOTMUCH_STATUS_SUCCESS;
 }
diff --git a/lib/message.cc b/lib/message.cc
index f0e905b..0195050 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -42,13 +42,6 @@ struct _notmuch_message {
     Xapian::Document doc;
 };
 
-/* "128 bits of thread-id ought to be enough for anybody" */
-#define NOTMUCH_THREAD_ID_BITS	 128
-#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4)
-typedef struct _thread_id {
-    char str[NOTMUCH_THREAD_ID_DIGITS + 1];
-} thread_id_t;
-
 /* We end up having to call the destructor explicitly because we had
  * to use "placement new" in order to initialize C++ objects within a
  * block that we allocated with talloc. So C++ is making talloc
@@ -557,45 +550,6 @@ _notmuch_message_set_date (notmuch_message_t *message,
 			    Xapian::sortable_serialise (time_value));
 }
 
-static void
-thread_id_generate (thread_id_t *thread_id)
-{
-    static int seeded = 0;
-    FILE *dev_random;
-    uint32_t value;
-    char *s;
-    int i;
-
-    if (! seeded) {
-	dev_random = fopen ("/dev/random", "r");
-	if (dev_random == NULL) {
-	    srand (time (NULL));
-	} else {
-	    fread ((void *) &value, sizeof (value), 1, dev_random);
-	    srand (value);
-	    fclose (dev_random);
-	}
-	seeded = 1;
-    }
-
-    s = thread_id->str;
-    for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) {
-	value = rand ();
-	sprintf (s, "%08x", value);
-	s += 8;
-    }
-}
-
-void
-_notmuch_message_ensure_thread_id (notmuch_message_t *message)
-{
-    /* If not part of any existing thread, generate a new thread_id. */
-    thread_id_t thread_id;
-
-    thread_id_generate (&thread_id);
-    _notmuch_message_add_term (message, "thread", thread_id.str);
-}
-
 /* Synchronize changes made to message->doc out into the database. */
 void
 _notmuch_message_sync (notmuch_message_t *message)
diff --git a/test/notmuch-test b/test/notmuch-test
index 2e5eb24..cac5705 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -146,7 +146,7 @@ add_message ()
 }
 
 NOTMUCH_IGNORED_OUTPUT_REGEXP='^Processed [0-9]*( total)? file|Found [0-9]* total file'
-NOTMUCH_THREAD_ID_SQUELCH='s/thread:................................/thread:XXX/'
+NOTMUCH_THREAD_ID_SQUELCH='s/thread:..................../thread:XXX/'
 execute_expecting ()
 {
     args=$1
-- 
1.6.5.7




More information about the notmuch mailing list