[notmuch] [PATCH] Switch from random to sequential thread identifiers.

Carl Worth cworth at cworth.org
Mon Feb 8 13:36:14 PST 2010


The sequential identifiers have the advantage of being guaranteed to
be unique (until we overflow a 64-bit unsigned integer), and also take
up half as much space in the "notmuch search" output (16 columns
rather than 32).

This change also has the side effect of fixing a bug where notmuch
could block on /dev/random at startup (waiting for some entropy to
appear). This bug was hit hard by the test suite, (which could easily
exhaust the available entropy on common systems---resulting in large
delays of the test suite).
---

Keith pointed out to me that there was obviously no benefit from
switching from hexadecimal to decimal here. So this second version of
the patch means 16-character identifiers rather than 20.

 lib/database-private.h |    7 +++++-
 lib/database.cc        |   52 ++++++++++++++++++++++++++++++++++++++++++++---
 lib/message.cc         |   46 ------------------------------------------
 test/notmuch-test      |    2 +-
 4 files changed, 55 insertions(+), 52 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 5891584..5bb6e86 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -27,14 +27,19 @@
 
 struct _notmuch_database {
     notmuch_bool_t exception_reported;
+
     char *path;
+
+    notmuch_bool_t needs_upgrade;
     notmuch_database_mode_t mode;
     Xapian::Database *xapian_db;
+
+    uint64_t last_thread_id;
+
     Xapian::QueryParser *query_parser;
     Xapian::TermGenerator *term_gen;
     Xapian::ValueRangeProcessor *value_range_processor;
 
-    notmuch_bool_t needs_upgrade;
 };
 
 /* Convert tags from Xapian internal format to notmuch format.
diff --git a/lib/database.cc b/lib/database.cc
index cce7847..8641321 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -533,6 +533,8 @@ notmuch_database_open (const char *path,
     notmuch->needs_upgrade = FALSE;
     notmuch->mode = mode;
     try {
+	string last_thread_id;
+
 	if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) {
 	    notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
 							       Xapian::DB_CREATE_OR_OPEN);
@@ -567,6 +569,20 @@ notmuch_database_open (const char *path,
 			 notmuch_path, version, NOTMUCH_DATABASE_VERSION);
 	    }
 	}
+
+	last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id");
+	if (last_thread_id.empty ()) {
+	    notmuch->last_thread_id = 0;
+	} else {
+	    const char *str;
+	    char *end;
+
+	    str = last_thread_id.c_str ();
+	    notmuch->last_thread_id = strtoull (str, &end, 16);
+	    if (*end != '\0')
+		INTERNAL_ERROR ("Malformed database last_thread_id: %s", str);
+	}
+
 	notmuch->query_parser = new Xapian::QueryParser;
 	notmuch->term_gen = new Xapian::TermGenerator;
 	notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
@@ -1278,14 +1294,38 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
     return ret;
 }
 
+static const char *
+_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
+{
+    /* 16 bytes (+ terminator) for hexadecimal representation of
+     * a 64-bit integer. */
+    static char thread_id[17];
+    Xapian::WritableDatabase *db;
+
+    db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+    notmuch->last_thread_id++;
+
+    sprintf (thread_id, "%016llx", notmuch->last_thread_id);
+
+    db->set_metadata ("last_thread_id", thread_id);
+
+    return thread_id;
+}
+
 /* Given a (mostly empty) 'message' and its corresponding
  * 'message_file' link it to existing threads in the database.
  *
  * We first look at 'message_file' and its link-relevant headers
  * (References and In-Reply-To) for message IDs. We also look in the
- * database for existing message that reference 'message'.
+ * database for existing message that reference 'message'. In either
+ * case, we will assign to the current message the first thread_id
+ * found (through either parent or child). We will also merge any
+ * existing, distinct threads where this message belongs to both,
+ * (which is not uncommon when mesages are processed out of order).
  *
- * The end result is to call _notmuch_message_ensure_thread_id which
+ * Finally, if not thread ID has been found through parent or child,
+ * we call _notmuch_message_generate_thread_id to generate a new
  * generates a new thread ID if the message doesn't connect to any
  * existing threads.
  */
@@ -1308,8 +1348,12 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
     if (status)
 	return status;
 
-    if (thread_id == NULL)
-	_notmuch_message_ensure_thread_id (message);
+    /* If not part of any existing thread, generate a new thread ID. */
+    if (thread_id == NULL) {
+	thread_id = _notmuch_database_generate_thread_id (notmuch);
+
+	_notmuch_message_add_term (message, "thread", thread_id);
+    }
 
     return NOTMUCH_STATUS_SUCCESS;
 }
diff --git a/lib/message.cc b/lib/message.cc
index f0e905b..0195050 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -42,13 +42,6 @@ struct _notmuch_message {
     Xapian::Document doc;
 };
 
-/* "128 bits of thread-id ought to be enough for anybody" */
-#define NOTMUCH_THREAD_ID_BITS	 128
-#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4)
-typedef struct _thread_id {
-    char str[NOTMUCH_THREAD_ID_DIGITS + 1];
-} thread_id_t;
-
 /* We end up having to call the destructor explicitly because we had
  * to use "placement new" in order to initialize C++ objects within a
  * block that we allocated with talloc. So C++ is making talloc
@@ -557,45 +550,6 @@ _notmuch_message_set_date (notmuch_message_t *message,
 			    Xapian::sortable_serialise (time_value));
 }
 
-static void
-thread_id_generate (thread_id_t *thread_id)
-{
-    static int seeded = 0;
-    FILE *dev_random;
-    uint32_t value;
-    char *s;
-    int i;
-
-    if (! seeded) {
-	dev_random = fopen ("/dev/random", "r");
-	if (dev_random == NULL) {
-	    srand (time (NULL));
-	} else {
-	    fread ((void *) &value, sizeof (value), 1, dev_random);
-	    srand (value);
-	    fclose (dev_random);
-	}
-	seeded = 1;
-    }
-
-    s = thread_id->str;
-    for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) {
-	value = rand ();
-	sprintf (s, "%08x", value);
-	s += 8;
-    }
-}
-
-void
-_notmuch_message_ensure_thread_id (notmuch_message_t *message)
-{
-    /* If not part of any existing thread, generate a new thread_id. */
-    thread_id_t thread_id;
-
-    thread_id_generate (&thread_id);
-    _notmuch_message_add_term (message, "thread", thread_id.str);
-}
-
 /* Synchronize changes made to message->doc out into the database. */
 void
 _notmuch_message_sync (notmuch_message_t *message)
diff --git a/test/notmuch-test b/test/notmuch-test
index 2e5eb24..cac5705 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -146,7 +146,7 @@ add_message ()
 }
 
 NOTMUCH_IGNORED_OUTPUT_REGEXP='^Processed [0-9]*( total)? file|Found [0-9]* total file'
-NOTMUCH_THREAD_ID_SQUELCH='s/thread:................................/thread:XXX/'
+NOTMUCH_THREAD_ID_SQUELCH='s/thread:..................../thread:XXX/'
 execute_expecting ()
 {
     args=$1
-- 
1.6.5.7



More information about the notmuch mailing list