[notmuch] [PATCH] Switch from random to sequential thread identifiers.
Carl Worth
cworth at cworth.org
Mon Feb 8 13:36:14 PST 2010
The sequential identifiers have the advantage of being guaranteed to
be unique (until we overflow a 64-bit unsigned integer), and also take
up half as much space in the "notmuch search" output (16 columns
rather than 32).
This change also has the side effect of fixing a bug where notmuch
could block on /dev/random at startup (waiting for some entropy to
appear). This bug was hit hard by the test suite, (which could easily
exhaust the available entropy on common systems---resulting in large
delays of the test suite).
---
Keith pointed out to me that there was obviously no benefit from
switching from hexadecimal to decimal here. So this second version of
the patch means 16-character identifiers rather than 20.
lib/database-private.h | 7 +++++-
lib/database.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++---
lib/message.cc | 46 ------------------------------------------
test/notmuch-test | 2 +-
4 files changed, 55 insertions(+), 52 deletions(-)
diff --git a/lib/database-private.h b/lib/database-private.h
index 5891584..5bb6e86 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -27,14 +27,19 @@
struct _notmuch_database {
notmuch_bool_t exception_reported;
+
char *path;
+
+ notmuch_bool_t needs_upgrade;
notmuch_database_mode_t mode;
Xapian::Database *xapian_db;
+
+ uint64_t last_thread_id;
+
Xapian::QueryParser *query_parser;
Xapian::TermGenerator *term_gen;
Xapian::ValueRangeProcessor *value_range_processor;
- notmuch_bool_t needs_upgrade;
};
/* Convert tags from Xapian internal format to notmuch format.
diff --git a/lib/database.cc b/lib/database.cc
index cce7847..8641321 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -533,6 +533,8 @@ notmuch_database_open (const char *path,
notmuch->needs_upgrade = FALSE;
notmuch->mode = mode;
try {
+ string last_thread_id;
+
if (mode == NOTMUCH_DATABASE_MODE_READ_WRITE) {
notmuch->xapian_db = new Xapian::WritableDatabase (xapian_path,
Xapian::DB_CREATE_OR_OPEN);
@@ -567,6 +569,20 @@ notmuch_database_open (const char *path,
notmuch_path, version, NOTMUCH_DATABASE_VERSION);
}
}
+
+ last_thread_id = notmuch->xapian_db->get_metadata ("last_thread_id");
+ if (last_thread_id.empty ()) {
+ notmuch->last_thread_id = 0;
+ } else {
+ const char *str;
+ char *end;
+
+ str = last_thread_id.c_str ();
+ notmuch->last_thread_id = strtoull (str, &end, 16);
+ if (*end != '\0')
+ INTERNAL_ERROR ("Malformed database last_thread_id: %s", str);
+ }
+
notmuch->query_parser = new Xapian::QueryParser;
notmuch->term_gen = new Xapian::TermGenerator;
notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
@@ -1278,14 +1294,38 @@ _notmuch_database_link_message_to_children (notmuch_database_t *notmuch,
return ret;
}
+static const char *
+_notmuch_database_generate_thread_id (notmuch_database_t *notmuch)
+{
+ /* 16 bytes (+ terminator) for hexadecimal representation of
+ * a 64-bit integer. */
+ static char thread_id[17];
+ Xapian::WritableDatabase *db;
+
+ db = static_cast <Xapian::WritableDatabase *> (notmuch->xapian_db);
+
+ notmuch->last_thread_id++;
+
+ sprintf (thread_id, "%016llx", notmuch->last_thread_id);
+
+ db->set_metadata ("last_thread_id", thread_id);
+
+ return thread_id;
+}
+
/* Given a (mostly empty) 'message' and its corresponding
* 'message_file' link it to existing threads in the database.
*
* We first look at 'message_file' and its link-relevant headers
* (References and In-Reply-To) for message IDs. We also look in the
- * database for existing message that reference 'message'.
+ * database for existing message that reference 'message'. In either
+ * case, we will assign to the current message the first thread_id
+ * found (through either parent or child). We will also merge any
+ * existing, distinct threads where this message belongs to both,
+ * (which is not uncommon when mesages are processed out of order).
*
- * The end result is to call _notmuch_message_ensure_thread_id which
+ * Finally, if not thread ID has been found through parent or child,
+ * we call _notmuch_message_generate_thread_id to generate a new
* generates a new thread ID if the message doesn't connect to any
* existing threads.
*/
@@ -1308,8 +1348,12 @@ _notmuch_database_link_message (notmuch_database_t *notmuch,
if (status)
return status;
- if (thread_id == NULL)
- _notmuch_message_ensure_thread_id (message);
+ /* If not part of any existing thread, generate a new thread ID. */
+ if (thread_id == NULL) {
+ thread_id = _notmuch_database_generate_thread_id (notmuch);
+
+ _notmuch_message_add_term (message, "thread", thread_id);
+ }
return NOTMUCH_STATUS_SUCCESS;
}
diff --git a/lib/message.cc b/lib/message.cc
index f0e905b..0195050 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -42,13 +42,6 @@ struct _notmuch_message {
Xapian::Document doc;
};
-/* "128 bits of thread-id ought to be enough for anybody" */
-#define NOTMUCH_THREAD_ID_BITS 128
-#define NOTMUCH_THREAD_ID_DIGITS (NOTMUCH_THREAD_ID_BITS / 4)
-typedef struct _thread_id {
- char str[NOTMUCH_THREAD_ID_DIGITS + 1];
-} thread_id_t;
-
/* We end up having to call the destructor explicitly because we had
* to use "placement new" in order to initialize C++ objects within a
* block that we allocated with talloc. So C++ is making talloc
@@ -557,45 +550,6 @@ _notmuch_message_set_date (notmuch_message_t *message,
Xapian::sortable_serialise (time_value));
}
-static void
-thread_id_generate (thread_id_t *thread_id)
-{
- static int seeded = 0;
- FILE *dev_random;
- uint32_t value;
- char *s;
- int i;
-
- if (! seeded) {
- dev_random = fopen ("/dev/random", "r");
- if (dev_random == NULL) {
- srand (time (NULL));
- } else {
- fread ((void *) &value, sizeof (value), 1, dev_random);
- srand (value);
- fclose (dev_random);
- }
- seeded = 1;
- }
-
- s = thread_id->str;
- for (i = 0; i < NOTMUCH_THREAD_ID_DIGITS; i += 8) {
- value = rand ();
- sprintf (s, "%08x", value);
- s += 8;
- }
-}
-
-void
-_notmuch_message_ensure_thread_id (notmuch_message_t *message)
-{
- /* If not part of any existing thread, generate a new thread_id. */
- thread_id_t thread_id;
-
- thread_id_generate (&thread_id);
- _notmuch_message_add_term (message, "thread", thread_id.str);
-}
-
/* Synchronize changes made to message->doc out into the database. */
void
_notmuch_message_sync (notmuch_message_t *message)
diff --git a/test/notmuch-test b/test/notmuch-test
index 2e5eb24..cac5705 100755
--- a/test/notmuch-test
+++ b/test/notmuch-test
@@ -146,7 +146,7 @@ add_message ()
}
NOTMUCH_IGNORED_OUTPUT_REGEXP='^Processed [0-9]*( total)? file|Found [0-9]* total file'
-NOTMUCH_THREAD_ID_SQUELCH='s/thread:................................/thread:XXX/'
+NOTMUCH_THREAD_ID_SQUELCH='s/thread:..................../thread:XXX/'
execute_expecting ()
{
args=$1
--
1.6.5.7
More information about the notmuch
mailing list