[PATCH v4 15/16] added notmuch_message_reindex

Daniel Kahn Gillmor dkg at fifthhorseman.net
Fri Jul 8 02:27:26 PDT 2016


This new function asks the database to reindex a given message, using
the supplied indexopts.

This can be used, for example, to index the cleartext of an encrypted
message.

My initial inclination for this implementation was to remove all the
indexed terms for a given message's body, and then to add them back
in.

Unfortunately, that doesn't appear to be possible due to the way we're
using Xapian.  I could find no way to distinguish terms which were
added during indexing of the message body from other terms associated
with the document.  As a result, we just save the tags and properties,
remove the message from the database entirely, and add it back into
the database in full, re-adding tags and properties as needed.
---
 lib/message.cc | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/notmuch.h  |  14 ++++++++
 2 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/lib/message.cc b/lib/message.cc
index 9d3e807..ab807b7 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -557,7 +557,9 @@ void
 _notmuch_message_remove_terms (notmuch_message_t *message, const char *prefix)
 {
     Xapian::TermIterator i;
-    size_t prefix_len = strlen (prefix);
+    size_t prefix_len = 0;
+
+    prefix_len = strlen (prefix);
 
     while (1) {
 	i = message->doc.termlist_begin ();
@@ -1847,3 +1849,107 @@ _notmuch_message_frozen (notmuch_message_t *message)
 {
     return message->frozen;
 }
+
+notmuch_status_t
+notmuch_message_reindex (notmuch_message_t *message,
+			 notmuch_indexopts_t *indexopts)
+{
+    notmuch_database_t *notmuch = NULL;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+    notmuch_tags_t *tags = NULL;
+    notmuch_message_properties_t *properties = NULL;
+    notmuch_filenames_t *filenames, *orig_filenames = NULL;
+    const char *filename = NULL, *tag = NULL, *propkey = NULL;
+    notmuch_message_t *newmsg = NULL;
+    notmuch_bool_t readded = FALSE, skip;
+    const char *autotags[] = { /* tags that are not copied back onto newmsg */
+		    "attachment",
+		    "encrypted",
+		    "signed" };
+    const char *autoproperties[] = { "index-decryption" }; /* likewise not copied */
+    /* Reindex = remove every filename of the message, then add each back with indexopts. */
+    if (message == NULL)
+	return NOTMUCH_STATUS_NULL_POINTER;
+    
+    notmuch = _notmuch_message_database (message);
+
+    /* Cache tags and properties, plus two filename iterators (one per loop below). */
+    tags = notmuch_message_get_tags (message);
+    properties = notmuch_message_get_properties (message, "", FALSE);
+    filenames = notmuch_message_get_filenames (message);
+    orig_filenames = notmuch_message_get_filenames (message);
+    
+    /* Walk through filenames, removing them until the message is gone. */
+    for ( ; notmuch_filenames_valid (filenames);
+	  notmuch_filenames_move_to_next (filenames)) {
+	filename = notmuch_filenames_get (filenames);
+
+	ret = notmuch_database_remove_message (notmuch, filename);
+	if (ret != NOTMUCH_STATUS_SUCCESS &&
+	    ret != NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID)
+	    return ret;
+    }
+    if (ret != NOTMUCH_STATUS_SUCCESS) /* the final removal (if any) must fully succeed */
+	return ret;
+    
+    /* Re-add each filename with indexopts; newmsg is only requested until readded is set. */
+    for (; notmuch_filenames_valid (orig_filenames);
+	 notmuch_filenames_move_to_next (orig_filenames)) {
+	filename = notmuch_filenames_get (orig_filenames);
+
+	status = notmuch_database_add_message_with_indexopts(notmuch,
+							     filename,
+							     indexopts,
+							     readded ? NULL : &newmsg);
+	if (status == NOTMUCH_STATUS_SUCCESS ||
+	    status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID) {
+	    if (!readded) {
+		/* Re-add the cached tags, skipping those listed in autotags. */
+		for (; notmuch_tags_valid (tags);
+		     notmuch_tags_move_to_next (tags)) {
+		    tag = notmuch_tags_get (tags);
+		    skip = FALSE;
+		    
+		    for (size_t i = 0; i < ARRAY_SIZE (autotags); i++)
+			if (strcmp (tag, autotags[i]) == 0)
+			    skip = TRUE;
+		    
+		    if (!skip) {
+			status = notmuch_message_add_tag (newmsg, tag);
+			if (status != NOTMUCH_STATUS_SUCCESS)
+			    ret = status; /* record the failure, keep restoring */
+		    }
+		}
+		/* Re-add the cached properties, skipping those in autoproperties. */
+		for (; notmuch_message_properties_valid (properties);
+		     notmuch_message_properties_move_to_next (properties)) {
+		    propkey = notmuch_message_properties_key (properties);
+		    skip = FALSE;
+
+		    for (size_t i = 0; i < ARRAY_SIZE (autoproperties); i++)
+			if (strcmp (propkey, autoproperties[i]) == 0)
+			    skip = TRUE;
+
+		    if (!skip) {
+			status = notmuch_message_add_property (newmsg, propkey,
+							       notmuch_message_properties_value (properties));
+			if (status != NOTMUCH_STATUS_SUCCESS)
+			    ret = status; /* record the failure, keep restoring */
+		    }
+		}
+		readded = TRUE;
+	    }
+	} else {
+	    /* If we failed to add this filename, go ahead and try the
+	     * next one as though it were first, but report the
+	     * error... */
+	    ret = status;
+	}
+    }
+    if (newmsg)
+	notmuch_message_destroy (newmsg);
+	    		
+    /* Should we also destroy the incoming message object?  At the
+     * moment, we leave that to the caller. */
+    return ret;
+}
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 66b3503..9076a9b 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -1394,6 +1394,20 @@ notmuch_filenames_t *
 notmuch_message_get_filenames (notmuch_message_t *message);
 
 /**
+ * Re-index the e-mail corresponding to 'message' using the supplied
+ * index options.  Existing tags and properties are re-applied, except
+ * tags "attachment", "encrypted", "signed" and property "index-decryption".
+ *
+ * Returns NOTMUCH_STATUS_NULL_POINTER if 'message' is NULL, else the
+ * status of the re-index operation (see notmuch_database_add_message).
+ * Afterwards, discard 'message' with notmuch_message_destroy: it
+ * refers to the original message, not the reindexed one.
+ */
+notmuch_status_t
+notmuch_message_reindex (notmuch_message_t *message,
+			 notmuch_indexopts_t *indexopts);
+
+/**
  * Message flags.
  */
 typedef enum _notmuch_message_flag {
-- 
2.8.1



More information about the notmuch mailing list