[PATCH v4 16/16] add "notmuch reindex" subcommand

Daniel Kahn Gillmor dkg at fifthhorseman.net
Fri Jul 8 02:27:27 PDT 2016


This new subcommand takes a set of search terms, and re-indexes the
list of matching messages using the supplied options.

This can be used to index the cleartext of encrypted messages with
something like:

 notmuch reindex --try-decrypt \
    tag:encrypted and not has:index-decryption=success
---
 Makefile.local                    |   1 +
 doc/conf.py                       |   7 ++
 doc/index.rst                     |   1 +
 doc/man1/notmuch-reindex.rst      |  41 ++++++++++
 doc/man1/notmuch.rst              |   1 +
 doc/man7/notmuch-search-terms.rst |   7 +-
 notmuch-client.h                  |   3 +
 notmuch-reindex.c                 | 152 ++++++++++++++++++++++++++++++++++++++
 notmuch.c                         |   2 +
 test/T357-index-decryption.sh     |  67 +++++++++++++++++
 10 files changed, 280 insertions(+), 2 deletions(-)
 create mode 100644 doc/man1/notmuch-reindex.rst
 create mode 100644 notmuch-reindex.c

diff --git a/Makefile.local b/Makefile.local
index c13ba76..a916bd0 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -281,6 +281,7 @@ notmuch_client_srcs =		\
 	notmuch-dump.c		\
 	notmuch-insert.c	\
 	notmuch-new.c		\
+	notmuch-reindex.c       \
 	notmuch-reply.c		\
 	notmuch-restore.c	\
 	notmuch-search.c	\
diff --git a/doc/conf.py b/doc/conf.py
index 8b93296..8455a51 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -94,6 +94,10 @@ man_pages = [
         u'incorporate new mail into the notmuch database',
         [u'Carl Worth and many others'], 1),
 
+('man1/notmuch-reindex','notmuch-reindex',
+        u're-index matching messages',
+        [u'Carl Worth and many others'], 1),
+
 ('man1/notmuch-reply','notmuch-reply',
         u'constructs a reply template for a set of messages',
         [u'Carl Worth and many others'], 1),
@@ -162,6 +166,9 @@ texinfo_documents = [
 ('man1/notmuch-new','notmuch-new',u'notmuch Documentation',
       u'Carl Worth and many others', 'notmuch-new',
       'incorporate new mail into the notmuch database','Miscellaneous'),
+('man1/notmuch-reindex','notmuch-reindex',u'notmuch Documentation',
+      u'Carl Worth and many others', 'notmuch-reindex',
+      're-index matching messages','Miscellaneous'),
 ('man1/notmuch-reply','notmuch-reply',u'notmuch Documentation',
       u'Carl Worth and many others', 'notmuch-reply',
       'constructs a reply template for a set of messages','Miscellaneous'),
diff --git a/doc/index.rst b/doc/index.rst
index 344606d..aa6c9f4 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -18,6 +18,7 @@ Contents:
    man5/notmuch-hooks
    man1/notmuch-insert
    man1/notmuch-new
+   man1/notmuch-reindex
    man1/notmuch-reply
    man1/notmuch-restore
    man1/notmuch-search
diff --git a/doc/man1/notmuch-reindex.rst b/doc/man1/notmuch-reindex.rst
new file mode 100644
index 0000000..7ccc947
--- /dev/null
+++ b/doc/man1/notmuch-reindex.rst
@@ -0,0 +1,41 @@
+===========
+notmuch-reindex
+===========
+
+SYNOPSIS
+========
+
+**notmuch** **reindex** [*option* ...] <*search-term*> ...
+
+DESCRIPTION
+===========
+
+Re-index all messages matching the search terms.
+
+See **notmuch-search-terms(7)** for details of the supported syntax for
+<*search-term*\ >.
+
+The **reindex** command searches for all messages matching the
+supplied search terms, and re-creates the full-text index on these
+messages using the supplied options.
+
+Supported options for **reindex** include
+
+    ``--try-decrypt``
+
+        For each message, if it is encrypted, try to decrypt it while
+        indexing.  If decryption is successful, index the cleartext
+        itself.  Be aware that the index is likely sufficient to
+        reconstruct the cleartext of the message itself, so please
+        ensure that the notmuch message index is adequately
+        protected. DO NOT USE THIS FLAG without considering the
+        security of your index.
+
+SEE ALSO
+========
+
+**notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**,
+**notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**,
+**notmuch-new(1)**,
+**notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**,
+**notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)**
diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst
index edd04ef..e6a14f6 100644
--- a/doc/man1/notmuch.rst
+++ b/doc/man1/notmuch.rst
@@ -140,6 +140,7 @@ SEE ALSO
 
 **notmuch-config(1)**, **notmuch-count(1)**, **notmuch-dump(1)**,
 **notmuch-hooks(5)**, **notmuch-insert(1)**, **notmuch-new(1)**,
+**notmuch-reindex(1)**,
 **notmuch-reply(1)**, **notmuch-restore(1)**, **notmuch-search(1)**,
 **notmuch-search-terms(7)**, **notmuch-show(1)**, **notmuch-tag(1)**,
 **notmuch-address(1)**
diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index 075f88c..e5ea899 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -9,6 +9,8 @@ SYNOPSIS
 
 **notmuch** **dump** [--format=(batch-tag|sup)] [--] [--output=<*file*>] [--] [<*search-term*> ...]
 
+**notmuch** **reindex** [option ...] <*search-term*> ...
+
 **notmuch** **search** [option ...] <*search-term*> ...
 
 **notmuch** **show** [option ...] <*search-term*> ...
@@ -395,5 +397,6 @@ SEE ALSO
 
 **notmuch(1)**, **notmuch-config(1)**, **notmuch-count(1)**,
 **notmuch-dump(1)**, **notmuch-hooks(5)**, **notmuch-insert(1)**,
-**notmuch-new(1)**, **notmuch-reply(1)**, **notmuch-restore(1)**,
-**notmuch-search(1)**, **notmuch-show(1)**, **notmuch-tag(1)**
+**notmuch-new(1)**, **notmuch-reindex(1)**, **notmuch-reply(1)**,
+**notmuch-restore(1)**, **notmuch-search(1)**, **notmuch-show(1)**,
+**notmuch-tag(1)**
diff --git a/notmuch-client.h b/notmuch-client.h
index a65bb6d..a32ef7a 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -169,6 +169,9 @@ int
 notmuch_insert_command (notmuch_config_t *config, int argc, char *argv[]);
 
 int
+notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[]);
+
+int
 notmuch_reply_command (notmuch_config_t *config, int argc, char *argv[]);
 
 int
diff --git a/notmuch-reindex.c b/notmuch-reindex.c
new file mode 100644
index 0000000..6fc88c5
--- /dev/null
+++ b/notmuch-reindex.c
@@ -0,0 +1,152 @@
+/* notmuch - Not much of an email program, (just index and search)
+ *
+ * Copyright © 2016 Daniel Kahn Gillmor
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/ .
+ *
+ * Author: Daniel Kahn Gillmor <dkg at fifthhorseman.net>
+ */
+
+#include "notmuch-client.h"
+#include "string-util.h"
+
+static volatile sig_atomic_t interrupted;
+
+static void
+handle_sigint (unused (int sig))
+{
+    static char msg[] = "Stopping...         \n";
+
+    /* This write is "opportunistic", so it's okay to ignore the
+     * result.  It is not required for correctness, and if it does
+     * fail or produce a short write, we want to get out of the signal
+     * handler as quickly as possible, not retry it. */
+    IGNORE_RESULT (write (2, msg, sizeof (msg) - 1));
+    interrupted = 1;
+}
+
+/* reindex all messages matching 'query_string' using the passed-in indexopts
+ */
+static int
+reindex_query (notmuch_database_t *notmuch, const char *query_string,
+	       notmuch_indexopts_t *indexopts)
+{
+    notmuch_query_t *query;
+    notmuch_messages_t *messages;
+    notmuch_message_t *message;
+    notmuch_status_t status;
+
+    int ret = NOTMUCH_STATUS_SUCCESS;
+
+    query = notmuch_query_create (notmuch, query_string);
+    if (query == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	return 1;
+    }
+
+    /* reindexing is not interested in any special sort order */
+    notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
+
+    status = notmuch_query_search_messages_st (query, &messages);
+    if (print_status_query ("notmuch reindex", query, status))
+	return status;
+
+    for (;
+	 notmuch_messages_valid (messages) && ! interrupted;
+	 notmuch_messages_move_to_next (messages)) {
+	message = notmuch_messages_get (messages);
+
+	notmuch_message_reindex(message, indexopts);
+	notmuch_message_destroy (message);
+	if (ret != NOTMUCH_STATUS_SUCCESS)
+	    break;
+    }
+
+    notmuch_query_destroy (query);
+
+    return ret || interrupted;
+}
+
+int
+notmuch_reindex_command (notmuch_config_t *config, int argc, char *argv[])
+{
+    char *query_string = NULL;
+    notmuch_database_t *notmuch;
+    struct sigaction action;
+    notmuch_bool_t try_decrypt = FALSE;
+    int opt_index;
+    int ret;
+    notmuch_status_t status;
+    notmuch_indexopts_t *indexopts = NULL;
+
+    /* Set up our handler for SIGINT */
+    memset (&action, 0, sizeof (struct sigaction));
+    action.sa_handler = handle_sigint;
+    sigemptyset (&action.sa_mask);
+    action.sa_flags = SA_RESTART;
+    sigaction (SIGINT, &action, NULL);
+
+    notmuch_opt_desc_t options[] = {
+	{ NOTMUCH_OPT_BOOLEAN, &try_decrypt, "try-decrypt", 0, 0 },
+	{ NOTMUCH_OPT_INHERIT, (void *) &notmuch_shared_options, NULL, 0, 0 },
+	{ 0, 0, 0, 0, 0 }
+    };
+
+    opt_index = parse_arguments (argc, argv, options, 1);
+    if (opt_index < 0)
+	return EXIT_FAILURE;
+
+    notmuch_process_shared_options (argv[0]);
+
+    if (notmuch_database_open (notmuch_config_get_database_path (config),
+			       NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
+	return EXIT_FAILURE;
+
+    notmuch_exit_if_unmatched_db_uuid (notmuch);
+
+    indexopts = notmuch_indexopts_create();
+    if (!indexopts)
+	return EXIT_FAILURE;
+
+    status = notmuch_indexopts_set_try_decrypt (indexopts, try_decrypt);
+    if (status)
+	fprintf (stderr, "Warning: failed to set --try-decrypt to %d (%s)\n",
+		 try_decrypt, notmuch_status_to_string (status));
+
+    if (try_decrypt) {
+	const char* gpg_path = notmuch_config_get_crypto_gpg_path (config);
+	status = notmuch_indexopts_set_gpg_path (indexopts, gpg_path);
+	if (status)
+	    fprintf (stderr, "Warning: failed to set gpg_path for reindexing to '%s' (%s)\n",
+		     gpg_path ? gpg_path : "(NULL)",
+		     notmuch_status_to_string (status));
+    }
+
+    query_string = query_string_from_args (config, argc-opt_index, argv+opt_index);
+    if (query_string == NULL) {
+	fprintf (stderr, "Out of memory\n");
+	return EXIT_FAILURE;
+    }
+
+    if (*query_string == '\0') {
+	fprintf (stderr, "Error: notmuch reindex requires at least one search term.\n");
+	return EXIT_FAILURE;
+    }
+    
+    ret = reindex_query (notmuch, query_string, indexopts);
+
+    notmuch_database_destroy (notmuch);
+
+    return ret || interrupted ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/notmuch.c b/notmuch.c
index 38b73c1..5aadea5 100644
--- a/notmuch.c
+++ b/notmuch.c
@@ -123,6 +123,8 @@ static command_t commands[] = {
       "Restore the tags from the given dump file (see 'dump')." },
     { "compact", notmuch_compact_command, FALSE,
       "Compact the notmuch database." },
+    { "reindex", notmuch_reindex_command, FALSE,
+      "Re-index all messages matching the search terms." },
     { "config", notmuch_config_command, FALSE,
       "Get or set settings in the notmuch configuration file." },
     { "help", notmuch_help_command, TRUE, /* create but don't save config */
diff --git a/test/T357-index-decryption.sh b/test/T357-index-decryption.sh
index 27d6885..9510c57 100755
--- a/test/T357-index-decryption.sh
+++ b/test/T357-index-decryption.sh
@@ -46,4 +46,71 @@ test_expect_equal \
     "$output" \
     "$expected"
 
+# add a tag to all messages to ensure that it stays after reindexing
+test_expect_success 'tagging all messages' \
+                    'notmuch tag +blarney "encrypted message"'
+test_begin_subtest "verify that tags have not changed"
+output=$(notmuch search tag:blarney)
+expected='thread:0000000000000001   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)
+thread:0000000000000002   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox)'
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+# see if first message shows up after reindexing with --try-decrypt
+test_expect_success 'reindex old messages' \
+                    'notmuch reindex --try-decrypt tag:encrypted and not has:index-decryption=success'
+test_begin_subtest "reindexed encrypted message, including cleartext"
+output=$(notmuch search wumpus)
+expected='thread:0000000000000002   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox)
+thread:0000000000000003   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)'
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+# and the same search, but by property ($expected is untouched):
+test_begin_subtest "emacs search by property for both messages"
+output=$(notmuch search has:index-decryption=success)
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+
+# try to remove cleartext indexing
+test_expect_success 'reindex without cleartext' \
+                    'notmuch reindex tag:encrypted and has:index-decryption=success'
+test_begin_subtest "reindexed encrypted messages, without cleartext"
+output=$(notmuch search wumpus)
+expected=''
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+# and the same search, but by property ($expected is untouched):
+test_begin_subtest "emacs search by property with both messages unindexed"
+output=$(notmuch search has:index-decryption=success)
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+# ensure that the tags remain even when we are dropping the cleartext.
+test_begin_subtest "verify that tags remain without cleartext"
+output=$(notmuch search tag:blarney)
+expected='thread:0000000000000004   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 002 (blarney encrypted inbox)
+thread:0000000000000005   2000-01-01 [1/1] Notmuch Test Suite; test encrypted message for cleartext index 001 (blarney encrypted inbox)'
+test_expect_equal \
+    "$output" \
+    "$expected"
+
+
+# TODO: test removal of a message from the message store between
+# indexing and reindexing.
+
+# TODO: insert the same message into the message store twice, index,
+# remove one of them from the message store, and then reindex.
+# reindexing should return a failure but the message should still be
+# present? -- or what should the semantics be if you ask to reindex a
+# message whose underlying files have been renamed or moved or
+# removed?
+
 test_done
-- 
2.8.1



More information about the notmuch mailing list