[PATCH] lib: Add a new prefix "list" to the search-terms syntax

Kirill A. Shutemov kirill at shutemov.name
Tue Dec 17 10:03:22 PST 2013


On Thu, Oct 17, 2013 at 05:17:00PM +0300, Jani Nikula wrote:
> On Wed, 10 Apr 2013, "Alexey I. Froloff" <raorn at raorn.name> wrote:
> > From: "Alexey I. Froloff" <raorn at raorn.name>
> >
> > Add support for indexing and searching the message's List-Id header.
> > This is useful when matching all the messages belonging to a particular
> > mailing list.
> 
> There's an issue with our duplicate message-id handling that is likely
> to cause confusion with List-Id: searches. If you receive several
> duplicates of the same message (judged by the message-id), only the
> first one of them gets indexed, and the rest are ignored. This means
> that for messages you receive both directly and through a list, it will
> be arbitrary whether the List-Id: gets indexed or not. Therefore a list:
> search might not return all the messages you'd expect.

I've tried to address this. The patch also adds a few tests for the feature.

There's still missing functionality: re-indexing existing messages for
list-id, handling message removal, etc.

Any comments?

diff --git a/lib/database.cc b/lib/database.cc
index f395061e3a73..196243e15d1a 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -205,6 +205,7 @@ static prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
 };
 
 static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
+    { "list",			"XLIST"},
     { "thread",			"G" },
     { "tag",			"K" },
     { "is",			"K" },
@@ -2025,10 +2026,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
 	    date = notmuch_message_file_get_header (message_file, "date");
 	    _notmuch_message_set_header_values (message, date, from, subject);
 
-	    ret = _notmuch_message_index_file (message, filename);
+	    ret = _notmuch_message_index_file (message, filename, false);
 	    if (ret)
 		goto DONE;
 	} else {
+	    ret = _notmuch_message_index_file (message, filename, true);
+	    if (ret)
+		goto DONE;
 	    ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
 	}
 
diff --git a/lib/index.cc b/lib/index.cc
index 78c18cf36d10..9fe1ad6502ed 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -304,6 +304,47 @@ _index_address_list (notmuch_message_t *message,
     }
 }
 
+/* Add a "list" term to 'message' for the list identifier found in
+ * 'list_id_header' (the raw List-Id value). A NULL header is ignored;
+ * every failure is only reported as a warning on stderr. */
+static void
+_index_list_id (notmuch_message_t *message,
+               const char *list_id_header)
+{
+    const char *begin_list_id, *end_list_id;
+    char *list_id;
+
+    if (list_id_header == NULL)
+	return;
+
+    /* RFC2919 says that the list-id is found at the end of the header
+     * and enclosed between angle brackets. If we cannot find a
+     * matching pair of brackets containing at least one character,
+     * we ignore the list id header. */
+    begin_list_id = strrchr (list_id_header, '<');
+    end_list_id = begin_list_id ? strrchr (begin_list_id, '>') : NULL;
+    if (!end_list_id || (end_list_id - begin_list_id < 2)) {
+	fprintf (stderr, "Warning: Not indexing malformed List-Id header.\n");
+	return;
+    }
+
+    /* We extract the list id between the angle brackets */
+    list_id = talloc_strndup (message, begin_list_id + 1,
+			      end_list_id - begin_list_id - 1);
+    if (list_id == NULL) {
+	fprintf (stderr, "Warning: Out of memory, not indexing List-Id.\n");
+	return;
+    }
+
+    /* _notmuch_message_add_term() may return
+     * NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG here.  We can't fix it, but
+     * this is not a reason to exit with error... */
+    if (_notmuch_message_add_term (message, "list", list_id))
+	fprintf (stderr, "Warning: Not indexing List-Id: <%s>\n", list_id);
+
+    talloc_free (list_id);
+}
+
 /* Callback to generate terms for each mime part of a message. */
 static void
 _index_mime_part (notmuch_message_t *message,
@@ -425,14 +466,15 @@ _index_mime_part (notmuch_message_t *message,
 
 notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
-			     const char *filename)
+			     const char *filename,
+			     notmuch_bool_t duplicate)
 {
     GMimeStream *stream = NULL;
     GMimeParser *parser = NULL;
     GMimeMessage *mime_message = NULL;
     InternetAddressList *addresses;
     FILE *file = NULL;
-    const char *from, *subject;
+    const char *from, *subject, *list_id;
     notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
     static int initialized = 0;
     char from_buf[5];
@@ -485,6 +527,9 @@ mboxes is deprecated and may be removed in the future.\n", filename);
 
     from = g_mime_message_get_sender (mime_message);
 
+    if (duplicate)
+	goto DUP;
+
     addresses = internet_address_list_parse_string (from);
     if (addresses) {
 	_index_address_list (message, "from", addresses);
@@ -502,6 +547,10 @@ mboxes is deprecated and may be removed in the future.\n", filename);
 
     _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
 
+  DUP:
+    list_id = g_mime_object_get_header (GMIME_OBJECT (mime_message), "List-Id");
+    _index_list_id (message, list_id);
+
   DONE:
     if (mime_message)
 	g_object_unref (mime_message);
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index af185c7c5ba8..138dfa58efc8 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -322,7 +322,8 @@ notmuch_message_get_author (notmuch_message_t *message);
 
 notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
-			     const char *filename);
+			     const char *filename,
+			     notmuch_bool_t duplicate);
 
 /* message-file.c */
 
diff --git a/man/man7/notmuch-search-terms.7 b/man/man7/notmuch-search-terms.7
index f1627b3488f8..29b30b7b0b00 100644
--- a/man/man7/notmuch-search-terms.7
+++ b/man/man7/notmuch-search-terms.7
@@ -52,6 +52,8 @@ terms to match against specific portions of an email, (where
 
 	thread:<thread-id>
 
+	list:<list-id>
+
 	folder:<directory-path>
 
 	date:<since>..<until>
@@ -109,6 +111,12 @@ within a matching directory. Only the directory components below the
 top-level mail database path are available to be searched.
 
 The
+.B list:
+prefix is used to match the mailing list ID of an email message \- the
+contents of the List\-Id: header without the '<', '>' delimiters or the
+decoded list description.
+
+The
 .B date:
 prefix can be used to restrict the results to only messages within a
 particular time range (based on the Date: header) with a range syntax
diff --git a/test/corpus/cur/18:2, b/test/corpus/cur/18:2,
index f522f69eb933..2b54925bd5d1 100644
--- a/test/corpus/cur/18:2,
+++ b/test/corpus/cur/18:2,
@@ -3,6 +3,7 @@ To: notmuch at notmuchmail.org
 Date: Tue, 17 Nov 2009 18:21:38 -0500
 Subject: [notmuch] archive
 Message-ID: <20091117232137.GA7669 at griffis1.net>
+List-Id: <test1.example.com>
 
 Just subscribed, I'd like to catch up on the previous postings,
 but the archive link seems to be bogus?
diff --git a/test/corpus/cur/51:2, b/test/corpus/cur/51:2,
index f522f69eb933..b155e6ee64a5 100644
--- a/test/corpus/cur/51:2,
+++ b/test/corpus/cur/51:2,
@@ -3,6 +3,7 @@ To: notmuch at notmuchmail.org
 Date: Tue, 17 Nov 2009 18:21:38 -0500
 Subject: [notmuch] archive
 Message-ID: <20091117232137.GA7669 at griffis1.net>
+List-Id: <test2.example.com>
 
 Just subscribed, I'd like to catch up on the previous postings,
 but the archive link seems to be bogus?
diff --git a/test/search b/test/search
index a7a0b18d2e48..bef42971226c 100755
--- a/test/search
+++ b/test/search
@@ -129,4 +129,28 @@ add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12
 output=$(notmuch search "bödý" | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
 
+test_begin_subtest "Search by List-Id"
+notmuch search list:notmuch.notmuchmail.org | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX   2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread)
+thread:XXX   2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "Search by List-Id, duplicated messages, step 1"
+notmuch search list:test1.example.com | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX   2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "Search by List-Id, duplicated messages, step 2"
+notmuch search list:test2.example.com | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX   2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
 test_done
diff --git a/test/test-lib.sh b/test/test-lib.sh
index d8e0d9115a69..981bde4a4004 100644
--- a/test/test-lib.sh
+++ b/test/test-lib.sh
@@ -576,9 +576,9 @@ test_expect_equal_json () {
     # The test suite forces LC_ALL=C, but this causes Python 3 to
     # decode stdin as ASCII.  We need to read JSON in UTF-8, so
     # override Python's stdio encoding defaults.
-    output=$(echo "$1" | PYTHONIOENCODING=utf-8 python -mjson.tool \
+    output=$(echo "$1" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \
         || echo "$1")
-    expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python -mjson.tool \
+    expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \
         || echo "$2")
     shift 2
     test_expect_equal "$output" "$expected" "$@"
-- 
 Kirill A. Shutemov


More information about the notmuch mailing list