[PATCH] lib: Add a new prefix "list" to the search-terms syntax
Kirill A. Shutemov
kirill at shutemov.name
Tue Dec 17 10:03:22 PST 2013
On Thu, Oct 17, 2013 at 05:17:00PM +0300, Jani Nikula wrote:
> On Wed, 10 Apr 2013, "Alexey I. Froloff" <raorn at raorn.name> wrote:
> > From: "Alexey I. Froloff" <raorn at raorn.name>
> >
> > Add support for indexing and searching the message's List-Id header.
> > This is useful when matching all the messages belonging to a particular
> > mailing list.
>
> There's an issue with our duplicate message-id handling that is likely
> to cause confusion with List-Id: searches. If you receive several
> duplicates of the same message (judged by the message-id), only the
> first one of them gets indexed, and the rest are ignored. This means
> that for messages you receive both directly and through a list, it will
> be arbitrary whether the List-Id: gets indexed or not. Therefore a list:
> search might not return all the messages you'd expect.
I've tried to address this. The patch also adds a few tests for the feature.
Some functionality is still missing: re-indexing existing messages for
list-id, handling message removal, etc.
Any comments?
diff --git a/lib/database.cc b/lib/database.cc
index f395061e3a73..196243e15d1a 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -205,6 +205,7 @@ static prefix_t BOOLEAN_PREFIX_INTERNAL[] = {
};
static prefix_t BOOLEAN_PREFIX_EXTERNAL[] = {
+ { "list", "XLIST"},
{ "thread", "G" },
{ "tag", "K" },
{ "is", "K" },
@@ -2025,10 +2026,13 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
date = notmuch_message_file_get_header (message_file, "date");
_notmuch_message_set_header_values (message, date, from, subject);
- ret = _notmuch_message_index_file (message, filename);
+ ret = _notmuch_message_index_file (message, filename, false);
if (ret)
goto DONE;
} else {
+ ret = _notmuch_message_index_file (message, filename, true);
+ if (ret)
+ goto DONE;
ret = NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID;
}
diff --git a/lib/index.cc b/lib/index.cc
index 78c18cf36d10..9fe1ad6502ed 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -304,6 +304,47 @@ _index_address_list (notmuch_message_t *message,
}
}
+static void
+_index_list_id (notmuch_message_t *message,
+ const char *list_id_header)
+{
+ const char *begin_list_id, *end_list_id, *list_id;
+ void *local;
+
+ if (list_id_header == NULL)
+ return;
+
+ /* RFC2919 says that the list-id is found at the end of the header
+ * and enclosed between angle brackets. If we cannot find a
+ * matching pair of brackets containing at least one character,
+ * we ignore the list id header. */
+ begin_list_id = strrchr (list_id_header, '<');
+ if (!begin_list_id) {
+ fprintf (stderr, "Warning: Not indexing mailformed List-Id tag.\n");
+ return;
+ }
+
+ end_list_id = strrchr(begin_list_id, '>');
+ if (!end_list_id || (end_list_id - begin_list_id < 2)) {
+ fprintf (stderr, "Warning: Not indexing mailformed List-Id tag.\n");
+ return;
+ }
+
+ local = talloc_new (message);
+
+ /* We extract the list id between the angle brackets */
+ list_id = talloc_strndup (local, begin_list_id + 1,
+ end_list_id - begin_list_id - 1);
+
+ /* _notmuch_message_add_term() may return
+ * NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG here. We can't fix it, but
+ * this is not a reason to exit with error... */
+ if (_notmuch_message_add_term (message, "list", list_id))
+ fprintf (stderr, "Warning: Not indexing List-Id: <%s>\n", list_id);
+
+ talloc_free (local);
+}
+
/* Callback to generate terms for each mime part of a message. */
static void
_index_mime_part (notmuch_message_t *message,
@@ -425,14 +466,15 @@ _index_mime_part (notmuch_message_t *message,
notmuch_status_t
_notmuch_message_index_file (notmuch_message_t *message,
- const char *filename)
+ const char *filename,
+ notmuch_bool_t duplicate)
{
GMimeStream *stream = NULL;
GMimeParser *parser = NULL;
GMimeMessage *mime_message = NULL;
InternetAddressList *addresses;
FILE *file = NULL;
- const char *from, *subject;
+ const char *from, *subject, *list_id;
notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
static int initialized = 0;
char from_buf[5];
@@ -485,6 +527,9 @@ mboxes is deprecated and may be removed in the future.\n", filename);
from = g_mime_message_get_sender (mime_message);
+ if (duplicate)
+ goto DUP;
+
addresses = internet_address_list_parse_string (from);
if (addresses) {
_index_address_list (message, "from", addresses);
@@ -502,6 +547,10 @@ mboxes is deprecated and may be removed in the future.\n", filename);
_index_mime_part (message, g_mime_message_get_mime_part (mime_message));
+ DUP:
+ list_id = g_mime_object_get_header (GMIME_OBJECT (mime_message), "List-Id");
+ _index_list_id (message, list_id);
+
DONE:
if (mime_message)
g_object_unref (mime_message);
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index af185c7c5ba8..138dfa58efc8 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -322,7 +322,8 @@ notmuch_message_get_author (notmuch_message_t *message);
notmuch_status_t
_notmuch_message_index_file (notmuch_message_t *message,
- const char *filename);
+ const char *filename,
+ notmuch_bool_t duplicate);
/* message-file.c */
diff --git a/man/man7/notmuch-search-terms.7 b/man/man7/notmuch-search-terms.7
index f1627b3488f8..29b30b7b0b00 100644
--- a/man/man7/notmuch-search-terms.7
+++ b/man/man7/notmuch-search-terms.7
@@ -52,6 +52,8 @@ terms to match against specific portions of an email, (where
thread:<thread-id>
+ list:<list-id>
+
folder:<directory-path>
date:<since>..<until>
@@ -109,6 +111,12 @@ within a matching directory. Only the directory components below the
top-level mail database path are available to be searched.
The
+.B list:
+prefix is used to match the mailing list ID of an email message \- the
+contents of the List\-Id: header without the '<', '>' delimiters or the
+decoded list description.
+
+The
.B date:
prefix can be used to restrict the results to only messages within a
particular time range (based on the Date: header) with a range syntax
diff --git a/test/corpus/cur/18:2, b/test/corpus/cur/18:2,
index f522f69eb933..2b54925bd5d1 100644
--- a/test/corpus/cur/18:2,
+++ b/test/corpus/cur/18:2,
@@ -3,6 +3,7 @@ To: notmuch at notmuchmail.org
Date: Tue, 17 Nov 2009 18:21:38 -0500
Subject: [notmuch] archive
Message-ID: <20091117232137.GA7669 at griffis1.net>
+List-Id: <test1.example.com>
Just subscribed, I'd like to catch up on the previous postings,
but the archive link seems to be bogus?
diff --git a/test/corpus/cur/51:2, b/test/corpus/cur/51:2,
index f522f69eb933..b155e6ee64a5 100644
--- a/test/corpus/cur/51:2,
+++ b/test/corpus/cur/51:2,
@@ -3,6 +3,7 @@ To: notmuch at notmuchmail.org
Date: Tue, 17 Nov 2009 18:21:38 -0500
Subject: [notmuch] archive
Message-ID: <20091117232137.GA7669 at griffis1.net>
+List-Id: <test2.example.com>
Just subscribed, I'd like to catch up on the previous postings,
but the archive link seems to be bogus?
diff --git a/test/search b/test/search
index a7a0b18d2e48..bef42971226c 100755
--- a/test/search
+++ b/test/search
@@ -129,4 +129,28 @@ add_message '[subject]="utf8-message-body-subject"' '[date]="Sat, 01 Jan 2000 12
output=$(notmuch search "bödý" | notmuch_search_sanitize)
test_expect_equal "$output" "thread:XXX 2000-01-01 [1/1] Notmuch Test Suite; utf8-message-body-subject (inbox unread)"
+test_begin_subtest "Search by List-Id"
+notmuch search list:notmuch.notmuchmail.org | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX 2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX 2009-11-18 [4/7] Lars Kellogg-Stedman, Mikhail Gusarov| Keith Packard, Carl Worth; [notmuch] Working with Maildir storage? (inbox signed unread)
+thread:XXX 2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX 2009-11-17 [1/3] Adrian Perez de Castro| Keith Packard, Carl Worth; [notmuch] Introducing myself (inbox signed unread)
+thread:XXX 2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "Search by List-Id, duplicated messages, step 1"
+notmuch search list:test1.example.com | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX 2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
+
+test_begin_subtest "Search by List-Id, duplicated messages, step 2"
+notmuch search list:test2.example.com | notmuch_search_sanitize > OUTPUT
+cat <<EOF >EXPECTED
+thread:XXX 2009-11-17 [1/3] Aron Griffis| Keith Packard, Carl Worth; [notmuch] archive (inbox unread)
+EOF
+test_expect_equal_file OUTPUT EXPECTED
test_done
diff --git a/test/test-lib.sh b/test/test-lib.sh
index d8e0d9115a69..981bde4a4004 100644
--- a/test/test-lib.sh
+++ b/test/test-lib.sh
@@ -576,9 +576,9 @@ test_expect_equal_json () {
# The test suite forces LC_ALL=C, but this causes Python 3 to
# decode stdin as ASCII. We need to read JSON in UTF-8, so
# override Python's stdio encoding defaults.
- output=$(echo "$1" | PYTHONIOENCODING=utf-8 python -mjson.tool \
+ output=$(echo "$1" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \
|| echo "$1")
- expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python -mjson.tool \
+ expected=$(echo "$2" | PYTHONIOENCODING=utf-8 python2 -mjson.tool \
|| echo "$2")
shift 2
test_expect_equal "$output" "$expected" "$@"
--
Kirill A. Shutemov
More information about the notmuch
mailing list