[PATCH] test: add known broken test for multiple thread terms per message

David Bremner david at tethera.net
Fri Jul 27 01:37:20 PDT 2018


Having multiple thread terms on a message document seems to be the
underlying cause of some confusing results from notmuch search.

The presence of these multiple thread terms is presumably an indexing
bug, related to multiple files with the same message-id.

The files here are synthesized from a reproducer for the problems in
id:1523007700.l8xm6nm6af.naveen at linux.ibm.com. It isn't quite clear
this is the same issue (the symptoms using notmuch-search are a bit
different).
---
 test/.gitignore                         |  1 +
 test/Makefile.local                     |  7 ++++++-
 test/T720-database-schema.sh            | 17 +++++++++++++++++
 test/corpora/threading/mutant-ref/file1 |  9 +++++++++
 test/corpora/threading/mutant-ref/file2 |  9 +++++++++
 test/corpora/threading/mutant-ref/file3 |  9 +++++++++
 test/corpora/threading/mutant-ref/file4 |  7 +++++++
 test/corpora/threading/mutant-ref/file5 |  7 +++++++
 test/term-report.cc                     | 22 ++++++++++++++++++++++
 9 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100755 test/T720-database-schema.sh
 create mode 100644 test/corpora/threading/mutant-ref/file1
 create mode 100644 test/corpora/threading/mutant-ref/file2
 create mode 100644 test/corpora/threading/mutant-ref/file3
 create mode 100644 test/corpora/threading/mutant-ref/file4
 create mode 100644 test/corpora/threading/mutant-ref/file5
 create mode 100644 test/term-report.cc

diff --git a/test/.gitignore b/test/.gitignore
index 73fe7e24..71bbd7ed 100644
--- a/test/.gitignore
+++ b/test/.gitignore
@@ -8,4 +8,5 @@
 /make-db-version
 /test-results
 /ghost-report
+/term-report
 /tmp.*
diff --git a/test/Makefile.local b/test/Makefile.local
index 1cf09778..c39feace 100644
--- a/test/Makefile.local
+++ b/test/Makefile.local
@@ -44,6 +44,9 @@ $(dir)/make-db-version: $(dir)/make-db-version.o
 $(dir)/ghost-report: $(dir)/ghost-report.o
 	$(call quiet,CXX) $^ -o $@ $(LDFLAGS) $(XAPIAN_LDFLAGS)
 
+$(dir)/term-report: $(dir)/term-report.o
+	$(call quiet,CXX) $^ -o $@ $(LDFLAGS) $(XAPIAN_LDFLAGS)
+
 .PHONY: test check
 
 test_main_srcs=$(dir)/arg-test.c \
@@ -54,7 +57,9 @@ test_main_srcs=$(dir)/arg-test.c \
 	      $(dir)/symbol-test.cc \
 	      $(dir)/make-db-version.cc \
 	      $(dir)/ghost-report.cc \
-	      $(dir)/message-id-parse.c
+	      $(dir)/message-id-parse.c \
+	      $(dir)/term-report.cc
+
 
 test_srcs=$(test_main_srcs) $(dir)/database-test.c
 
diff --git a/test/T720-database-schema.sh b/test/T720-database-schema.sh
new file mode 100755
index 00000000..ddcd5cd1
--- /dev/null
+++ b/test/T720-database-schema.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+test_description="database schema in lib/database.cc"
+
+. $(dirname "$0")/test-lib.sh || exit 1
+
+add_email_corpus threading
+
+test_begin_subtest "every document has at most one thread term"
+test_subtest_known_broken
+# Count the G-prefixed (thread) terms on each document line; sort -u keeps the distinct counts.
+"${TEST_DIRECTORY}/term-report" "${MAIL_DIR}/.notmuch/xapian" | perl -ane 'pop(@F); printf "%d\n",scalar(grep { m/^G/ } @F);' | sort -u > OUTPUT
+cat <<EOF > EXPECTED
+0
+1
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+test_done
diff --git a/test/corpora/threading/mutant-ref/file1 b/test/corpora/threading/mutant-ref/file1
new file mode 100644
index 00000000..97f8db58
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file1
@@ -0,0 +1,9 @@
+From: Alice <alice at example.org>
+To: Daniel <daniel at example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent1 at example.org>
+References: <mutant-ref-parent1 at example.org>
+Message-ID: <mutant-ref-leaf at example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file2 b/test/corpora/threading/mutant-ref/file2
new file mode 100644
index 00000000..2b2ccd1d
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file2
@@ -0,0 +1,9 @@
+From: Alice <alice at example.org>
+To: Daniel <daniel at example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent2 at example.org>
+References: <mutant-ref-parent2 at example.org>
+Message-ID: <mutant-ref-leaf at example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file3 b/test/corpora/threading/mutant-ref/file3
new file mode 100644
index 00000000..a8e705bc
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file3
@@ -0,0 +1,9 @@
+From: Alice <alice at example.org>
+To: Daniel <daniel at example.org>
+Subject: leaf message
+In-Reply-To: <mutant-ref-parent3 at example.org>
+References: <mutant-ref-parent3 at example.org>
+Message-ID: <mutant-ref-leaf at example.org>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file4 b/test/corpora/threading/mutant-ref/file4
new file mode 100644
index 00000000..3a0a5a13
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file4
@@ -0,0 +1,7 @@
+From: Daniel <daniel at example.org>
+To: Alice <alice at example.org>
+Subject: existing parent
+Message-ID: <mutant-ref-parent2 at example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/corpora/threading/mutant-ref/file5 b/test/corpora/threading/mutant-ref/file5
new file mode 100644
index 00000000..8f525d63
--- /dev/null
+++ b/test/corpora/threading/mutant-ref/file5
@@ -0,0 +1,7 @@
+From: Daniel <daniel at example.org>
+To: Alice <alice at example.org>
+Subject: existing parent
+Message-ID: <mutant-ref-parent3 at example.org>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+body
diff --git a/test/term-report.cc b/test/term-report.cc
new file mode 100644
index 00000000..88cd1bf5
--- /dev/null
+++ b/test/term-report.cc
@@ -0,0 +1,22 @@
+#include <iostream>
+#include <cstdlib>
+#include <xapian.h>
+
+// Print "<docid> <term> <term>..." for each docid from 1 through get_lastdocid(), inclusive.
+int main(int argc, char **argv) {
+    if (argc < 2) {
+	std::cerr << "usage: term-report xapian-dir" << std::endl;
+	exit(1);
+    }
+
+    Xapian::Database db(argv[1]);
+    for (Xapian::docid id(1); id <= db.get_lastdocid(); id++) {
+	std::cout << id;
+	for (Xapian::TermIterator iter = db.termlist_begin(id);
+	     iter != db.termlist_end(id);
+	     ++iter) {
+	    std::cout << " " << *iter;
+	}
+	std::cout << std::endl;
+    }
+}
-- 
2.18.0



More information about the notmuch mailing list