[PATCH] perf-test: bump version to 0.4, use manifest files

David Bremner david at tethera.net
Sat Nov 30 19:00:03 PST 2013


The new revision (0.4) of the performance test corpus includes manifests for
each corpus, so update the support library to use these manifests at the same
time.
---

Jani's message
       
       id:d74499f1e462755676edf9aa6ab689ba47fa2471.1385825425.git.jani at nikula.org

reminded me I had an update of the performance test suite in the
works.  The new suite is available from

    http://tesseract.cs.unb.ca/notmuch/notmuch-email-corpus-0.4.tar.xz
    http://tesseract.cs.unb.ca/notmuch/notmuch-email-corpus-0.4.tar.xz.asc

I haven't yet uploaded it to notmuchmail.org because of disk space concerns.

 .../download/notmuch-email-corpus-0.4.tar.xz.asc   | 14 +++++
 performance-test/perf-test-lib.sh                  | 72 ++++++++++++++--------
 performance-test/version.sh                        |  2 +-
 3 files changed, 60 insertions(+), 28 deletions(-)
 create mode 100644 performance-test/download/notmuch-email-corpus-0.4.tar.xz.asc

diff --git a/performance-test/download/notmuch-email-corpus-0.4.tar.xz.asc b/performance-test/download/notmuch-email-corpus-0.4.tar.xz.asc
new file mode 100644
index 0000000..72dedd8
--- /dev/null
+++ b/performance-test/download/notmuch-email-corpus-0.4.tar.xz.asc
@@ -0,0 +1,14 @@
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v1.4.15 (GNU/Linux)
+
+iQGcBAABCAAGBQJSdaDkAAoJEPIClx2kp54sQ54L/ikkvF1fy88hjLitN59v6g2J
+vw85YNRifNHyp/UXI6nt2eXFzyWJiRHuvHFoBgmEsJVxauOKw61Gs2zd53x9Ear4
+MGcQWyiM1cnwX/nD7GvxRQNh33f+FEamTjg+QhG47K0A2YdLWcDC7r9GMatGT11x
+5KE24WQGOqtgQn/9qNtJvkiKIehpRiDTaW/QJ7mTCYeJFjIHJUY8dxyfiTtkJ0z7
+cJ6omehvWSw4STbEg65XJgqykxMdltNEavfvSbAT73FgmkkyXxul0s5hDZ/esd0n
+re3dyDxGt085POiAgPti05a4tJI5EQC2wLBUFri0s2JdMtazcD6yVuHNbVzZ4Do3
+nL/sgwKGUq5wRrPqPWp6HXtZ9zG+/V7hFNrr/l42qGrLqsSh0bqvEnUiwczZLBGy
+NEs4G8VjmfS2cMKePsWaekBAvFUtb47PSB6JIPwpCNvKXDrcCb28eOQVB2atgj1h
+9SktOtWYJhWIQp2YW9iae30Z6lhCcdPRRHTFMQq2nQ==
+=eSMY
+-----END PGP SIGNATURE-----
diff --git a/performance-test/perf-test-lib.sh b/performance-test/perf-test-lib.sh
index 9ee7661..44708cf 100644
--- a/performance-test/perf-test-lib.sh
+++ b/performance-test/perf-test-lib.sh
@@ -41,52 +41,70 @@ add_email_corpus ()
 {
     rm -rf ${MAIL_DIR}
 
-    case "$corpus_size" in
-	small)
-	    mail_subdir="mail/enron/bailey-s"
-	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir"
-	    ;;
-	medium)
-	    mail_subdir="mail/notmuch-archive"
-	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir"
-	    ;;
-	*)
-	    mail_subdir=mail
-	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir/enron/wolfe-j"
-    esac
+    CORPUS_DIR=${TEST_DIRECTORY}/corpus
+    mkdir -p "${CORPUS_DIR}"
 
-    MAIL_CORPUS="${TEST_DIRECTORY}/corpus/$mail_subdir"
-    TAG_CORPUS="${TEST_DIRECTORY}/corpus/tags"
+    MAIL_CORPUS="${CORPUS_DIR}/mail.${corpus_size}"
+    TAG_CORPUS="${CORPUS_DIR}/tags"
 
-    args=()
-    if [ ! -d "$TAG_CORPUS" ] ; then
-	args+=("notmuch-email-corpus/tags")
+    if command -v pixz > /dev/null; then
+	XZ=pixz
+    else
+	XZ=xz
     fi
 
-    if [ ! -d "$check_for" ] ; then
-	args+=("notmuch-email-corpus/$mail_subdir")
+    if [ ! -d "${CORPUS_DIR}/manifest" ]; then
+
+	printf "Unpacking manifests\n"
+	tar --extract --use-compress-program ${XZ} --strip-components=1 \
+	    --directory ${TEST_DIRECTORY}/corpus \
+	    --wildcards --file ../download/notmuch-email-corpus-${PERFTEST_VERSION}.tar.xz \
+	    'notmuch-email-corpus/manifest/*'
     fi
 
-    if [[ ${#args[@]} > 0 ]]; then
-	if command -v pixz > /dev/null; then
-	    XZ=pixz
+    file_list=$(mktemp file_listXXXXXX)
+    if [ ! -d "$TAG_CORPUS" ] ; then
+	echo "notmuch-email-corpus/tags" >> $file_list
+    fi
+
+    if [ ! -d "$MAIL_CORPUS" ] ; then
+	if [[ "$corpus_size" != "large" ]]; then
+	    sed s,^,notmuch-email-corpus/, < \
+		${TEST_DIRECTORY}/corpus/manifest/MANIFEST.${corpus_size} >> $file_list
 	else
-	    XZ=xz
+	    echo "notmuch-email-corpus/mail" >> $file_list
 	fi
+    fi
 
-	printf "Unpacking corpus\n"
-	mkdir -p "${TEST_DIRECTORY}/corpus"
+    if [[ -s $file_list ]]; then
 
+	printf "Unpacking corpus\n"
 	tar --checkpoint=.5000 --extract --strip-components=1 \
 	    --directory ${TEST_DIRECTORY}/corpus \
 	    --use-compress-program ${XZ} \
 	    --file ../download/notmuch-email-corpus-${PERFTEST_VERSION}.tar.xz \
-	    "${args[@]}"
+	    --anchored --recursion \
+	    --files-from $file_list
 
 	printf "\n"
 
+	if [[ ! -d ${MAIL_CORPUS} ]]; then
+	    printf "creating link farm\n"
+
+	    if [[ "$corpus_size" = large ]]; then
+		cp -rl ${TEST_DIRECTORY}/corpus/mail ${MAIL_CORPUS}
+	    else
+		while read -r file; do
+		    tdir=${MAIL_CORPUS}/$(dirname $file)
+		    mkdir -p $tdir
+		    ln ${TEST_DIRECTORY}/corpus/$file $tdir
+		done <${TEST_DIRECTORY}/corpus/manifest/MANIFEST.${corpus_size}
+	    fi
+	fi
+
     fi
 
+    rm $file_list
     cp -lr $TAG_CORPUS $TMP_DIRECTORY/corpus.tags
     cp -lr $MAIL_CORPUS $MAIL_DIR
 }
diff --git a/performance-test/version.sh b/performance-test/version.sh
index afafc73..f02527a 100644
--- a/performance-test/version.sh
+++ b/performance-test/version.sh
@@ -1,3 +1,3 @@
 # this should be both a valid Makefile fragment and valid POSIX(ish) shell.
 
-PERFTEST_VERSION=0.3
+PERFTEST_VERSION=0.4
-- 
1.8.4.2



More information about the notmuch mailing list