[Patch v2 05/11] perf-test: cache unpacked corpus

david at tethera.net david at tethera.net
Wed Dec 5 19:01:42 PST 2012


From: David Bremner <bremner at debian.org>

Unpacking is not really the expensive step (compared to the initial
"notmuch new"), but it is a prerequisite for caching the database.
---
 performance-test/.gitignore       |    1 +
 performance-test/Makefile.local   |    2 +-
 performance-test/perf-test-lib.sh |   51 ++++++++++++++++++++++++-------------
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/performance-test/.gitignore b/performance-test/.gitignore
index 53f2697..796ed01 100644
--- a/performance-test/.gitignore
+++ b/performance-test/.gitignore
@@ -1 +1,2 @@
 tmp.*/
+corpus/
diff --git a/performance-test/Makefile.local b/performance-test/Makefile.local
index 5d2acbd..63e4c3d 100644
--- a/performance-test/Makefile.local
+++ b/performance-test/Makefile.local
@@ -29,4 +29,4 @@ $(TXZFILE):
 download-corpus:
 	wget -O ${TXZFILE} ${DEFAULT_URL}
 
-CLEAN := $(CLEAN) $(dir)/tmp.*
+CLEAN := $(CLEAN) $(dir)/tmp.* $(dir)/corpus
diff --git a/performance-test/perf-test-lib.sh b/performance-test/perf-test-lib.sh
index 415bf15..0c10718 100644
--- a/performance-test/perf-test-lib.sh
+++ b/performance-test/perf-test-lib.sh
@@ -39,33 +39,50 @@ add_email_corpus ()
 {
     rm -rf ${MAIL_DIR}
 
-    case "$1" in
-	--small)
-	    arg="mail/enron/bailey-s"
+    case "$corpus_size" in
+	small)
+	    mail_subdir="mail/enron/bailey-s"
+	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir"
 	    ;;
-	--medium)
-	    arg="mail/notmuch-archive"
+	medium)
+	    mail_subdir="mail/notmuch-archive"
+	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir"
 	    ;;
 	*)
-	    arg=mail
+	    mail_subdir=mail
+	    check_for="${TEST_DIRECTORY}/corpus/$mail_subdir/enron/wolfe-j"
     esac
 
-    if command -v pixz > /dev/null; then
-	XZ=pixz
-    else
-	XZ=xz
+    MAIL_CORPUS="${TEST_DIRECTORY}/corpus/$mail_subdir"
+    args=()
+    if [ ! -d "$check_for" ] ; then
+	args+=("notmuch-email-corpus/$mail_subdir")
     fi
 
-    printf "Unpacking corpus\n"
-    tar --checkpoint=.5000 --extract --strip-components=1 \
-	--directory ${TMP_DIRECTORY} \
-	--use-compress-program ${XZ} \
-	--file ../download/notmuch-email-corpus-${PERFTEST_VERSION}.tar.xz \
-	notmuch-email-corpus/"$arg"
+    if [[ ${#args[@]} > 0 ]]; then
+	if command -v pixz > /dev/null; then
+	    XZ=pixz
+	else
+	    XZ=xz
+	fi
+
+	printf "Unpacking corpus\n"
+	mkdir -p "${TEST_DIRECTORY}/corpus"
+
+	tar --checkpoint=.5000 --extract --strip-components=1 \
+	    --directory ${TEST_DIRECTORY}/corpus \
+	    --use-compress-program ${XZ} \
+	    --file ../download/notmuch-email-corpus-${PERFTEST_VERSION}.tar.xz \
+	    "${args[@]}"
+
+	printf "\n"
 
-    printf "\n"
+    fi
+
+    cp -lr $MAIL_CORPUS $MAIL_DIR
 }
 
+
 print_header () {
     printf "[v%4s %6s]          Wall(s)\tUsr(s)\tSys(s)\tRes(K)\tIn/Out(512B)\n" \
 	   ${PERFTEST_VERSION} ${corpus_size}
-- 
1.7.10.4



More information about the notmuch mailing list