[Patch v5 1/6] dump: support gzipped output

David Bremner david at tethera.net
Tue Apr 1 18:16:16 PDT 2014


The main goal is to support gzipped output for future internal
calls (e.g. from notmuch-new) to notmuch_database_dump. The same
functionality is exposed to users as a new --gzip option to
notmuch dump.

The additional dependency is not very heavy, since Xapian already
pulls in zlib.
---
 INSTALL                   | 17 +++++++++++----
 Makefile.local            |  2 +-
 configure                 | 28 +++++++++++++++++++++---
 doc/man1/notmuch-dump.rst |  3 +++
 notmuch-client.h          |  4 +++-
 notmuch-dump.c            | 54 ++++++++++++++++++++++++++++++-----------------
 test/T240-dump-restore.sh | 12 +++++++++++
 7 files changed, 92 insertions(+), 28 deletions(-)

diff --git a/INSTALL b/INSTALL
index 690b0ef..df318fa 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,8 +20,8 @@ configure stage.
 
 Dependencies
 ------------
-Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
-Talloc which are each described below:
+Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
+Talloc, and zlib which are each described below:
 
 	Xapian
 	------
@@ -60,6 +60,15 @@ Talloc which are each described below:
 
 	Talloc is available from http://talloc.samba.org/
 
+	zlib
+	----
+
+	zlib is an extremely popular compression library. It is used
+	by Xapian, so if you installed that you will already have
+	zlib. You may need to install the zlib headers separately.
+
+	zlib is available from http://zlib.net
+
 Building Documentation
 ----------------------
 
@@ -79,11 +88,11 @@ dependencies with a simple simple command line. For example:
 
   For Debian and similar:
 
-        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
+        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
 
   For Fedora and similar:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
 
 On other systems, a similar command can be used, but the details of
 the package names may be different.
diff --git a/Makefile.local b/Makefile.local
index cb7b106..e5a20a7 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
 # Smash together user's values with our extra values
 FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
 FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
-FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
+FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
 FINAL_NOTMUCH_LINKER = CC
 ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
 FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
diff --git a/configure b/configure
index 1d430b9..d685ab3 100755
--- a/configure
+++ b/configure
@@ -340,6 +340,18 @@ else
     errors=$((errors + 1))
 fi
 
+printf "Checking for zlib development files... "
+have_zlib=0
+if pkg-config --exists zlib; then
+    printf "Yes.\n"
+    have_zlib=1
+    zlib_cflags=$(pkg-config --cflags zlib)
+    zlib_ldflags=$(pkg-config --libs zlib)
+else
+    printf "No.\n"
+    errors=$((errors + 1))
+fi
+
 printf "Checking for talloc development files... "
 if pkg-config --exists talloc; then
     printf "Yes.\n"
@@ -496,6 +508,11 @@ EOF
 	echo "	Xapian library (including development files such as headers)"
 	echo "	http://xapian.org/"
     fi
+    if [ $have_zlib -eq 0 ]; then
+	echo "	zlib library (including development files such as headers)"
+	echo "	http://zlib.net/"
+	echo
+    fi
     if [ $have_gmime -eq 0 ]; then
 	echo "	Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR
 	echo "	(including development files such as headers)"
@@ -519,11 +536,11 @@ case a simple command will install everything you need. For example:
 
 On Debian and similar systems:
 
-	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
+	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
 
 Or on Fedora and similar systems:
 
-	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
+	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
 
 On other systems, similar commands can be used, but the details of the
 package names may be different.
@@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
 GMIME_CFLAGS = ${gmime_cflags}
 GMIME_LDFLAGS = ${gmime_ldflags}
 
+# Flags needed to compile and link against zlib
+ZLIB_CFLAGS = ${zlib_cflags}
+ZLIB_LDFLAGS = ${zlib_ldflags}
+
 # Flags needed to compile and link against talloc
 TALLOC_CFLAGS = ${talloc_cflags}
 TALLOC_LDFLAGS = ${talloc_ldflags}
@@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
 		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
 CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
+		     \$(ZLIB_CFLAGS)					 \\
 		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
 		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
 		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
@@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
 		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
 		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
 
-CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
 EOF
diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
index 17d1da5..d94cb4f 100644
--- a/doc/man1/notmuch-dump.rst
+++ b/doc/man1/notmuch-dump.rst
@@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
 therefore the only critical thing to backup (and much more friendly to
 incremental backup than the native database files.)
 
+``--gzip``
+    Compress the output in a format compatible with **gzip(1)**.
+
 ``--format=(sup|batch-tag)``
     Notmuch restore supports two plain text dump formats, both with one
     message-id per line, followed by a list of tags.
diff --git a/notmuch-client.h b/notmuch-client.h
index d110648..e1efbe0 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -450,7 +450,9 @@ typedef enum dump_formats {
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format);
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output);
 
 #include "command-line-arguments.h"
 #endif
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 21702d7..28342b7 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -21,9 +21,11 @@
 #include "notmuch-client.h"
 #include "hex-escape.h"
 #include "string-util.h"
+#include <zlib.h>
+
 
 static int
-database_dump_file (notmuch_database_t *notmuch, FILE *output,
+database_dump_file (notmuch_database_t *notmuch, gzFile output,
 		    const char *query_str, int output_format)
 {
     notmuch_query_t *query;
@@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fprintf (output, "%s (", message_id);
+	    gzprintf (output, "%s (", message_id);
 	}
 
 	for (tags = notmuch_message_get_tags (message);
@@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 	    const char *tag_str = notmuch_tags_get (tags);
 
 	    if (! first)
-		fputs (" ", output);
+		gzputs (output, " ");
 
 	    first = 0;
 
 	    if (output_format == DUMP_FORMAT_SUP) {
-		fputs (tag_str, output);
+		gzputs (output, tag_str);
 	    } else {
 		if (hex_encode (notmuch, tag_str,
 				&buffer, &buffer_size) != HEX_SUCCESS) {
@@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     tag_str);
 		    return EXIT_FAILURE;
 		}
-		fprintf (output, "+%s", buffer);
+		gzprintf (output, "+%s", buffer);
 	    }
 	}
 
 	if (output_format == DUMP_FORMAT_SUP) {
-	    fputs (")\n", output);
+	    gzputs (output, ")\n");
 	} else {
 	    if (make_boolean_term (notmuch, "id", message_id,
 				   &buffer, &buffer_size)) {
@@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 			     message_id, strerror (errno));
 		    return EXIT_FAILURE;
 	    }
-	    fprintf (output, " -- %s\n", buffer);
+	    gzprintf (output, " -- %s\n", buffer);
 	}
 
 	notmuch_message_destroy (message);
@@ -121,24 +123,36 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
 int
 notmuch_database_dump (notmuch_database_t *notmuch,
 		       const char *output_file_name,
-		       const char *query_str, dump_format_t output_format)
+		       const char *query_str,
+		       dump_format_t output_format,
+		       notmuch_bool_t gzip_output)
 {
-    FILE *output = stdout;
+    gzFile output;
+    const char *mode = gzip_output ? "w9" : "wT";
+
     int ret;
 
-    if (output_file_name) {
-	output = fopen (output_file_name, "w");
-	if (output == NULL) {
-	    fprintf (stderr, "Error opening %s for writing: %s\n",
-		     output_file_name, strerror (errno));
-	    return EXIT_FAILURE;
-	}
+    if (output_file_name)
+	output = gzopen (output_file_name, mode);
+    else
+	output = gzdopen (fileno (stdout), mode);
+
+    if (output == NULL) {
+	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
+		 output_file_name ? output_file_name : "stdout", strerror (errno));
+	return EXIT_FAILURE;
     }
 
     ret = database_dump_file (notmuch, output, query_str, output_format);
 
-    if (output != stdout)
-	fclose (output);
+    if (gzflush (output, Z_FINISH)) {
+	fprintf (stderr, "Error flushing output: %s\n",
+		 gzerror (output, NULL));
+	return EXIT_FAILURE;
+    }
+
+    if (output_file_name)
+	gzclose_w (output);
 
     return ret;
 }
@@ -158,6 +172,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     int opt_index;
 
     int output_format = DUMP_FORMAT_BATCH_TAG;
+    notmuch_bool_t gzip_output = 0;
 
     notmuch_opt_desc_t options[] = {
 	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
@@ -165,6 +180,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
 				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
 				  { 0, 0 } } },
 	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
+	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
 	{ 0, 0, 0, 0, 0 }
     };
 
@@ -181,7 +197,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
     }
 
     ret = notmuch_database_dump (notmuch, output_file_name, query_str,
-				 output_format);
+				 output_format, gzip_output);
 
     notmuch_database_destroy (notmuch);
 
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 0004438..d79aca8 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
 notmuch dump --output=dump-1-arg-dash.actual --
 test_expect_equal_file dump.expected dump-1-arg-dash.actual
 
+# gzipped output
+
+test_begin_subtest "dump --gzip"
+notmuch dump --gzip > dump-gzip.gz
+gunzip dump-gzip.gz
+test_expect_equal_file dump.expected dump-gzip
+
+test_begin_subtest "dump --gzip --output=outfile"
+notmuch dump --gzip --output=dump-gzip-outfile.gz
+gunzip dump-gzip-outfile.gz
+test_expect_equal_file dump.expected dump-gzip-outfile
+
 # Note, we assume all messages from cworth have a message-id
 # containing cworth.org
 
-- 
1.9.0



More information about the notmuch mailing list