[Patch v5 1/6] dump: support gzipped output

Tomi Ollila tomi.ollila at iki.fi
Wed Apr 2 09:50:14 PDT 2014


On Wed, Apr 02 2014, David Bremner <david at tethera.net> wrote:

> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.
>
> The additional dependency is not very heavy since xapian already pulls
> in zlib.
> ---
>  INSTALL                   | 17 +++++++++++----
>  Makefile.local            |  2 +-
>  configure                 | 28 +++++++++++++++++++++---
>  doc/man1/notmuch-dump.rst |  3 +++
>  notmuch-client.h          |  4 +++-
>  notmuch-dump.c            | 54 ++++++++++++++++++++++++++++++-----------------
>  test/T240-dump-restore.sh | 12 +++++++++++
>  7 files changed, 92 insertions(+), 28 deletions(-)
>
> diff --git a/INSTALL b/INSTALL
> index 690b0ef..df318fa 100644
> --- a/INSTALL
> +++ b/INSTALL
> @@ -20,8 +20,8 @@ configure stage.
>  
>  Dependencies
>  ------------
> -Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
> -Talloc which are each described below:
> +Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
> +Talloc, and zlib which are each described below:
>  
>  	Xapian
>  	------
> @@ -60,6 +60,15 @@ Talloc which are each described below:
>  
>  	Talloc is available from http://talloc.samba.org/
>  
> +	zlib
> +	----
> +
> +	zlib is an extremely popular compression library. It is used
> +	by Xapian, so if you installed that you will already have
> +	zlib. You may need to install the zlib headers separately.
> +
> +	zlib is available from http://zlib.net
> +
>  Building Documentation
>  ----------------------
>  
> @@ -79,11 +88,11 @@ dependencies with a simple simple command line. For example:
>  
>    For Debian and similar:
>  
> -        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
> +        sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
>  
>    For Fedora and similar:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
>  
>  On other systems, a similar command can be used, but the details of
>  the package names may be different.
> diff --git a/Makefile.local b/Makefile.local
> index cb7b106..e5a20a7 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
>  # Smash together user's values with our extra values
>  FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
>  FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
> -FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
> +FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
>  FINAL_NOTMUCH_LINKER = CC
>  ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
>  FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
> diff --git a/configure b/configure
> index 1d430b9..d685ab3 100755
> --- a/configure
> +++ b/configure
> @@ -340,6 +340,18 @@ else
>      errors=$((errors + 1))
>  fi
>  
> +printf "Checking for zlib development files... "
> +have_zlib=0
> +if pkg-config --exists zlib; then
> +    printf "Yes.\n"
> +    have_zlib=1
> +    zlib_cflags=$(pkg-config --cflags zlib)
> +    zlib_ldflags=$(pkg-config --libs zlib)
> +else
> +    printf "No.\n"
> +    errors=$((errors + 1))
> +fi

Like Austin mentioned on IRC, zlib 1.2.5.2 (Dec 2011) is the first version
supporting 'wT', the test above should use:

pkg-config --atleast-version=1.2.5.2 zlib 

And the related documentation should mention this version dependency...

(maybe we can go with this and somebody(tm) who wants support to older
zlibs may provide tolerable patches for that to be done).

For example Scientific Linux 6.2 (RHEL 6.2-compatible) has zlib 1.2.3 (released
2005). Maybe there are backports and maybe 6.3 has newer... for me it is
just easier to compile newer zlib :D)


Tomi

> +
>  printf "Checking for talloc development files... "
>  if pkg-config --exists talloc; then
>      printf "Yes.\n"
> @@ -496,6 +508,11 @@ EOF
>  	echo "	Xapian library (including development files such as headers)"
>  	echo "	http://xapian.org/"
>      fi
> +    if [ $have_zlib -eq 0 ]; then
> +	echo "	zlib library (including development files such as headers)"
> +	echo "	http://zlib.net/"
> +	echo
> +    fi
>      if [ $have_gmime -eq 0 ]; then
>  	echo "	Either GMime 2.4 library" $GMIME_24_VERSION_CTR "or GMime 2.6 library" $GMIME_26_VERSION_CTR
>  	echo "	(including development files such as headers)"
> @@ -519,11 +536,11 @@ case a simple command will install everything you need. For example:
>  
>  On Debian and similar systems:
>  
> -	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
> +	sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
>  
>  Or on Fedora and similar systems:
>  
> -	sudo yum install xapian-core-devel gmime-devel libtalloc-devel
> +	sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
>  
>  On other systems, similar commands can be used, but the details of the
>  package names may be different.
> @@ -844,6 +861,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
>  GMIME_CFLAGS = ${gmime_cflags}
>  GMIME_LDFLAGS = ${gmime_ldflags}
>  
> +# Flags needed to compile and link against zlib
> +ZLIB_CFLAGS = ${zlib_cflags}
> +ZLIB_LDFLAGS = ${zlib_ldflags}
> +
>  # Flags needed to compile and link against talloc
>  TALLOC_CFLAGS = ${talloc_cflags}
>  TALLOC_LDFLAGS = ${talloc_ldflags}
> @@ -882,6 +903,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)      \\
>  		   -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
>  CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
> +		     \$(ZLIB_CFLAGS)					 \\
>  		     \$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
>  		     \$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS)             \\
>  		     -DHAVE_STRCASESTR=\$(HAVE_STRCASESTR)               \\
> @@ -892,5 +914,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS)    \\
>  		     -DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT)       \\
>  		     -DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
>  
> -CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
> +CONFIGURE_LDFLAGS =  \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
>  EOF
> diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
> index 17d1da5..d94cb4f 100644
> --- a/doc/man1/notmuch-dump.rst
> +++ b/doc/man1/notmuch-dump.rst
> @@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
>  therefore the only critical thing to backup (and much more friendly to
>  incremental backup than the native database files.)
>  
> +``--gzip``
> +    Compress the output in a format compatible with **gzip(1)**.
> +
>  ``--format=(sup|batch-tag)``
>      Notmuch restore supports two plain text dump formats, both with one
>      message-id per line, followed by a list of tags.
> diff --git a/notmuch-client.h b/notmuch-client.h
> index d110648..e1efbe0 100644
> --- a/notmuch-client.h
> +++ b/notmuch-client.h
> @@ -450,7 +450,9 @@ typedef enum dump_formats {
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format);
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output);
>  
>  #include "command-line-arguments.h"
>  #endif
> diff --git a/notmuch-dump.c b/notmuch-dump.c
> index 21702d7..28342b7 100644
> --- a/notmuch-dump.c
> +++ b/notmuch-dump.c
> @@ -21,9 +21,11 @@
>  #include "notmuch-client.h"
>  #include "hex-escape.h"
>  #include "string-util.h"
> +#include <zlib.h>
> +
>  
>  static int
> -database_dump_file (notmuch_database_t *notmuch, FILE *output,
> +database_dump_file (notmuch_database_t *notmuch, gzFile output,
>  		    const char *query_str, int output_format)
>  {
>      notmuch_query_t *query;
> @@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fprintf (output, "%s (", message_id);
> +	    gzprintf (output, "%s (", message_id);
>  	}
>  
>  	for (tags = notmuch_message_get_tags (message);
> @@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  	    const char *tag_str = notmuch_tags_get (tags);
>  
>  	    if (! first)
> -		fputs (" ", output);
> +		gzputs (output, " ");
>  
>  	    first = 0;
>  
>  	    if (output_format == DUMP_FORMAT_SUP) {
> -		fputs (tag_str, output);
> +		gzputs (output, tag_str);
>  	    } else {
>  		if (hex_encode (notmuch, tag_str,
>  				&buffer, &buffer_size) != HEX_SUCCESS) {
> @@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     tag_str);
>  		    return EXIT_FAILURE;
>  		}
> -		fprintf (output, "+%s", buffer);
> +		gzprintf (output, "+%s", buffer);
>  	    }
>  	}
>  
>  	if (output_format == DUMP_FORMAT_SUP) {
> -	    fputs (")\n", output);
> +	    gzputs (output, ")\n");
>  	} else {
>  	    if (make_boolean_term (notmuch, "id", message_id,
>  				   &buffer, &buffer_size)) {
> @@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  			     message_id, strerror (errno));
>  		    return EXIT_FAILURE;
>  	    }
> -	    fprintf (output, " -- %s\n", buffer);
> +	    gzprintf (output, " -- %s\n", buffer);
>  	}
>  
>  	notmuch_message_destroy (message);
> @@ -121,24 +123,36 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
>  int
>  notmuch_database_dump (notmuch_database_t *notmuch,
>  		       const char *output_file_name,
> -		       const char *query_str, dump_format_t output_format)
> +		       const char *query_str,
> +		       dump_format_t output_format,
> +		       notmuch_bool_t gzip_output)
>  {
> -    FILE *output = stdout;
> +    gzFile output;
> +    const char *mode = gzip_output ? "w9" : "wT";
> +
>      int ret;
>  
> -    if (output_file_name) {
> -	output = fopen (output_file_name, "w");
> -	if (output == NULL) {
> -	    fprintf (stderr, "Error opening %s for writing: %s\n",
> -		     output_file_name, strerror (errno));
> -	    return EXIT_FAILURE;
> -	}
> +    if (output_file_name)
> +	output = gzopen (output_file_name, mode);
> +    else
> +	output = gzdopen (fileno (stdout), mode);
> +
> +    if (output == NULL) {
> +	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
> +		 output_file_name ? output_file_name : "stdout", strerror (errno));
> +	return EXIT_FAILURE;
>      }
>  
>      ret = database_dump_file (notmuch, output, query_str, output_format);
>  
> -    if (output != stdout)
> -	fclose (output);
> +    if (gzflush (output, Z_FINISH)) {
> +	fprintf (stderr, "Error flushing output: %s\n",
> +		 gzerror (output, NULL));
> +	return EXIT_FAILURE;
> +    }
> +
> +    if (output_file_name)
> +	gzclose_w (output);
>  
>      return ret;
>  }
> @@ -158,6 +172,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      int opt_index;
>  
>      int output_format = DUMP_FORMAT_BATCH_TAG;
> +    notmuch_bool_t gzip_output = 0;
>  
>      notmuch_opt_desc_t options[] = {
>  	{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
> @@ -165,6 +180,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>  				  { "batch-tag", DUMP_FORMAT_BATCH_TAG },
>  				  { 0, 0 } } },
>  	{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0  },
> +	{ NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
>  	{ 0, 0, 0, 0, 0 }
>      };
>  
> @@ -181,7 +197,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      ret = notmuch_database_dump (notmuch, output_file_name, query_str,
> -				 output_format);
> +				 output_format, gzip_output);
>  
>      notmuch_database_destroy (notmuch);
>  
> diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
> index 0004438..d79aca8 100755
> --- a/test/T240-dump-restore.sh
> +++ b/test/T240-dump-restore.sh
> @@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
>  notmuch dump --output=dump-1-arg-dash.actual --
>  test_expect_equal_file dump.expected dump-1-arg-dash.actual
>  
> +# gzipped output
> +
> +test_begin_subtest "dump --gzip"
> +notmuch dump --gzip > dump-gzip.gz
> +gunzip dump-gzip.gz
> +test_expect_equal_file dump.expected dump-gzip
> +
> +test_begin_subtest "dump --gzip --output=outfile"
> +notmuch dump --gzip --output=dump-gzip-outfile.gz
> +gunzip dump-gzip-outfile.gz
> +test_expect_equal_file dump.expected dump-gzip-outfile
> +
>  # Note, we assume all messages from cworth have a message-id
>  # containing cworth.org
>  
> -- 
> 1.9.0


More information about the notmuch mailing list