[PATCH] dump: support gzipped output
David Bremner
david at tethera.net
Sat Mar 29 06:46:25 PDT 2014
The main goal is to support gzipped output for future internal
calls (e.g. from notmuch-new) to notmuch_database_dump.
The additional dependency is not very heavy since xapian already pulls
in zlib.
---
I had a quick look at supporting gzipped input for restore; I think it
just requires an implementation of getline that uses gzgetc or gzgets.
Decompression can then be completely transparent to the user, since
gzipped input can be detected from its magic number.
INSTALL | 17 ++++++++++----
Makefile.local | 2 +-
configure | 23 ++++++++++++++++---
doc/man1/notmuch-dump.rst | 3 +++
notmuch-client.h | 4 +++-
notmuch-dump.c | 57 ++++++++++++++++++++++++++++++-----------------
test/T240-dump-restore.sh | 12 ++++++++++
7 files changed, 89 insertions(+), 29 deletions(-)
diff --git a/INSTALL b/INSTALL
index 690b0ef..2754e52 100644
--- a/INSTALL
+++ b/INSTALL
@@ -20,8 +20,8 @@ configure stage.
Dependencies
------------
-Notmuch depends on three libraries: Xapian, GMime 2.4 or 2.6, and
-Talloc which are each described below:
+Notmuch depends on four libraries: Xapian, GMime 2.4 or 2.6,
+Talloc, and zlib which are each described below:
Xapian
------
@@ -60,6 +60,15 @@ Talloc which are each described below:
Talloc is available from http://talloc.samba.org/
+ zlib
+ ----
+
+ zlib is an extremely popular compression library. It is used
+ by Xapian, so if you installed that you will already have
+ zlib. You may need to install the zlib headers separately.
+
+ zlib is available from http://zlib.net
+
Building Documentation
----------------------
@@ -79,11 +88,11 @@ dependencies with a simple simple command line. For example:
For Debian and similar:
- sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev python-sphinx
+ sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev python-sphinx
For Fedora and similar:
- sudo yum install xapian-core-devel gmime-devel libtalloc-devel python-sphinx
+ sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel python-sphinx
On other systems, a similar command can be used, but the details of
the package names may be different.
diff --git a/Makefile.local b/Makefile.local
index cb7b106..e5a20a7 100644
--- a/Makefile.local
+++ b/Makefile.local
@@ -41,7 +41,7 @@ PV_FILE=bindings/python/notmuch/version.py
# Smash together user's values with our extra values
FINAL_CFLAGS = -DNOTMUCH_VERSION=$(VERSION) $(CPPFLAGS) $(CFLAGS) $(WARN_CFLAGS) $(extra_cflags) $(CONFIGURE_CFLAGS)
FINAL_CXXFLAGS = $(CPPFLAGS) $(CXXFLAGS) $(WARN_CXXFLAGS) $(extra_cflags) $(extra_cxxflags) $(CONFIGURE_CXXFLAGS)
-FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS)
+FINAL_NOTMUCH_LDFLAGS = $(LDFLAGS) -Lutil -lutil -Llib -lnotmuch $(AS_NEEDED_LDFLAGS) $(GMIME_LDFLAGS) $(TALLOC_LDFLAGS) $(ZLIB_LDFLAGS)
FINAL_NOTMUCH_LINKER = CC
ifneq ($(LINKER_RESOLVES_LIBRARY_DEPENDENCIES),1)
FINAL_NOTMUCH_LDFLAGS += $(CONFIGURE_LDFLAGS)
diff --git a/configure b/configure
index 1d430b9..89bb3f3 100755
--- a/configure
+++ b/configure
@@ -340,6 +340,18 @@ else
errors=$((errors + 1))
fi
+printf "Checking for zlib development files... "
+have_zlib=0
+if pkg-config --exists zlib; then
+ printf "Yes.\n"
+ have_zlib=1
+ zlib_cflags=$(pkg-config --cflags zlib)
+ zlib_ldflags=$(pkg-config --libs zlib)
+else
+ printf "No.\n"
+ errors=$((errors + 1))
+fi
+
printf "Checking for talloc development files... "
if pkg-config --exists talloc; then
printf "Yes.\n"
@@ -519,11 +531,11 @@ case a simple command will install everything you need. For example:
On Debian and similar systems:
- sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev
+ sudo apt-get install libxapian-dev libgmime-2.6-dev libtalloc-dev zlib1g-dev
Or on Fedora and similar systems:
- sudo yum install xapian-core-devel gmime-devel libtalloc-devel
+ sudo yum install xapian-core-devel gmime-devel libtalloc-devel zlib-devel
On other systems, similar commands can be used, but the details of the
package names may be different.
@@ -844,6 +856,10 @@ XAPIAN_LDFLAGS = ${xapian_ldflags}
GMIME_CFLAGS = ${gmime_cflags}
GMIME_LDFLAGS = ${gmime_ldflags}
+# Flags needed to compile and link against zlib
+ZLIB_CFLAGS = ${zlib_cflags}
+ZLIB_LDFLAGS = ${zlib_ldflags}
+
# Flags needed to compile and link against talloc
TALLOC_CFLAGS = ${talloc_cflags}
TALLOC_LDFLAGS = ${talloc_ldflags}
@@ -882,6 +898,7 @@ CONFIGURE_CFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\
-DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\
+ \$(ZLIB_CFLAGS) \\
\$(TALLOC_CFLAGS) -DHAVE_VALGRIND=\$(HAVE_VALGRIND) \\
\$(VALGRIND_CFLAGS) \$(XAPIAN_CXXFLAGS) \\
-DHAVE_STRCASESTR=\$(HAVE_STRCASESTR) \\
@@ -892,5 +909,5 @@ CONFIGURE_CXXFLAGS = -DHAVE_GETLINE=\$(HAVE_GETLINE) \$(GMIME_CFLAGS) \\
-DHAVE_XAPIAN_COMPACT=\$(HAVE_XAPIAN_COMPACT) \\
-DUTIL_BYTE_ORDER=\$(UTIL_BYTE_ORDER)
-CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) \$(XAPIAN_LDFLAGS)
EOF
diff --git a/doc/man1/notmuch-dump.rst b/doc/man1/notmuch-dump.rst
index 17d1da5..d94cb4f 100644
--- a/doc/man1/notmuch-dump.rst
+++ b/doc/man1/notmuch-dump.rst
@@ -19,6 +19,9 @@ recreated from the messages themselves. The output of notmuch dump is
therefore the only critical thing to backup (and much more friendly to
incremental backup than the native database files.)
+``--gzip``
+ Compress the output in a format compatible with **gzip(1)**.
+
``--format=(sup|batch-tag)``
Notmuch restore supports two plain text dump formats, both with one
message-id per line, followed by a list of tags.
diff --git a/notmuch-client.h b/notmuch-client.h
index d110648..e1efbe0 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -450,7 +450,9 @@ typedef enum dump_formats {
int
notmuch_database_dump (notmuch_database_t *notmuch,
const char *output_file_name,
- const char *query_str, dump_format_t output_format);
+ const char *query_str,
+ dump_format_t output_format,
+ notmuch_bool_t gzip_output);
#include "command-line-arguments.h"
#endif
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 21702d7..128a37d 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -21,10 +21,12 @@
#include "notmuch-client.h"
#include "hex-escape.h"
#include "string-util.h"
+#include <zlib.h>
+
static int
-database_dump_file (notmuch_database_t *notmuch, FILE *output,
- const char *query_str, int output_format)
+database_dump_file (notmuch_database_t *notmuch, gzFile output,
+ const char *query_str, int output_format)
{
notmuch_query_t *query;
notmuch_messages_t *messages;
@@ -69,7 +71,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
}
if (output_format == DUMP_FORMAT_SUP) {
- fprintf (output, "%s (", message_id);
+ gzprintf (output, "%s (", message_id);
}
for (tags = notmuch_message_get_tags (message);
@@ -78,12 +80,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
const char *tag_str = notmuch_tags_get (tags);
if (! first)
- fputs (" ", output);
+ gzputs (output, " ");
first = 0;
if (output_format == DUMP_FORMAT_SUP) {
- fputs (tag_str, output);
+ gzputs (output, tag_str);
} else {
if (hex_encode (notmuch, tag_str,
&buffer, &buffer_size) != HEX_SUCCESS) {
@@ -91,12 +93,12 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
tag_str);
return EXIT_FAILURE;
}
- fprintf (output, "+%s", buffer);
+ gzprintf (output, "+%s", buffer);
}
}
if (output_format == DUMP_FORMAT_SUP) {
- fputs (")\n", output);
+ gzputs (output, ")\n");
} else {
if (make_boolean_term (notmuch, "id", message_id,
&buffer, &buffer_size)) {
@@ -104,7 +106,7 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
message_id, strerror (errno));
return EXIT_FAILURE;
}
- fprintf (output, " -- %s\n", buffer);
+ gzprintf (output, " -- %s\n", buffer);
}
notmuch_message_destroy (message);
@@ -121,24 +123,37 @@ database_dump_file (notmuch_database_t *notmuch, FILE *output,
int
notmuch_database_dump (notmuch_database_t *notmuch,
const char *output_file_name,
- const char *query_str, dump_format_t output_format)
+ const char *query_str,
+ dump_format_t output_format,
+ notmuch_bool_t gzip_output)
{
- FILE *output = stdout;
+ gzFile output;
+ const char *mode = gzip_output ? "w9" : "wT";
+
int ret;
- if (output_file_name) {
- output = fopen (output_file_name, "w");
- if (output == NULL) {
- fprintf (stderr, "Error opening %s for writing: %s\n",
- output_file_name, strerror (errno));
- return EXIT_FAILURE;
- }
+ if (output_file_name)
+ output = gzopen (output_file_name, mode);
+ else
+ output = gzdopen (fileno (stdout), mode);
+
+ if (output == NULL) {
+ fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
+ output_file_name || "stdout", strerror (errno));
+ return EXIT_FAILURE;
}
ret = database_dump_file (notmuch, output, query_str, output_format);
- if (output != stdout)
- fclose (output);
+ /* unlike stdio, zlib needs explicit flushing */
+ if (gzflush (output, Z_FINISH)) {
+ fprintf (stderr, "Error flushing output: %s\n",
+ gzerror (output, NULL));
+ return EXIT_FAILURE;
+ }
+
+ if (output_file_name)
+ gzclose_w (output);
return ret;
}
@@ -158,6 +173,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
int opt_index;
int output_format = DUMP_FORMAT_BATCH_TAG;
+ notmuch_bool_t gzip_output = 0;
notmuch_opt_desc_t options[] = {
{ NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f',
@@ -165,6 +181,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
{ "batch-tag", DUMP_FORMAT_BATCH_TAG },
{ 0, 0 } } },
{ NOTMUCH_OPT_STRING, &output_file_name, "output", 'o', 0 },
+ { NOTMUCH_OPT_BOOLEAN, &gzip_output, "gzip", 'z', 0 },
{ 0, 0, 0, 0, 0 }
};
@@ -181,7 +198,7 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[])
}
ret = notmuch_database_dump (notmuch, output_file_name, query_str,
- output_format);
+ output_format, gzip_output);
notmuch_database_destroy (notmuch);
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 0004438..d79aca8 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -68,6 +68,18 @@ test_begin_subtest "dump --output=outfile --"
notmuch dump --output=dump-1-arg-dash.actual --
test_expect_equal_file dump.expected dump-1-arg-dash.actual
+# gzipped output
+
+test_begin_subtest "dump --gzip"
+notmuch dump --gzip > dump-gzip.gz
+gunzip dump-gzip.gz
+test_expect_equal_file dump.expected dump-gzip
+
+test_begin_subtest "dump --gzip --output=outfile"
+notmuch dump --gzip --output=dump-gzip-outfile.gz
+gunzip dump-gzip-outfile.gz
+test_expect_equal_file dump.expected dump-gzip-outfile
+
# Note, we assume all messages from cworth have a message-id
# containing cworth.org
--
1.9.0
More information about the notmuch
mailing list