[Patch v6 1/6] dump: support gzipped and atomic output

David Bremner david at tethera.net
Thu Apr 3 19:32:44 PDT 2014


David Bremner <david at tethera.net> writes:

> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.

Apologies if this later turns out to be duplicate. I'm not sure what
happened to the cover letter for this series.

This supercedes 

id:1396401381-18128-1-git-send-email-david at tethera.net

I tried to take care of Austin and Tomi's comments, but there were
several messages so hopefully I didn't miss anything.

I ended up merging the "gzipped output" and "atomic output" patches; I
fought it for a bit, but I realized I would have to do some of the
proposed changes twice and throw away the first one.

diff from v5 follows:

diff --git a/INSTALL b/INSTALL
index df318fa..b543c50 100644
--- a/INSTALL
+++ b/INSTALL
@@ -67,6 +67,9 @@ Talloc, and zlib which are each described below:
 	by Xapian, so if you installed that you will already have
 	zlib. You may need to install the zlib headers separately.
 
+	Notmuch needs the transparent write feature of zlib introduced
+	in version 1.2.5.2 (Dec. 2011).
+
 	zlib is available from http://zlib.net
 
 Building Documentation
diff --git a/configure b/configure
index d685ab3..1d624f7 100755
--- a/configure
+++ b/configure
@@ -340,9 +340,9 @@ else
     errors=$((errors + 1))
 fi
 
-printf "Checking for zlib development files... "
+printf "Checking for zlib (>= 1.2.5.2)... "
 have_zlib=0
-if pkg-config --exists zlib; then
+if pkg-config --atleast-version=1.2.5.2 zlib; then
     printf "Yes.\n"
     have_zlib=1
     zlib_cflags=$(pkg-config --cflags zlib)
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 05ed6b4..2a7252a 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
 	outfd = mkstemp (tempname);
     } else {
-	outfd = fileno (stdout);
+	outfd = dup (STDOUT_FILENO);
     }
 
     if (outfd < 0) {
@@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch,
     if (output == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
 		 name_for_error, strerror (errno));
+	if (close (outfd))
+	    fprintf (stderr, "Error closing %s during shutdown: %s\n",
+		 name_for_error, strerror (errno));
 	goto DONE;
     }
 
@@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch,
 	goto DONE;
     }
 
-    ret = fdatasync (outfd);
-    if (ret) {
-	perror ("fdatasync");
-	goto DONE;
-    }
-
     if (output_file_name) {
-	ret = gzclose_w (output);
-	if (ret != Z_OK) {
-	    ret = EXIT_FAILURE;
+	ret = fdatasync (outfd);
+	if (ret) {
+	    fprintf (stderr, "Error syncing %s to disk: %s\n",
+		     name_for_error, strerror (errno));
 	    goto DONE;
 	}
+    }
 
+    if (gzclose_w (output) != Z_OK) {
+	ret = EXIT_FAILURE;
+	goto DONE;
+    }
+
+    if (output_file_name) {
 	ret = rename (tempname, output_file_name);
 	if (ret) {
-	    perror ("rename");
+	    fprintf (stderr, "Error renaming %s to %s: %s\n",
+		     tempname, output_file_name, strerror (errno));
 	    goto DONE;
 	}
 
diff --git a/notmuch-new.c b/notmuch-new.c
index e0431c6..d269c7c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	     * relatively small. */
 
 	    const char *backup_name =
-		talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz",
+		talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz",
 				 dot_notmuch_path,
 				 gm_time->tm_year + 1900,
 				 gm_time->tm_mon + 1,
@@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
 	    if (add_files_state.verbosity >= VERBOSITY_NORMAL) {
 		printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n");
 		printf ("This process is safe to interrupt.\n");
-		printf ("Backing up tags to %s\n", backup_name);
+		printf ("Backing up tags to %s...\n", backup_name);
 	    }
 
 	    if (notmuch_database_dump (notmuch, backup_name, "",
 				       DUMP_FORMAT_BATCH_TAG, TRUE)) {
-		fprintf (stderr, "Backup failed. Aborting upgrade");
+		fprintf (stderr, "Backup failed. Aborting upgrade.");
 		return EXIT_FAILURE;
 	    }
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 86bce20..eb5b7b2 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     gzFile input;
     char *line = NULL;
     void *line_ctx = NULL;
-    size_t line_size;
     ssize_t line_len;
 
     int ret = 0;
@@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     if (input_file_name)
 	input = gzopen (input_file_name, "r");
-    else
-	input = gzdopen (fileno (stdin), "r");
+    else {
+	int infd = dup (STDIN_FILENO);
+	if (infd < 0) {
+	    fprintf (stderr, "Error duping stdin\n");
+	    return EXIT_FAILURE;
+	}
+	input = gzdopen (infd, "r");
+	if (! input)
+	    close (infd);
+    }
 
     if (input == NULL) {
 	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
@@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     do {
 	util_status_t status;
 
-	status = gz_getline (line_ctx, &line, &line_size, &line_len, input);
+	status = gz_getline (line_ctx, &line, &line_len, input);
 
 	/* empty input file not considered an error */
 	if (status == UTIL_EOF)
@@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 	if (ret)
 	    break;
 
-    }  while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS);
+    }  while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
 
     if (line_ctx != NULL)
 	talloc_free (line_ctx);
@@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 
     notmuch_database_destroy (notmuch);
 
-    if (input_file_name != NULL)
-	gzclose_r (input);
+    gzclose_r (input);
 
     return ret ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 50d4d48..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
     sort > OUTPUT.$test_count
 test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
 
+test_begin_subtest "format=batch-tag, missing newline"
+printf "+a_tag_without_newline -- id:20091117232137.GA7669 at griffis1.net" > IN
+notmuch restore --accumulate < IN
+notmuch dump id:20091117232137.GA7669 at griffis1.net > OUT
+cat <<EOF > EXPECTED
++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669 at griffis1.net
+EOF
+test_expect_equal_file EXPECTED OUT
+
 test_begin_subtest "format=batch-tag, # round-trip"
 notmuch dump --format=sup | sort > EXPECTED.$test_count
 notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index 7a68ddf..7d5d5aa 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2.
 No new mail."
 
 test_begin_subtest "tag backup matches pre-upgrade dump"
-gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump
+gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump
 test_expect_equal_file pre-upgrade-dump backup-dump
 
 test_begin_subtest "folder: no longer matches in the middle of path"
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
index cb1eba0..922ab52 100644
--- a/util/zlib-extra.c
+++ b/util/zlib-extra.c
@@ -25,34 +25,36 @@
 
 /* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */
 util_status_t
-gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read,
-	    gzFile stream)
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
 {
-    size_t len = *bufsiz;
     char *buf = *bufptr;
+    unsigned int len;
     size_t offset = 0;
 
-    if (len == 0 || buf == NULL) {
+    if (buf) {
+	len = talloc_array_length (buf);
+    } else {
 	/* same as getdelim from gnulib */
 	len = 120;
-	buf = talloc_size (talloc_ctx, len);
+	buf = talloc_array (talloc_ctx, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
 
     while (1) {
 	if (! gzgets (stream, buf + offset, len - offset)) {
+	    /* Null indicates EOF or error */
 	    int zlib_status = 0;
 	    (void) gzerror (stream, &zlib_status);
 	    switch (zlib_status) {
 	    case Z_OK:
-		/* follow getline behaviour */
-		*bytes_read = -1;
-		return UTIL_EOF;
-		break;
+		/* no data read before EOF */
+		if (offset == 0)
+		    return UTIL_EOF;
+		else
+		    goto SUCCESS;
 	    case Z_ERRNO:
 		return UTIL_FILE;
-		break;
 	    default:
 		return UTIL_ERROR;
 	    }
@@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read
 
 	offset += strlen (buf + offset);
 
-	if ( buf[offset - 1] == '\n' )
-	    break;
+	if (buf[offset - 1] == '\n')
+	    goto SUCCESS;
 
 	len *= 2;
 	buf = talloc_realloc (talloc_ctx, buf, char, len);
 	if (buf == NULL)
 	    return UTIL_OUT_OF_MEMORY;
     }
-
+ SUCCESS:
     *bufptr = buf;
-    *bufsiz = len;
     *bytes_read = offset;
     return UTIL_SUCCESS;
 }
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
index ed46ac1..ed67115 100644
--- a/util/zlib-extra.h
+++ b/util/zlib-extra.h
@@ -1,11 +1,11 @@
 #ifndef _ZLIB_EXTRA_H
 #define _ZLIB_EXTRA_H
 
-#include <zlib.h>
 #include "util.h"
+#include <zlib.h>
+
 /* Like getline, but read from a gzFile. Allocation is with talloc */
 util_status_t
-gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read,
-	    gzFile stream);
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
 
 #endif


More information about the notmuch mailing list