[Patch v6 1/6] dump: support gzipped and atomic output
David Bremner
david at tethera.net
Thu Apr 3 19:32:44 PDT 2014
David Bremner <david at tethera.net> writes:
> The main goal is to support gzipped output for future internal
> calls (e.g. from notmuch-new) to notmuch_database_dump.
Apologies if this later turns out to be duplicate. I'm not sure what
happened to the cover letter for this series.
This supercedes
id:1396401381-18128-1-git-send-email-david at tethera.net
I tried to take care of Austin and Tomi's comments, but there were
several messages so hopefully I didn't miss anything.
I ended up merging the "gzipped output" and "atomic output" patches; I
fought it for a bit, but I realized I would have to do some of the
proposed changes twice and throw away the first one.
diff from v5 follows:
diff --git a/INSTALL b/INSTALL
index df318fa..b543c50 100644
--- a/INSTALL
+++ b/INSTALL
@@ -67,6 +67,9 @@ Talloc, and zlib which are each described below:
by Xapian, so if you installed that you will already have
zlib. You may need to install the zlib headers separately.
+ Notmuch needs the transparent write feature of zlib introduced
+ in version 1.2.5.2 (Dec. 2011).
+
zlib is available from http://zlib.net
Building Documentation
diff --git a/configure b/configure
index d685ab3..1d624f7 100755
--- a/configure
+++ b/configure
@@ -340,9 +340,9 @@ else
errors=$((errors + 1))
fi
-printf "Checking for zlib development files... "
+printf "Checking for zlib (>= 1.2.5.2)... "
have_zlib=0
-if pkg-config --exists zlib; then
+if pkg-config --atleast-version=1.2.5.2 zlib; then
printf "Yes.\n"
have_zlib=1
zlib_cflags=$(pkg-config --cflags zlib)
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 05ed6b4..2a7252a 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -140,7 +140,7 @@ notmuch_database_dump (notmuch_database_t *notmuch,
tempname = talloc_asprintf (notmuch, "%s.XXXXXX", output_file_name);
outfd = mkstemp (tempname);
} else {
- outfd = fileno (stdout);
+ outfd = dup (STDOUT_FILENO);
}
if (outfd < 0) {
@@ -153,6 +153,9 @@ notmuch_database_dump (notmuch_database_t *notmuch,
if (output == NULL) {
fprintf (stderr, "Error opening %s for (gzip) writing: %s\n",
name_for_error, strerror (errno));
+ if (close (outfd))
+ fprintf (stderr, "Error closing %s during shutdown: %s\n",
+ name_for_error, strerror (errno));
goto DONE;
}
@@ -165,22 +168,25 @@ notmuch_database_dump (notmuch_database_t *notmuch,
goto DONE;
}
- ret = fdatasync (outfd);
- if (ret) {
- perror ("fdatasync");
- goto DONE;
- }
-
if (output_file_name) {
- ret = gzclose_w (output);
- if (ret != Z_OK) {
- ret = EXIT_FAILURE;
+ ret = fdatasync (outfd);
+ if (ret) {
+ fprintf (stderr, "Error syncing %s to disk: %s\n",
+ name_for_error, strerror (errno));
goto DONE;
}
+ }
+ if (gzclose_w (output) != Z_OK) {
+ ret = EXIT_FAILURE;
+ goto DONE;
+ }
+
+ if (output_file_name) {
ret = rename (tempname, output_file_name);
if (ret) {
- perror ("rename");
+ fprintf (stderr, "Error renaming %s to %s: %s\n",
+ tempname, output_file_name, strerror (errno));
goto DONE;
}
diff --git a/notmuch-new.c b/notmuch-new.c
index e0431c6..d269c7c 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -997,7 +997,7 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
* relatively small. */
const char *backup_name =
- talloc_asprintf (notmuch, "%s/backup-%04d-%02d-%02d-%02d:%02d:%02d.gz",
+ talloc_asprintf (notmuch, "%s/dump-%04d%02d%02dT%02d%02d%02d.gz",
dot_notmuch_path,
gm_time->tm_year + 1900,
gm_time->tm_mon + 1,
@@ -1009,12 +1009,12 @@ notmuch_new_command (notmuch_config_t *config, int argc, char *argv[])
if (add_files_state.verbosity >= VERBOSITY_NORMAL) {
printf ("Welcome to a new version of notmuch! Your database will now be upgraded.\n");
printf ("This process is safe to interrupt.\n");
- printf ("Backing up tags to %s\n", backup_name);
+ printf ("Backing up tags to %s...\n", backup_name);
}
if (notmuch_database_dump (notmuch, backup_name, "",
DUMP_FORMAT_BATCH_TAG, TRUE)) {
- fprintf (stderr, "Backup failed. Aborting upgrade");
+ fprintf (stderr, "Backup failed. Aborting upgrade.");
return EXIT_FAILURE;
}
diff --git a/notmuch-restore.c b/notmuch-restore.c
index 86bce20..eb5b7b2 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -132,7 +132,6 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
gzFile input;
char *line = NULL;
void *line_ctx = NULL;
- size_t line_size;
ssize_t line_len;
int ret = 0;
@@ -166,8 +165,16 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
if (input_file_name)
input = gzopen (input_file_name, "r");
- else
- input = gzdopen (fileno (stdin), "r");
+ else {
+ int infd = dup (STDIN_FILENO);
+ if (infd < 0) {
+ fprintf (stderr, "Error duping stdin\n");
+ return EXIT_FAILURE;
+ }
+ input = gzdopen (infd, "r");
+ if (! input)
+ close (infd);
+ }
if (input == NULL) {
fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
@@ -189,7 +196,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
do {
util_status_t status;
- status = gz_getline (line_ctx, &line, &line_size, &line_len, input);
+ status = gz_getline (line_ctx, &line, &line_len, input);
/* empty input file not considered an error */
if (status == UTIL_EOF)
@@ -262,7 +269,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
if (ret)
break;
- } while (gz_getline (line_ctx, &line, &line_size, &line_len, input) == UTIL_SUCCESS);
+ } while (gz_getline (line_ctx, &line, &line_len, input) == UTIL_SUCCESS);
if (line_ctx != NULL)
talloc_free (line_ctx);
@@ -272,8 +279,7 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
notmuch_database_destroy (notmuch);
- if (input_file_name != NULL)
- gzclose_r (input);
+ gzclose_r (input);
return ret ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index 50d4d48..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -124,6 +124,15 @@ notmuch dump --format=batch-tag from:cworth | sed 's/^.*-- id://' | \
sort > OUTPUT.$test_count
test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count
+test_begin_subtest "format=batch-tag, missing newline"
+printf "+a_tag_without_newline -- id:20091117232137.GA7669 at griffis1.net" > IN
+notmuch restore --accumulate < IN
+notmuch dump id:20091117232137.GA7669 at griffis1.net > OUT
+cat <<EOF > EXPECTED
++a_tag_without_newline +inbox +unread -- id:20091117232137.GA7669 at griffis1.net
+EOF
+test_expect_equal_file EXPECTED OUT
+
test_begin_subtest "format=batch-tag, # round-trip"
notmuch dump --format=sup | sort > EXPECTED.$test_count
notmuch dump --format=batch-tag | notmuch restore --format=batch-tag
diff --git a/test/T530-upgrade.sh b/test/T530-upgrade.sh
index 7a68ddf..7d5d5aa 100755
--- a/test/T530-upgrade.sh
+++ b/test/T530-upgrade.sh
@@ -37,7 +37,7 @@ Your notmuch database has now been upgraded to database format version 2.
No new mail."
test_begin_subtest "tag backup matches pre-upgrade dump"
-gunzip -c ${MAIL_DIR}/.notmuch/backup-*.gz | sort > backup-dump
+gunzip -c ${MAIL_DIR}/.notmuch/dump-*.gz | sort > backup-dump
test_expect_equal_file pre-upgrade-dump backup-dump
test_begin_subtest "folder: no longer matches in the middle of path"
diff --git a/util/zlib-extra.c b/util/zlib-extra.c
index cb1eba0..922ab52 100644
--- a/util/zlib-extra.c
+++ b/util/zlib-extra.c
@@ -25,34 +25,36 @@
/* mimic POSIX/glibc getline, but on a zlib gzFile stream, and using talloc */
util_status_t
-gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read,
- gzFile stream)
+gz_getline (void *talloc_ctx, char **bufptr, ssize_t *bytes_read, gzFile stream)
{
- size_t len = *bufsiz;
char *buf = *bufptr;
+ unsigned int len;
size_t offset = 0;
- if (len == 0 || buf == NULL) {
+ if (buf) {
+ len = talloc_array_length (buf);
+ } else {
/* same as getdelim from gnulib */
len = 120;
- buf = talloc_size (talloc_ctx, len);
+ buf = talloc_array (talloc_ctx, char, len);
if (buf == NULL)
return UTIL_OUT_OF_MEMORY;
}
while (1) {
if (! gzgets (stream, buf + offset, len - offset)) {
+ /* Null indicates EOF or error */
int zlib_status = 0;
(void) gzerror (stream, &zlib_status);
switch (zlib_status) {
case Z_OK:
- /* follow getline behaviour */
- *bytes_read = -1;
- return UTIL_EOF;
- break;
+ /* no data read before EOF */
+ if (offset == 0)
+ return UTIL_EOF;
+ else
+ goto SUCCESS;
case Z_ERRNO:
return UTIL_FILE;
- break;
default:
return UTIL_ERROR;
}
@@ -60,17 +62,16 @@ gz_getline (void *talloc_ctx, char **bufptr, size_t *bufsiz, ssize_t *bytes_read
offset += strlen (buf + offset);
- if ( buf[offset - 1] == '\n' )
- break;
+ if (buf[offset - 1] == '\n')
+ goto SUCCESS;
len *= 2;
buf = talloc_realloc (talloc_ctx, buf, char, len);
if (buf == NULL)
return UTIL_OUT_OF_MEMORY;
}
-
+ SUCCESS:
*bufptr = buf;
- *bufsiz = len;
*bytes_read = offset;
return UTIL_SUCCESS;
}
diff --git a/util/zlib-extra.h b/util/zlib-extra.h
index ed46ac1..ed67115 100644
--- a/util/zlib-extra.h
+++ b/util/zlib-extra.h
@@ -1,11 +1,11 @@
#ifndef _ZLIB_EXTRA_H
#define _ZLIB_EXTRA_H
-#include <zlib.h>
#include "util.h"
+#include <zlib.h>
+
/* Like getline, but read from a gzFile. Allocation is with talloc */
util_status_t
-gz_getline (void *ctx, char **lineptr, size_t *line_size, ssize_t *bytes_read,
- gzFile stream);
+gz_getline (void *ctx, char **lineptr, ssize_t *bytes_read, gzFile stream);
#endif
More information about the notmuch
mailing list