[Patch v7 4/6] restore: transparently support gzipped input

David Bremner david at tethera.net
Sat Apr 5 08:43:54 PDT 2014


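We rely completely on zlib to do the right thing in detecting gzipped
input. Since our dump format is chosen to be 7-bit ASCII, this should
be fine: the gzip magic number starts with the bytes 0x1f 0x8b, and
0x8b cannot occur in 7-bit ASCII, so a plain-text dump is never
mistaken for gzipped input.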
---
 doc/man1/notmuch-restore.rst |  8 ++++
 notmuch-restore.c            | 93 +++++++++++++++++++++++++++++++++-----------
 test/T240-dump-restore.sh    | 14 +++++++
 3 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
index d6cf19a..936b138 100644
--- a/doc/man1/notmuch-restore.rst
+++ b/doc/man1/notmuch-restore.rst
@@ -50,6 +50,14 @@ Supported options for **restore** include
             format, this heuristic, based the fact that batch-tag format
             contains no parentheses, should be accurate.
 
+GZIPPED INPUT
+=============
+
+\ **notmuch restore** will detect if the input is compressed in
+**gzip(1)** format and automatically decompress it while reading. This
+detection does not depend on file naming and in particular works for
+standard input.
+
 SEE ALSO
 ========
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index c54d513..7abee0a 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -22,6 +22,7 @@
 #include "hex-escape.h"
 #include "tag-util.h"
 #include "string-util.h"
+#include "zlib-extra.h"
 
 static regex_t regex;
 
@@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     tag_op_list_t *tag_ops;
 
     char *input_file_name = NULL;
-    FILE *input = stdin;
+    const char *name_for_error = NULL;
+    gzFile input = NULL;
     char *line = NULL;
     void *line_ctx = NULL;
-    size_t line_size;
     ssize_t line_len;
 
     int ret = 0;
@@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
     };
 
     opt_index = parse_arguments (argc, argv, options, 1);
-    if (opt_index < 0)
-	return EXIT_FAILURE;
+    if (opt_index < 0) {
+	ret = EXIT_FAILURE;
+	goto DONE;
+    }
+
+    name_for_error = input_file_name ? input_file_name : "stdin";
 
     if (! accumulate)
 	flags |= TAG_FLAG_REMOVE_ALL;
 
-    if (input_file_name) {
-	input = fopen (input_file_name, "r");
-	if (input == NULL) {
-	    fprintf (stderr, "Error opening %s for reading: %s\n",
-		     input_file_name, strerror (errno));
-	    return EXIT_FAILURE;
+    errno = 0;
+    if (input_file_name)
+	input = gzopen (input_file_name, "r");
+    else {
+	int infd = dup (STDIN_FILENO);
+	if (infd < 0) {
+	    fprintf (stderr, "Error duping stdin: %s\n",
+		     strerror (errno));
+	    ret = EXIT_FAILURE;
+	    goto DONE;
 	}
+	input = gzdopen (infd, "r");
+	if (! input)
+	    close (infd);
+    }
+
+    if (input == NULL) {
+	fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
+		 name_for_error, strerror (errno));
+	ret = EXIT_FAILURE;
+	goto DONE;
     }
 
     if (opt_index < argc) {
 	fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]);
-	return EXIT_FAILURE;
+	ret = EXIT_FAILURE;
+	goto DONE;
     }
 
     tag_ops = tag_op_list_create (config);
     if (tag_ops == NULL) {
 	fprintf (stderr, "Out of memory.\n");
-	return EXIT_FAILURE;
+	ret = EXIT_FAILURE;
+	goto DONE;
     }
 
     do {
-	line_len = getline (&line, &line_size, input);
+	util_status_t status;
+
+	status = gz_getline (line_ctx, &line, &line_len, input);
 
 	/* empty input file not considered an error */
-	if (line_len < 0)
-	    return EXIT_SUCCESS;
+	if (status == UTIL_EOF) {
+	    ret = EXIT_SUCCESS;
+	    goto DONE;
+	}
 
+	if (status) {
+	    fprintf (stderr, "Error reading (gzipped) input: %s\n",
+		     gz_error_string(status, input));
+	    ret = EXIT_FAILURE;
+	    goto DONE;
+	}
     } while ((line_len == 0) ||
 	     (line[0] == '#') ||
 	     /* the cast is safe because we checked about for line_len < 0 */
@@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int argc, char *argv[])
 	if (ret)
 	    break;
 
-    }  while ((line_len = getline (&line, &line_size, input)) != -1);
+    }  while (! (ret = gz_getline (line_ctx, &line, &line_len, input)));
+    
 
-    if (line_ctx != NULL)
-	talloc_free (line_ctx);
+    /* EOF is normal loop termination condition, UTIL_SUCCESS is
+     * impossible here */
+    if (ret == UTIL_EOF) {
+	ret = UTIL_SUCCESS;
+    } else {
+	fprintf (stderr, "Error reading (gzipped) input: %s\n",
+		 gz_error_string (ret, input));
+    }
+
+    /* The regfree below must stay before the DONE: label, since early
+     * exits jump there without knowing whether xregcomp was reached.
+     */
 
     if (input_format == DUMP_FORMAT_SUP)
 	regfree (&regex);
 
-    if (line)
-	free (line);
+ DONE:
+    if (line_ctx != NULL)
+	talloc_free (line_ctx);
 
-    notmuch_database_destroy (notmuch);
+    if (notmuch)
+	notmuch_database_destroy (notmuch);
 
-    if (input != stdin)
-	fclose (input);
+    if (input && gzclose_r (input)) {
+	fprintf (stderr, "Error closing %s: %s\n",
+		 name_for_error, gzerror (input, NULL));
+	ret = EXIT_FAILURE;
+    }
 
     return ret ? EXIT_FAILURE : EXIT_SUCCESS;
 }
diff --git a/test/T240-dump-restore.sh b/test/T240-dump-restore.sh
index b6d8602..efe463e 100755
--- a/test/T240-dump-restore.sh
+++ b/test/T240-dump-restore.sh
@@ -80,6 +80,20 @@ notmuch dump --gzip --output=dump-gzip-outfile.gz
 gunzip dump-gzip-outfile.gz
 test_expect_equal_file dump.expected dump-gzip-outfile
 
+test_begin_subtest "restoring gzipped stdin"
+notmuch dump --gzip --output=backup.gz
+notmuch tag +new_tag '*'
+notmuch restore < backup.gz
+notmuch dump --output=dump.actual
+test_expect_equal_file dump.expected dump.actual
+
+test_begin_subtest "restoring gzipped file"
+notmuch dump --gzip --output=backup.gz
+notmuch tag +new_tag '*'
+notmuch restore --input=backup.gz
+notmuch dump --output=dump.actual
+test_expect_equal_file dump.expected dump.actual
+
 # Note, we assume all messages from cworth have a message-id
 # containing cworth.org
 
-- 
1.9.0



More information about the notmuch mailing list