V3b of batch tagging/dump/restore patches

david at tethera.net david at tethera.net
Thu Dec 6 17:26:38 PST 2012


Here is a second piece of the tagging/dump/restore series.

it obsoletes 8 of the patches in the series 

   id:1353792017-31459-1-git-send-email-david at tethera.net

 [Patch v3b 1/9] notmuch-dump: add --format=(batch-tag|sup)
 [Patch v3b 3/9] util: add string-util.[ch]
 [Patch v3b 4/9] tag-util.[ch]: New files for common tagging routines
 [Patch v3b 5/9] notmuch-restore: add support for input format
 [Patch v3b 6/9] test: update dump-restore roundtripping test for
 [Patch v3b 7/9] test: second set of dump/restore --format=batch-tag
 [Patch v3b 8/9] notmuch-{dump,restore}.1: document new format
 [Patch v3b 9/9] tag-util: optimization of tag application

It adds one new patch

 [Patch v3b 2/9] test: add sanity check for dump --format=batch-tag.

I still have to work through some of the comments on the batch
tagging; I still intend for that to follow fairly shortly, I just want
to break the series at a logical point.

Most of the changes are detailed in the following log (of changes
before I squashed them)

commit 5045d2f58beb4c3bc8e10f9419341e1c1b7748f2
Author: David Bremner <bremner at debian.org>
Date:   Tue Dec 4 13:39:48 2012 -0400

    fixup for tag-util error messages

diff --git a/tag-util.c b/tag-util.c
index 2bb8355..ea05ee5 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -86,9 +86,8 @@ parse_tag_line (void *ctx, char *line,
 
     /* tok now points to the query string */
     if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	/* FIXME: line has been modified! */
-	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-		 line);
+	fprintf (stderr, "Hex decoding of %s failed\n",
+		 tok);
 	return 1;
     }
 

commit 5a1d697dc408c67424d586b6377976fdfb86d4ed
Author: David Bremner <bremner at debian.org>
Date:   Tue Dec 4 13:40:09 2012 -0400

    fixup for restore error messages

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 22fcd2d..e7584bb 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -77,7 +77,7 @@ parse_sup_line (void *ctx, char *line,
 
     rerr = xregexec (&regex, line, 3, match, 0);
     if (rerr == REG_NOMATCH) {
-	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
+	fprintf (stderr, "Warning: Ignoring invalid sup format line: %s\n",
 		 line);
 	return 1;
     }

commit 7136d3b4e974a4ba8e247f328c71362efd0c9e11
Author: David Bremner <bremner at debian.org>
Date:   Tue Dec 4 22:35:30 2012 -0400

    fixup for tag-util error handling and permit the null set of tag operations

diff --git a/tag-util.c b/tag-util.c
index ea05ee5..de7ecc8 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -21,6 +21,8 @@ parse_tag_line (void *ctx, char *line,
 {
     char *tok = line;
     size_t tok_len = 0;
+    char *line_for_error=talloc_strdup (ctx, line);
+    int ret=0;
 
     chomp_newline (line);
 
@@ -29,8 +31,10 @@ parse_tag_line (void *ctx, char *line,
 	tok++;
 
     /* Skip empty and comment lines. */
-    if (*tok == '\0' || *tok == '#')
-	    return 1;
+    if (*tok == '\0' || *tok == '#') {
+	ret=1;
+	goto DONE;
+    }
 
     tag_op_list_reset (tag_ops);
 
@@ -51,8 +55,9 @@ parse_tag_line (void *ctx, char *line,
 
 	/* If tag is terminated by NUL, there's no query string. */
 	if (*(tok + tok_len) == '\0') {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "no query string: %s\n", line_for_error);
+	    ret = 1;
+	    goto DONE;
 	}
 
 	/* Terminate, and start next token after terminator. */
@@ -63,37 +68,43 @@ parse_tag_line (void *ctx, char *line,
 
 	/* Maybe refuse empty tags. */
 	if (!(flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
+	    goto DONE;
 	}
 
 	/* Decode tag. */
 	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
-	    tok = NULL;
-	    break;
+	    fprintf (stderr, "Hex decoding of tag %s failed\n",
+		 tag);
+	    ret = 1;
+	    goto DONE;
 	}
 
-	if (tag_op_list_append (ctx, tag_ops, tag, remove))
-	    return -1;
+	if (tag_op_list_append (ctx, tag_ops, tag, remove)) {
+	    ret = -1;
+	    goto DONE;
+	}
     }
 
-    if (tok == NULL || tag_ops->count == 0) {
-	/* FIXME: line has been modified! */
+    if (tok == NULL) {
 	fprintf (stderr, "Warning: Ignoring invalid input line: %s\n",
-		 line);
-	return 1;
+		 line_for_error);
+	ret = 1;
+	goto DONE;
     }
 
     /* tok now points to the query string */
     if (hex_decode_inplace (tok) != HEX_SUCCESS) {
-	fprintf (stderr, "Hex decoding of %s failed\n",
+	fprintf (stderr, "Hex decoding of query %s failed\n",
 		 tok);
-	return 1;
+	ret = 1;
+	goto DONE;
     }
 
     *query_string = tok;
-
-    return 0;
+ DONE:
+    talloc_free (line_for_error);
+    return ret;
 }
 
 static inline void

commit 5912c738d3683aae24ca5529839eb0513520d190
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 07:49:15 2012 -0400

    fixup for id:87wqx1qrmq.fsf at nikula.org; use size_t for tag_op_list count

diff --git a/tag-util.c b/tag-util.c
index de7ecc8..1a0cf53 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -1,6 +1,7 @@
 #include "string-util.h"
 #include "tag-util.h"
 #include "hex-escape.h"
+#include <assert.h>
 
 struct _tag_operation_t {
     const char *tag;
@@ -9,8 +10,8 @@ struct _tag_operation_t {
 
 struct _tag_op_list_t {
     tag_operation_t *ops;
-    int count;
-    int size;
+    size_t count;
+    size_t size;
 };
 
 int
@@ -44,7 +45,7 @@ parse_tag_line (void *ctx, char *line,
 	char *tag;
 
 	/* Optional explicit end of tags marker. */
-	if (strncmp (tok, "--", tok_len) == 0) {
+	if (tok_len == 2 && strncmp (tok, "--", tok_len) == 0) {
 	    tok = strtok_len (tok + tok_len, " ", &tok_len);
 	    break;
 	}
@@ -126,17 +127,16 @@ makes_changes (notmuch_message_t *message,
 	       tag_op_list_t *list,
 	       tag_op_flag_t flags)
 {
-
-    int i;
-
     notmuch_tags_t *tags;
     notmuch_bool_t changes = FALSE;
+    size_t i;
 
     /* First, do we delete an existing tag? */
     changes = FALSE;
     for (tags = notmuch_message_get_tags (message);
 	 ! changes && notmuch_tags_valid (tags);
 	 notmuch_tags_move_to_next (tags)) {
+
 	const char *cur_tag = notmuch_tags_get (tags);
 	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
 
@@ -182,8 +182,7 @@ tag_op_list_apply (notmuch_message_t *message,
 		   tag_op_list_t *list,
 		   tag_op_flag_t flags)
 {
-    int i;
-
+    size_t i;
     notmuch_status_t status = 0;
     tag_operation_t *tag_ops = list->ops;
 
@@ -199,7 +198,7 @@ tag_op_list_apply (notmuch_message_t *message,
     if (flags & TAG_FLAG_REMOVE_ALL) {
 	status = notmuch_message_remove_all_tags (message);
 	if (status) {
-	    message_error (message, status, "removing all tags" );
+	    message_error (message, status, "removing all tags");
 	    return status;
 	}
     }
@@ -241,8 +240,8 @@ tag_op_list_apply (notmuch_message_t *message,
 }
 
 
-/* Array of tagging operations (add or remove), terminated with an
- * empty element. Size will be increased as necessary. */
+/* Array of tagging operations (add or remove.  Size will be increased
+ * as necessary. */
 
 tag_op_list_t *
 tag_op_list_create (void *ctx)
@@ -299,6 +298,7 @@ tag_op_list_append (void *ctx,
 notmuch_bool_t
 tag_op_list_isremove (const tag_op_list_t *list, size_t i)
 {
+    assert (i < list->count);
     return list->ops[i].remove;
 }
 
@@ -329,5 +329,6 @@ tag_op_list_size (const tag_op_list_t *list)
 const char *
 tag_op_list_tag (const tag_op_list_t *list, size_t i)
 {
+    assert (i < list->count);
     return list->ops[i].tag;
 }

commit e1af69d57854d5c6e927b0870be97e9d2e2f28ea
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 07:51:43 2012 -0400

    changes for id:87wqx1qrmq.fsf at nikula.org. tag_op_list_t.count -> size_t

diff --git a/tag-util.c b/tag-util.c
index 1a0cf53..ad13147 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -22,8 +22,8 @@ parse_tag_line (void *ctx, char *line,
 {
     char *tok = line;
     size_t tok_len = 0;
-    char *line_for_error=talloc_strdup (ctx, line);
-    int ret=0;
+    char *line_for_error = talloc_strdup (ctx, line);
+    int ret = 0;
 
     chomp_newline (line);
 
@@ -33,7 +33,7 @@ parse_tag_line (void *ctx, char *line,
 
     /* Skip empty and comment lines. */
     if (*tok == '\0' || *tok == '#') {
-	ret=1;
+	ret = 1;
 	goto DONE;
     }
 
@@ -68,7 +68,7 @@ parse_tag_line (void *ctx, char *line,
 	tag = tok + 1;
 
 	/* Maybe refuse empty tags. */
-	if (!(flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
+	if (! (flags & TAG_FLAG_BE_GENEROUS) && *tag == '\0') {
 	    fprintf (stderr, "Error: empty tag: %s\n", line_for_error);
 	    goto DONE;
 	}
@@ -76,7 +76,7 @@ parse_tag_line (void *ctx, char *line,
 	/* Decode tag. */
 	if (hex_decode_inplace (tag) != HEX_SUCCESS) {
 	    fprintf (stderr, "Hex decoding of tag %s failed\n",
-		 tag);
+		     tag);
 	    ret = 1;
 	    goto DONE;
 	}
@@ -103,7 +103,7 @@ parse_tag_line (void *ctx, char *line,
     }
 
     *query_string = tok;
- DONE:
+  DONE:
     talloc_free (line_for_error);
     return ret;
 }

commit 3f00fa4eba68876635df86ea54f60b68d172f580
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 08:30:58 2012 -0400

    changes for id:87zk1wd1ko.fsf at nikula.org

diff --git a/notmuch-restore.c b/notmuch-restore.c
index e7584bb..41b742f 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -48,11 +48,10 @@ tag_message (unused (void *ctx),
 
     /* In order to detect missing messages, this check/optimization is
      * intentionally done *after* first finding the message. */
-    if ( (flags & TAG_FLAG_REMOVE_ALL) || (tag_op_list_size (tag_ops)))
+    if ((flags & TAG_FLAG_REMOVE_ALL) || tag_op_list_size (tag_ops))
 	tag_op_list_apply (message, tag_ops, flags);
 
-    if (message)
-	notmuch_message_destroy (message);
+    notmuch_message_destroy (message);
 
     return ret;
 }
@@ -184,6 +183,12 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
     if (line_len == 0)
 	return 0;
 
+    tag_ops = tag_op_list_create (ctx);
+    if (tag_ops == NULL) {
+	fprintf (stderr, "Out of memory.\n");
+	return 1;
+    }
+
     for (p = line; *p; p++) {
 	if (*p == '(')
 	    input_format = DUMP_FORMAT_SUP;
@@ -198,28 +203,28 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 		       REG_EXTENDED) )
 	    INTERNAL_ERROR ("compile time constant regex failed.");
 
-    tag_ops = tag_op_list_create (ctx);
-    if (tag_ops == NULL) {
-	fprintf (stderr, "Out of memory.\n");
-	return 1;
-    }
-
     do {
 	char *query_string;
 
 	if (input_format == DUMP_FORMAT_SUP) {
-	    ret =  parse_sup_line (ctx, line, &query_string, tag_ops);
+	    ret = parse_sup_line (ctx, line, &query_string, tag_ops);
 	} else {
-	    ret =  parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
-				   &query_string, tag_ops);
+	    ret = parse_tag_line (ctx, line, TAG_FLAG_BE_GENEROUS,
+				  &query_string, tag_ops);
 
 	    if (ret == 0) {
-		if ( strncmp ("id:", query_string, 3) != 0) {
+		if (strncmp ("id:", query_string, 3) != 0) {
 		    fprintf (stderr, "Unsupported query: %s\n", query_string);
 		    continue;
 		}
-		/* delete id: from front of string; tag_message expects a
-		 * raw message-id */
+		/* delete id: from front of string; tag_message
+		 * expects a raw message-id.
+		 *
+		 * XXX: Note that query string id:foo and bar will be
+		 * interpreted as a message id "foo and bar". This
+		 * should eventually be fixed to give a better error
+		 * message.
+		 */
 		query_string = query_string + 3;
 	    }
 	}
@@ -233,8 +238,8 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 
     }  while ((line_len = getline (&line, &line_size, input)) != -1);
 
-
-    regfree (&regex);
+    if (input_format == DUMP_FORMAT_SUP)
+	regfree (&regex);
 
     if (line)
 	free (line);

commit 473fa928931080004706cff169c7cc9337601172
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 13:33:42 2012 -0400

    fixup: notmuch-restore only auto-detect in auto mode

diff --git a/notmuch-restore.c b/notmuch-restore.c
index 41b742f..ceec2d3 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -189,7 +189,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
 	return 1;
     }
 
-    for (p = line; *p; p++) {
+    for (p = line; (input_format == DUMP_FORMAT_AUTO) && *p; p++) {
 	if (*p == '(')
 	    input_format = DUMP_FORMAT_SUP;
     }

commit ee7d25521e3f6f70b3f4fb79f586a11d39efec15
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 19:39:37 2012 -0400

    changes for id:87wqx0d124.fsf at nikula.org; no deprecation for the moment

diff --git a/man/man1/notmuch-dump.1 b/man/man1/notmuch-dump.1
index 9f59905..770b00f 100644
--- a/man/man1/notmuch-dump.1
+++ b/man/man1/notmuch-dump.1
@@ -64,15 +64,16 @@ and tags containing whitespace or non-\fBascii\fR(7) characters.
 Each line has the form
 
 .RS 4
-.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " <" encoded-message-id >
+.RI "+<" "encoded-tag" "> " "" "+<" "encoded-tag" "> ... -- " "" " id:<" encoded-message-id >
 
 where encoded means that every byte not matching the regex
-.B [A-Za-z0-9+-_@=.:,]
+.B [A-Za-z0-9@=.,_+-]
 is replace by
 .B %nn
 where nn is the two digit hex encoding.
 The astute reader will notice this is a special case of the batch input
-format for \fBnotmuch-tag\fR(1).
+format for \fBnotmuch-tag\fR(1); note that the single message-id query is
+mandatory for \fBnotmuch-restore\fR(1).
 
 .RE
 
diff --git a/man/man1/notmuch-restore.1 b/man/man1/notmuch-restore.1
index 3860829..6bba628 100644
--- a/man/man1/notmuch-restore.1
+++ b/man/man1/notmuch-restore.1
@@ -32,8 +32,8 @@ replacing each message's tags as they are read in from the dump file.
 .TP 4
 .B \-\-format=(sup|batch-tag|auto)
 
-Notmuch restore supports two plain text dump formats, with one message-id
-per line, and a list of tags.
+Notmuch restore supports two plain text dump formats, with each line
+specifying a message-id and a set of tags.
 For details of the actual formats, see \fBnotmuch-dump\fR(1).
 
 .RS 4

commit 96c383be46cdb8ceaf7ed15590ef876799d6357e
Author: David Bremner <bremner at debian.org>
Date:   Thu Dec 6 20:40:55 2012 -0400

    Changes for id:87txs4cy7v.fsf at nikula.org

diff --git a/tag-util.c b/tag-util.c
index ad13147..9ab07e9 100644
--- a/tag-util.c
+++ b/tag-util.c
@@ -140,9 +140,11 @@ makes_changes (notmuch_message_t *message,
 	const char *cur_tag = notmuch_tags_get (tags);
 	int last_op =  (flags & TAG_FLAG_REMOVE_ALL) ? -1 : 0;
 
-	for (i = 0; i < list->count; i++) {
+	/* slight contortions to count down with an unsigned index */
+	for (i = list->count; i-- > 0; /*nothing*/) {
 	    if (strcmp (cur_tag, list->ops[i].tag) == 0) {
 		last_op = list->ops[i].remove ? -1 : 1;
+		break;
 	    }
 	}
 
@@ -157,6 +159,9 @@ makes_changes (notmuch_message_t *message,
     for (i = 0; i < list->count; i++) {
 	notmuch_bool_t exists = FALSE;
 
+	if (list->ops[i].remove)
+	    continue;
+
 	for (tags = notmuch_message_get_tags (message);
 	     notmuch_tags_valid (tags);
 	     notmuch_tags_move_to_next (tags)) {
@@ -168,9 +173,11 @@ makes_changes (notmuch_message_t *message,
 	}
 	notmuch_tags_destroy (tags);
 
-	/* the following test is conservative, it's ok to think we
-	 * make changes when we don't */
-	if ( ! exists && ! list->ops[i].remove )
+	/* the following test is conservative,
+	 * in the sense it ignores cases like +foo ... -foo
+	 * but this is OK from a correctness point of view
+	 */
+	if (! exists)
 	    return TRUE;
     }
     return FALSE;


More information about the notmuch mailing list