[PATCH v5 0/6] Use Xapian query syntax for batch-tag dump/restore
Mark Walters
markwalters1009 at gmail.com
Sun Jan 6 14:05:58 PST 2013
LGTM too +1
Mark
On Sun, 06 Jan 2013, Jani Nikula <jani at nikula.org> wrote:
> On Sun, 06 Jan 2013, Austin Clements <amdragon at MIT.EDU> wrote:
>> This obsoletes
>>
>> id:1356936162-2589-1-git-send-email-amdragon at mit.edu
>>
>> v5 should address all of the comments on v4 except those I
>> specifically replied to (via the ML or IRC). It also adds a new patch
>> at the beginning that makes missing message IDs non-fatal in restore,
>> like they were in 0.14. This patch can be pushed separately; it's in
>> this series because later tests rely on it.
>
> The series LGMT,
> Jani.
>
>>
>> The diff from v4 follows.
>>
>> diff --git a/notmuch-dump.c b/notmuch-dump.c
>> index bf01a39..a3244e0 100644
>> --- a/notmuch-dump.c
>> +++ b/notmuch-dump.c
>> @@ -103,6 +103,18 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>> message = notmuch_messages_get (messages);
>> message_id = notmuch_message_get_message_id (message);
>>
>> + if (output_format == DUMP_FORMAT_BATCH_TAG &&
>> + strchr (message_id, '\n')) {
>> + /* This will produce a line break in the output, which
>> + * would be difficult to handle in tools. However, it's
>> + * also impossible to produce an email containing a line
>> + * break in a message ID because of unfolding, so we can
>> + * safely disallow it. */
>> + fprintf (stderr, "Warning: skipping message id containing line break: \"%s\"\n", message_id);
>> + notmuch_message_destroy (message);
>> + continue;
>> + }
>> +
>> if (output_format == DUMP_FORMAT_SUP) {
>> fprintf (output, "%s (", message_id);
>> }
>> @@ -133,19 +145,10 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[])
>> if (output_format == DUMP_FORMAT_SUP) {
>> fputs (")\n", output);
>> } else {
>> - if (strchr (message_id, '\n')) {
>> - /* This will produce a line break in the output, which
>> - * would be difficult to handle in tools. However,
>> - * it's also impossible to produce an email containing
>> - * a line break in a message ID because of unfolding,
>> - * so we can safely disallow it. */
>> - fprintf (stderr, "Error: cannot dump message id containing line break: %s\n", message_id);
>> - return 1;
>> - }
>> if (make_boolean_term (notmuch, "id", message_id,
>> &buffer, &buffer_size)) {
>> - fprintf (stderr, "Error: failed to quote message id %s\n",
>> - message_id);
>> + fprintf (stderr, "Error quoting message id %s: %s\n",
>> + message_id, strerror (errno));
>> return 1;
>> }
>> fprintf (output, " -- %s\n", buffer);
>> diff --git a/notmuch-restore.c b/notmuch-restore.c
>> index 77a4c27..81d4d98 100644
>> --- a/notmuch-restore.c
>> +++ b/notmuch-restore.c
>> @@ -26,7 +26,8 @@
>> static regex_t regex;
>>
>> /* Non-zero return indicates an error in retrieving the message,
>> - * or in applying the tags.
>> + * or in applying the tags. Missing messages are reported, but not
>> + * considered errors.
>> */
>> static int
>> tag_message (unused (void *ctx),
>> @@ -40,13 +41,17 @@ tag_message (unused (void *ctx),
>> int ret = 0;
>>
>> status = notmuch_database_find_message (notmuch, message_id, &message);
>> - if (status || message == NULL) {
>> - fprintf (stderr, "Warning: cannot apply tags to %smessage: %s\n",
>> - message ? "" : "missing ", message_id);
>> - if (status)
>> - fprintf (stderr, "%s\n", notmuch_status_to_string (status));
>> + if (status) {
>> + fprintf (stderr, "Error applying tags to message %s: %s\n",
>> + message_id, notmuch_status_to_string (status));
>> return 1;
>> }
>> + if (message == NULL) {
>> + fprintf (stderr, "Warning: cannot apply tags to missing message: %s\n",
>> + message_id);
>> + /* We consider this a non-fatal error. */
>> + return 0;
>> + }
>>
>> /* In order to detect missing messages, this check/optimization is
>> * intentionally done *after* first finding the message. */
>> @@ -222,12 +227,17 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[])
>> if (ret == 0) {
>> ret = parse_boolean_term (line_ctx, query_string,
>> &prefix, &term);
>> - if (ret) {
>> - fprintf (stderr, "Warning: cannot parse query: %s\n",
>> - query_string);
>> + if (ret && errno == EINVAL) {
>> + fprintf (stderr, "Warning: cannot parse query: %s (skipping)\n", query_string);
>> continue;
>> + } else if (ret) {
>> + /* This is more fatal (e.g., out of memory) */
>> + fprintf (stderr, "Error parsing query: %s\n",
>> + strerror (errno));
>> + ret = 1;
>> + break;
>> } else if (strcmp ("id", prefix) != 0) {
>> - fprintf (stderr, "Warning: not an id query: %s\n", query_string);
>> + fprintf (stderr, "Warning: not an id query: %s (skipping)\n", query_string);
>> continue;
>> }
>> query_string = term;
>> diff --git a/test/dump-restore b/test/dump-restore
>> index f9ae5b3..f076c12 100755
>> --- a/test/dump-restore
>> +++ b/test/dump-restore
>> @@ -202,18 +202,32 @@ a
>> + +e -- id:20091117232137.GA7669 at griffis1.net
>> # valid id, but warning about missing message
>> +e id:missing_message_id
>> +# exercise parser
>> ++e -- id:some)stuff
>> ++e -- id:some stuff
>> ++e -- id:some"stuff
>> ++e -- id:"a_message_id_with""_a_quote"
>> ++e -- id:"a message id with spaces"
>> ++e -- id:an_id_with_leading_and_trailing_ws \
>> +
>> EOF
>>
>> cat <<EOF > EXPECTED
>> -Warning: cannot parse query: a
>> +Warning: cannot parse query: a (skipping)
>> Warning: no query string [+0]
>> Warning: no query string [+a +b]
>> Warning: missing query string [+a +b ]
>> Warning: no query string after -- [+c +d --]
>> Warning: hex decoding of tag %zz failed [+%zz -- id:whatever]
>> -Warning: cannot parse query: id:"
>> -Warning: not an id query: tag:abc
>> +Warning: cannot parse query: id:" (skipping)
>> +Warning: not an id query: tag:abc (skipping)
>> Warning: cannot apply tags to missing message: missing_message_id
>> +Warning: cannot parse query: id:some)stuff (skipping)
>> +Warning: cannot parse query: id:some stuff (skipping)
>> +Warning: cannot apply tags to missing message: some"stuff
>> +Warning: cannot apply tags to missing message: a_message_id_with"_a_quote
>> +Warning: cannot apply tags to missing message: a message id with spaces
>> +Warning: cannot apply tags to missing message: an_id_with_leading_and_trailing_ws
>> EOF
>>
>> test_expect_equal_file EXPECTED OUTPUT
>> diff --git a/util/string-util.c b/util/string-util.c
>> index 52c7781..aba9aa8 100644
>> --- a/util/string-util.c
>> +++ b/util/string-util.c
>> @@ -23,6 +23,7 @@
>> #include "talloc.h"
>>
>> #include <ctype.h>
>> +#include <errno.h>
>>
>> char *
>> strtok_len (char *s, const char *delim, size_t *len)
>> @@ -36,6 +37,12 @@ strtok_len (char *s, const char *delim, size_t *len)
>> return *len ? s : NULL;
>> }
>>
>> +static int
>> +is_unquoted_terminator (unsigned char c)
>> +{
>> + return c == 0 || c <= ' ' || c == ')';
>> +}
>> +
>> int
>> make_boolean_term (void *ctx, const char *prefix, const char *term,
>> char **buf, size_t *len)
>> @@ -49,7 +56,8 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>> * containing a quote, even though it only matters at the
>> * beginning, and anything containing non-ASCII text. */
>> for (in = term; *in && !need_quoting; in++)
>> - if (*in <= ' ' || *in == ')' || *in == '"' || (unsigned char)*in > 127)
>> + if (is_unquoted_terminator (*in) || *in == '"'
>> + || (unsigned char)*in > 127)
>> need_quoting = 1;
>>
>> if (need_quoting)
>> @@ -67,8 +75,10 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>> *buf = talloc_realloc (ctx, *buf, char, *len);
>> }
>>
>> - if (! *buf)
>> - return 1;
>> + if (! *buf) {
>> + errno = ENOMEM;
>> + return -1;
>> + }
>>
>> out = *buf;
>>
>> @@ -102,7 +112,7 @@ make_boolean_term (void *ctx, const char *prefix, const char *term,
>> static const char*
>> skip_space (const char *str)
>> {
>> - while (*str && isspace (*str))
>> + while (*str && isspace ((unsigned char) *str))
>> ++str;
>> return str;
>> }
>> @@ -111,6 +121,7 @@ int
>> parse_boolean_term (void *ctx, const char *str,
>> char **prefix_out, char **term_out)
>> {
>> + int err = EINVAL;
>> *prefix_out = *term_out = NULL;
>>
>> /* Parse prefix */
>> @@ -119,12 +130,20 @@ parse_boolean_term (void *ctx, const char *str,
>> if (! pos)
>> goto FAIL;
>> *prefix_out = talloc_strndup (ctx, str, pos - str);
>> + if (! *prefix_out) {
>> + err = ENOMEM;
>> + goto FAIL;
>> + }
>> ++pos;
>>
>> /* Implement de-quoting compatible with make_boolean_term. */
>> if (*pos == '"') {
>> char *out = talloc_array (ctx, char, strlen (pos));
>> int closed = 0;
>> + if (! out) {
>> + err = ENOMEM;
>> + goto FAIL;
>> + }
>> *term_out = out;
>> /* Skip the opening quote, find the closing quote, and
>> * un-double doubled internal quotes. */
>> @@ -148,18 +167,25 @@ parse_boolean_term (void *ctx, const char *str,
>> } else {
>> const char *start = pos;
>> /* Check for text after the boolean term. */
>> - while (*pos > ' ' && *pos != ')')
>> + while (! is_unquoted_terminator (*pos))
>> ++pos;
>> - if (*skip_space (pos))
>> + if (*skip_space (pos)) {
>> + err = EINVAL;
>> goto FAIL;
>> + }
>> /* No trailing text; dup the string so the caller can free
>> * it. */
>> *term_out = talloc_strndup (ctx, start, pos - start);
>> + if (! *term_out) {
>> + err = ENOMEM;
>> + goto FAIL;
>> + }
>> }
>> return 0;
>>
>> FAIL:
>> talloc_free (*prefix_out);
>> talloc_free (*term_out);
>> - return 1;
>> + errno = err;
>> + return -1;
>> }
>> diff --git a/util/string-util.h b/util/string-util.h
>> index 8b9fe50..0194607 100644
>> --- a/util/string-util.h
>> +++ b/util/string-util.h
>> @@ -28,7 +28,8 @@ char *strtok_len (char *s, const char *delim, size_t *len);
>> * can be parsed by parse_boolean_term.
>> *
>> * Output is into buf; it may be talloc_realloced.
>> - * Return: 0 on success, non-zero on memory allocation failure.
>> + * Return: 0 on success, -1 on error. errno will be set to ENOMEM if
>> + * there is an allocation failure.
>> */
>> int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>> char **buf, size_t *len);
>> @@ -42,7 +43,8 @@ int make_boolean_term (void *talloc_ctx, const char *prefix, const char *term,
>> * of the quoting styles supported by Xapian (and hence notmuch).
>> * *prefix_out and *term_out will be talloc'd with context ctx.
>> *
>> - * Return: 0 on success, non-zero on parse error.
>> + * Return: 0 on success, -1 on error. errno will be set to EINVAL if
>> + * there is a parse error or ENOMEM if there is an allocation failure.
>> */
>> int
>> parse_boolean_term (void *ctx, const char *str,
More information about the notmuch
mailing list