[PATCH] FIXED: Added better support for multiple structured output formats.
Mark Walters
markwalters1009 at gmail.com
Tue Jul 10 05:45:20 PDT 2012
> notmuch-search.c | 453 +++++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 326 insertions(+), 127 deletions(-)
Hi for such a large patch this was surprisingly easy to follow (though I
am not saying I have worked through all the details). However, there are
some preliminary comments below which I think are worth fixing before a
full review as they will make it easier for other reviewers. One thing
that would help a lot is some comments.
Best wishes
Mark
> As discussed in <id:20120121220407.GK16740 at mit.edu>, this patch adds
> support for new structured output formats (like s-expressions) by using
> stateful structure_printers. An implementation of the JSON structure
> printer that passes all tests is included.
If I understand it correctly, the output should be identical to before?
If that is the case I think it's worth saying.
> Structure printers have functions for starting a map, a list, closing
> any number of these two, printing a map key, a number value, a string
> value, and boolean values. By threading a state through the
> invocations, arbitrary structured formatting can be achieved.
I think I would also add something saying that the text format is just
different (and that a significant chunk of the patch is just that).
> In a second patch, the structured output code should be isolated in a
> separate file, and also used in all other parts of notmuch.
> ---
>
> diff --git a/notmuch-search.c b/notmuch-search.c
> index 3be296d..3413b79 100644
> --- a/notmuch-search.c
> +++ b/notmuch-search.c
> @@ -28,6 +28,181 @@ typedef enum {
> OUTPUT_TAGS
> } output_t;
>
> +typedef void * structure_printer_state_t;
> +
> +typedef struct structure_printer {
> + int (*map)(void *state);
> + int (*list)(void *state);
> + void (*pop)(void *state, int level);
> + void (*map_key)(void *state, const char *key);
> + void (*number)(void *state, int val);
> + void (*string)(void *state, const char *val);
> + void (*bool)(void *state, notmuch_bool_t val);
> + void *(*initial_state)(const struct structure_printer *sp, FILE *output);
> +} structure_printer_t;
I think this needs some comments on what these functions do. number,
string and boolean are relatively clear (but saying "output a number"
etc is worthwhile). But what map and list do is much less clear and
definitely deserves a comment. And it is definitely unclear what they
are meant to return.
I would also say something about the variable state: eg "the variable
`state` can contain any state the structure_printer wishes to maintain".
> +
> +/* JSON structure printer */
> +
> +typedef struct json_list {
> + int type;
> + int first_already_seen;
> + struct json_list *rest;
> +} json_list_t;
> +
> +#define TYPE_JSON_MAP 1
> +#define TYPE_JSON_ARRAY 2
> +
> +typedef struct json_state {
> + FILE *output;
> + json_list_t *stack;
> + int level;
> +} json_state_t;
> +
> +int json_map(void *state);
> +int json_list(void *state);
> +void json_pop(void *state, int level);
> +void json_map_key(void *state, const char *key);
> +void json_number(void *state, int val);
> +void json_string(void *state, const char *val);
> +void json_bool(void *state, notmuch_bool_t val);
> +void *json_initial_state(const struct structure_printer *sp, FILE *output);
> +
> +int json_map(void *st) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> + fputs(",", output);
> + if(state->level == 1)
> + fputs("\n", output);
> + else
> + fputs(" ", output);
> + }
> + if(state->stack != NULL) {
> + state->stack->first_already_seen = TRUE;
> + }
> + fputs("{", output);
> + void *ctx_json_map = talloc_new (0);
> + json_list_t *el = talloc(ctx_json_map, json_list_t);
> + el->type = TYPE_JSON_MAP;
> + el->first_already_seen = FALSE;
> + el->rest = state->stack;
> + state->stack = el;
> + return state->level++;
> +}
> +
> +int json_list(void *st) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> + fputs(",", output);
> + if(state->level == 1)
> + fputs("\n", output);
> + else
> + fputs(" ", output);
> + }
> + if(state->stack != NULL) {
> + state->stack->first_already_seen = TRUE;
> + }
> + fputs("[", output);
> + void *ctx_json_map = talloc_new (0);
> + json_list_t *el = talloc(ctx_json_map, json_list_t);
> + el->type = TYPE_JSON_ARRAY;
> + el->first_already_seen = FALSE;
> + el->rest = state->stack;
> + state->stack = el;
> + return state->level++;
> +}
> +
> +void json_pop(void *st, int level) {
> + int i;
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + for(i = state->level; i > level; i--) {
> + json_list_t *tos = state->stack;
> + if(tos->type == TYPE_JSON_MAP) {
> + fputs("}", output);
> + }
> + if(tos->type == TYPE_JSON_ARRAY) {
> + fputs("]", output);
> + }
> + state->stack = tos->rest;
> + state->level--;
> + talloc_free(tos);
> + }
> + if(state->level == 0)
> + fputs("\n", output);
> +}
> +
> +void json_map_key(void *st, const char *key) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + if(state->stack != NULL && state->stack->first_already_seen) {
> + fputs(",\n", output);
> + }
> + fputs("\"", output);
> + fputs(key, output);
> + fputs("\": ", output);
> +}
> +
> +void json_number(void *st, int val) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> + fputs(", ", output);
> + }
> + state->stack->first_already_seen = TRUE;
> + fprintf(output, "%i", val);
> +}
> +
> +void json_string(void *st, const char *val) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + void *ctx = talloc_new(0);
> + if(state->stack != NULL && state->stack->type == TYPE_JSON_ARRAY && state->stack->first_already_seen) {
> + fputs(",", output);
> + if(state->level == 1)
> + fputs("\n", output);
> + else
> + fputs(" ", output);
> + }
> +
> + state->stack->first_already_seen = TRUE;
> + fprintf(output, "%s", json_quote_str(ctx, val));
> + talloc_free(ctx);
> +}
> +
> +void json_bool(void *st, notmuch_bool_t val) {
> + json_state_t *state = (json_state_t*)st;
> + FILE *output = state->output;
> + if(val)
> + fputs("true", output);
> + else
> + fputs("false", output);
> +}
> +
> +void *json_initial_state(const struct structure_printer *sp, FILE *output) {
> + (void)sp;
> + json_state_t *st = talloc(0, json_state_t);
> + st->level = 0;
> + st->stack = NULL;
> + st->output = output;
> + return st;
> +}
> +
> +structure_printer_t json_structure_printer = {
> + &json_map,
> + &json_list,
> + &json_pop,
> + &json_map_key,
> + &json_number,
> + &json_string,
> + &json_bool,
> + &json_initial_state
> +};
Since you forward declare all these functions I think this would be more
natural before the full declarations.
> +structure_printer_t *text_structure_printer = NULL;
I would rename this particularly given the next comment: it makes it
seem like there is non-structured text output and structured text
output. Maybe `unstructured_text_printer`?
> +
> +/* legacy, only needed for non-structured text output */
> typedef struct search_format {
> const char *results_start;
> const char *item_start;
> @@ -51,6 +226,7 @@ typedef struct search_format {
> const char *results_null;
> } search_format_t;
>
> +
> static void
> format_item_id_text (const void *ctx,
> const char *item_type,
just a whitespace change so omit.
> @@ -64,6 +240,7 @@ format_thread_text (const void *ctx,
> const int total,
> const char *authors,
> const char *subject);
> +
> static const search_format_t format_text = {
> "",
> "",
just a whitespace change so omit.
(there are also some trailing whitespaces in various of the lines that
should be omitted).
> @@ -78,35 +255,6 @@ static const search_format_t format_text = {
> };
>
> static void
> -format_item_id_json (const void *ctx,
> - const char *item_type,
> - const char *item_id);
> -
> -static void
> -format_thread_json (const void *ctx,
> - const char *thread_id,
> - const time_t date,
> - const int matched,
> - const int total,
> - const char *authors,
> - const char *subject);
> -
> -/* Any changes to the JSON format should be reflected in the file
> - * devel/schemata. */
> -static const search_format_t format_json = {
> - "[",
> - "{",
> - format_item_id_json,
> - format_thread_json,
> - "\"tags\": [",
> - "\"%s\"", ", ",
> - "]", ",\n",
> - "}",
> - "]\n",
> - "]\n",
> -};
> -
> -static void
> format_item_id_text (unused (const void *ctx),
> const char *item_type,
> const char *item_id)
> @@ -153,50 +301,9 @@ format_thread_text (const void *ctx,
> talloc_free (ctx_quote);
> }
>
> -static void
> -format_item_id_json (const void *ctx,
> - unused (const char *item_type),
> - const char *item_id)
> -{
> - void *ctx_quote = talloc_new (ctx);
> -
> - printf ("%s", json_quote_str (ctx_quote, item_id));
> -
> - talloc_free (ctx_quote);
> -
> -}
> -
> -static void
> -format_thread_json (const void *ctx,
> - const char *thread_id,
> - const time_t date,
> - const int matched,
> - const int total,
> - const char *authors,
> - const char *subject)
> -{
> - void *ctx_quote = talloc_new (ctx);
> -
> - printf ("\"thread\": %s,\n"
> - "\"timestamp\": %ld,\n"
> - "\"date_relative\": \"%s\",\n"
> - "\"matched\": %d,\n"
> - "\"total\": %d,\n"
> - "\"authors\": %s,\n"
> - "\"subject\": %s,\n",
> - json_quote_str (ctx_quote, thread_id),
> - date,
> - notmuch_time_relative_date (ctx, date),
> - matched,
> - total,
> - json_quote_str (ctx_quote, authors),
> - json_quote_str (ctx_quote, subject));
> -
> - talloc_free (ctx_quote);
> -}
> -
> static int
> -do_search_threads (const search_format_t *format,
> +do_search_threads (const structure_printer_t *format,
> + void *state,
> notmuch_query_t *query,
> notmuch_sort_t sort,
> output_t output,
> @@ -209,7 +316,8 @@ do_search_threads (const search_format_t *format,
> time_t date;
> int first_thread = 1;
> int i;
> -
> + int outermost_level = 0;
> + int items_level = 0;
> if (offset < 0) {
> offset += notmuch_query_count_threads (query);
> if (offset < 0)
> @@ -220,7 +328,10 @@ do_search_threads (const search_format_t *format,
> if (threads == NULL)
> return 1;
>
> - fputs (format->results_start, stdout);
> + if(format == text_structure_printer)
> + fputs(format_text.results_start, stdout);
> + else
> + outermost_level = format->list(state);
>
> for (i = 0;
> notmuch_threads_valid (threads) && (limit < 0 || i < offset + limit);
> @@ -235,43 +346,93 @@ do_search_threads (const search_format_t *format,
> continue;
> }
>
> - if (! first_thread)
> - fputs (format->item_sep, stdout);
> + if (format == text_structure_printer && ! first_thread)
> + fputs (format_text.item_sep, stdout);
>
> if (output == OUTPUT_THREADS) {
> - format->item_id (thread, "thread:",
> - notmuch_thread_get_thread_id (thread));
> + if(format == text_structure_printer) {
> + format_text.item_id (thread, "thread:",
> + notmuch_thread_get_thread_id (thread));
> + }
> + else {
> + char buffer[128];
> + strncpy(buffer, "thread:", 1 + strlen("thread:"));
> + strncat(buffer, notmuch_thread_get_thread_id (thread), 128 - strlen("thread:"));
> + format->string(state, buffer);
This seems rather ugly: wouldn't a snprintf or possibly a talloc_printf
or something be nicer?
> + }
> +
> } else { /* output == OUTPUT_SUMMARY */
> - fputs (format->item_start, stdout);
> + int tags_level = 0;
> + void *ctx = talloc_new (0);
> +
> + if(format == text_structure_printer)
> + fputs (format_text.item_start, stdout);
> + else
> + items_level = format->map(state);
>
> if (sort == NOTMUCH_SORT_OLDEST_FIRST)
> date = notmuch_thread_get_oldest_date (thread);
> else
> date = notmuch_thread_get_newest_date (thread);
>
> - format->thread_summary (thread,
> - notmuch_thread_get_thread_id (thread),
> - date,
> - notmuch_thread_get_matched_messages (thread),
> - notmuch_thread_get_total_messages (thread),
> - notmuch_thread_get_authors (thread),
> - notmuch_thread_get_subject (thread));
> + if(format == text_structure_printer) {
> + format_text.thread_summary (thread,
> + notmuch_thread_get_thread_id (thread),
> + date,
> + notmuch_thread_get_matched_messages (thread),
> + notmuch_thread_get_total_messages (thread),
> + notmuch_thread_get_authors (thread),
> + notmuch_thread_get_subject (thread));
> + } else {
> + format->map_key(state, "thread");
> + format->string(state, notmuch_thread_get_thread_id (thread));
> + format->map_key(state, "timestamp");
> + format->number(state, date);
> + format->map_key(state, "date_relative");
> + format->string(state, notmuch_time_relative_date(ctx, date));
> + format->map_key(state, "matched");
> + format->number(state, notmuch_thread_get_matched_messages(thread));
> + format->map_key(state, "total");
> + format->number(state, notmuch_thread_get_total_messages(thread));
> + format->map_key(state, "authors");
> + format->string(state, notmuch_thread_get_authors(thread));
> + format->map_key(state, "subject");
> + format->string(state, notmuch_thread_get_subject(thread));
> + }
> +
> + if(format == text_structure_printer) {
> + fputs (format_text.tag_start, stdout);
> + } else {
> + format->map_key(state, "tags");
> +
> + tags_level = format->list(state);
> + }
>
> - fputs (format->tag_start, stdout);
>
> for (tags = notmuch_thread_get_tags (thread);
> notmuch_tags_valid (tags);
> notmuch_tags_move_to_next (tags))
> {
> - if (! first_tag)
> - fputs (format->tag_sep, stdout);
> - printf (format->tag, notmuch_tags_get (tags));
> + if (format == text_structure_printer && ! first_tag) {
> + fputs (format_text.tag_sep, stdout);
> + }
> + if(format == text_structure_printer) {
> + printf (format_text.tag, notmuch_tags_get (tags));
> + } else {
> + format->string(state, notmuch_tags_get(tags));
> + }
> first_tag = 0;
> }
>
> - fputs (format->tag_end, stdout);
> + if(format == text_structure_printer)
> + fputs (format_text.tag_end, stdout);
> + else
> + format->pop(state, tags_level);
>
> - fputs (format->item_end, stdout);
> + if(format == text_structure_printer)
> + fputs (format_text.item_end, stdout);
> + else
> + format->pop(state, items_level);
> }
>
> first_thread = 0;
> @@ -279,16 +440,21 @@ do_search_threads (const search_format_t *format,
> notmuch_thread_destroy (thread);
> }
>
> - if (first_thread)
> - fputs (format->results_null, stdout);
> - else
> - fputs (format->results_end, stdout);
> + if(format == text_structure_printer) {
> + if (first_thread)
> + fputs (format_text.results_null, stdout);
> + else
> + fputs (format_text.results_end, stdout);
> + } else {
> + format->pop(state, outermost_level);
> + }
>
> return 0;
> }
>
> static int
> -do_search_messages (const search_format_t *format,
> +do_search_messages (const structure_printer_t *format,
> + void *state,
> notmuch_query_t *query,
> output_t output,
> int offset,
> @@ -299,6 +465,7 @@ do_search_messages (const search_format_t *format,
> notmuch_filenames_t *filenames;
> int first_message = 1;
> int i;
> + int outermost_level = 0;
>
> if (offset < 0) {
> offset += notmuch_query_count_messages (query);
> @@ -310,7 +477,10 @@ do_search_messages (const search_format_t *format,
> if (messages == NULL)
> return 1;
>
> - fputs (format->results_start, stdout);
> + if(format == text_structure_printer)
> + fputs (format_text.results_start, stdout);
> + else
> + outermost_level = format->list(state);
>
> for (i = 0;
> notmuch_messages_valid (messages) && (limit < 0 || i < offset + limit);
> @@ -328,23 +498,32 @@ do_search_messages (const search_format_t *format,
> notmuch_filenames_valid (filenames);
> notmuch_filenames_move_to_next (filenames))
> {
> - if (! first_message)
> - fputs (format->item_sep, stdout);
> -
> - format->item_id (message, "",
> - notmuch_filenames_get (filenames));
> -
> + if(format == text_structure_printer) {
> + if (! first_message)
> + fputs (format_text.item_sep, stdout);
> +
> + format_text.item_id (message, "",
> + notmuch_filenames_get (filenames));
> + } else {
> + format->string(state, notmuch_filenames_get (filenames));
> + }
> +
> first_message = 0;
> }
>
> notmuch_filenames_destroy( filenames );
>
> } else { /* output == OUTPUT_MESSAGES */
> - if (! first_message)
> - fputs (format->item_sep, stdout);
> + if(format == text_structure_printer) {
> + if (! first_message)
> + fputs (format_text.item_sep, stdout);
> +
> + format_text.item_id (message, "id:",
> + notmuch_message_get_message_id (message));
> + } else {
> + format->string(state, notmuch_message_get_message_id (message));
> + }
>
> - format->item_id (message, "id:",
> - notmuch_message_get_message_id (message));
> first_message = 0;
> }
>
> @@ -353,23 +532,29 @@ do_search_messages (const search_format_t *format,
>
> notmuch_messages_destroy (messages);
>
> - if (first_message)
> - fputs (format->results_null, stdout);
> - else
> - fputs (format->results_end, stdout);
> + if(format == text_structure_printer) {
> + if (first_message)
> + fputs (format_text.results_null, stdout);
> + else
> + fputs (format_text.results_end, stdout);
> + } else {
> + format->pop(state, outermost_level);
> + }
>
> return 0;
> }
>
> static int
> do_search_tags (notmuch_database_t *notmuch,
> - const search_format_t *format,
> + const structure_printer_t *format,
> + void *state,
> notmuch_query_t *query)
> {
> notmuch_messages_t *messages = NULL;
> notmuch_tags_t *tags;
> const char *tag;
> int first_tag = 1;
> + int outermost_level = 0;
>
> /* should the following only special case if no excluded terms
> * specified? */
> @@ -387,7 +572,10 @@ do_search_tags (notmuch_database_t *notmuch,
> if (tags == NULL)
> return 1;
>
> - fputs (format->results_start, stdout);
> + if(format == text_structure_printer)
> + fputs (format_text.results_start, stdout);
> + else
> + outermost_level = format->list(state);
>
> for (;
> notmuch_tags_valid (tags);
> @@ -395,10 +583,14 @@ do_search_tags (notmuch_database_t *notmuch,
> {
> tag = notmuch_tags_get (tags);
>
> - if (! first_tag)
> - fputs (format->item_sep, stdout);
> + if(format == text_structure_printer) {
> + if (! first_tag)
> + fputs (format_text.item_sep, stdout);
>
> - format->item_id (tags, "", tag);
> + format_text.item_id (tags, "", tag);
> + } else {
> + format->string(state, tag);
> + }
>
> first_tag = 0;
> }
> @@ -408,10 +600,14 @@ do_search_tags (notmuch_database_t *notmuch,
> if (messages)
> notmuch_messages_destroy (messages);
>
> - if (first_tag)
> - fputs (format->results_null, stdout);
> - else
> - fputs (format->results_end, stdout);
> + if(format == text_structure_printer) {
> + if (first_tag)
> + fputs (format_text.results_null, stdout);
> + else
> + fputs (format_text.results_end, stdout);
> + } else {
> + format->pop(state, outermost_level);
> + }
>
> return 0;
> }
> @@ -430,7 +626,8 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
> notmuch_query_t *query;
> char *query_str;
> notmuch_sort_t sort = NOTMUCH_SORT_NEWEST_FIRST;
> - const search_format_t *format = &format_text;
> + const structure_printer_t *format = text_structure_printer;
> + void *state = NULL;
> int opt_index, ret;
> output_t output = OUTPUT_SUMMARY;
> int offset = 0;
> @@ -475,10 +672,12 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
>
> switch (format_sel) {
> case NOTMUCH_FORMAT_TEXT:
> - format = &format_text;
> + format = text_structure_printer;
> + state = 0;
> break;
> case NOTMUCH_FORMAT_JSON:
> - format = &format_json;
> + format = &json_structure_printer;
> + state = format->initial_state(format, stdout);
> break;
> }
>
> @@ -532,14 +731,14 @@ notmuch_search_command (void *ctx, int argc, char *argv[])
> default:
> case OUTPUT_SUMMARY:
> case OUTPUT_THREADS:
> - ret = do_search_threads (format, query, sort, output, offset, limit);
> + ret = do_search_threads (format, state, query, sort, output, offset, limit);
> break;
> case OUTPUT_MESSAGES:
> case OUTPUT_FILES:
> - ret = do_search_messages (format, query, output, offset, limit);
> + ret = do_search_messages (format, state, query, output, offset, limit);
> break;
> case OUTPUT_TAGS:
> - ret = do_search_tags (notmuch, format, query);
> + ret = do_search_tags (notmuch, format, state, query);
> break;
> }
>
One final comment: you have quite a lot of things of the form
if(format == text_structure_printer) {
do_something
} else {
do_something_else
}
in some of the cases they are obviously analagous things (eg output a
string) but in other cases they look like they might really just be
different. If there are some of the latter (and I haven't worked through
it carefully enough to be sure) then my preference would be to have them
as
if(format == text_structure_printer) {
do_something
}
if (format != text_structure_printer) {
do_something_else
}
but that is a personal preference and others (and you) may easily disagree.
Best wishes
Mark
More information about the notmuch
mailing list