[PATCH v5 2/3] Add structured output formatter for JSON and text.

Fri Jul 13 19:02:11 PDT 2012

Quoth Peter Feigl on Jul 13 at 10:11 am:
> From: <craven at gmx.net>
> 
> Using the new structured printer support in sprinter.h, implement
> sprinter_json_create, which returns a new JSON structured output
> formatter. The formatter prints output similar to the existing JSON, but
> with differences in whitespace (mostly newlines, --output=summary prints
> the entire message summary on one line, not split across multiple lines).
> 
> Also implement a "structured" formatter that prints the current plain
> text format. This passes all tests, but the exact formatting is probably
> specific to notmuch-search and cannot easily (if at all) be adapted to
> be used across all of notmuch-{search,reply,show,...}.
> ---
>  Makefile.local         |   2 +
>  sprinter-json.c        | 184 +++++++++++++++++++++++++++++++++++++++++++++++
>  sprinter-text-search.c | 190 +++++++++++++++++++++++++++++++++++++++++++++++++
>  sprinter.h             |   8 +++
>  4 files changed, 384 insertions(+)
>  create mode 100644 sprinter-json.c
>  create mode 100644 sprinter-text-search.c
> 
> diff --git a/Makefile.local b/Makefile.local
> index a890df2..b6c7e0c 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -290,6 +290,8 @@ notmuch_client_srcs =		\
>  	notmuch-show.c		\
>  	notmuch-tag.c		\
>  	notmuch-time.c		\
> +	sprinter-json.c		\
> +	sprinter-text-search.c	\
>  	query-string.c		\
>  	mime-node.c		\
>  	crypto.c		\
> diff --git a/sprinter-json.c b/sprinter-json.c
> new file mode 100644
> index 0000000..215151d
> --- /dev/null
> +++ b/sprinter-json.c
> @@ -0,0 +1,184 @@
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <talloc.h>
> +#include "sprinter.h"
> +
> +struct sprinter_json {
> +    struct sprinter vtable;
> +    FILE *stream;
> +    /* Top of the state stack, or NULL if the printer is not currently
> +     * inside any aggregate types. */
> +    struct json_state *state;
> +
> +    /* A flag to signify that a separator should be inserted in the
> +     * output as soon as possible.
> +     */
> +    notmuch_bool_t insert_separator;
> +};
> +
> +struct json_state {
> +    struct json_state *parent;
> +    /* True if nothing has been printed in this aggregate yet.
> +     * Suppresses the comma before a value. */
> +    notmuch_bool_t first;
> +    /* The character that closes the current aggregate. */
> +    char close;
> +};
> +
> +/* Helper function to set up the stream to print a value.  If this
> + * value follows another value, prints a comma. */
> +static struct sprinter_json *
> +json_begin_value (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    if (spj->state) {
> +	if (! spj->state->first) {
> +	    fputc (',', spj->stream);
> +	    if (spj->insert_separator) {
> +		fputc ('\n', spj->stream);
> +		spj->insert_separator = FALSE;
> +	    } else
> +		fputc (' ', spj->stream);
> +	} else
> +	    spj->state->first = FALSE;
> +    }
> +    return spj;
> +}
> +
> +/* Helper function to begin an aggregate type.  Prints the open
> + * character and pushes a new state frame. */
> +static void
> +json_begin_aggregate (struct sprinter *sp, char open, char close)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +    struct json_state *state = talloc (spj, struct json_state);
> +
> +    fputc (open, spj->stream);
> +    state->parent = spj->state;
> +    state->first = TRUE;
> +    state->close = close;
> +    spj->state = state;
> +}
> +
> +static void
> +json_begin_map (struct sprinter *sp)
> +{
> +    json_begin_aggregate (sp, '{', '}');
> +}
> +
> +static void
> +json_begin_list (struct sprinter *sp)
> +{
> +    json_begin_aggregate (sp, '[', ']');
> +}
> +
> +static void
> +json_end (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +    struct json_state *state = spj->state;
> +
> +    fputc (spj->state->close, spj->stream);
> +    spj->state = state->parent;
> +    talloc_free (state);
> +    if (spj->state == NULL)
> +	fputc ('\n', spj->stream);
> +}
> +
> +static void
> +json_string (struct sprinter *sp, const char *val)
> +{
> +    static const char *const escapes[] = {
> +	['\"'] = "\\\"", ['\\'] = "\\\\", ['\b'] = "\\b",
> +	['\f'] = "\\f",  ['\n'] = "\\n",  ['\t'] = "\\t"
> +    };
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputc ('"', spj->stream);
> +    for (; *val; ++val) {
> +	unsigned char ch = *val;
> +	if (ch < ARRAY_SIZE (escapes) && escapes[ch])
> +	    fputs (escapes[ch], spj->stream);
> +	else if (ch >= 32)
> +	    fputc (ch, spj->stream);
> +	else
> +	    fprintf (spj->stream, "\\u%04x", ch);
> +    }
> +    fputc ('"', spj->stream);
> +}
> +
> +static void
> +json_integer (struct sprinter *sp, int val)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fprintf (spj->stream, "%d", val);
> +}
> +
> +static void
> +json_boolean (struct sprinter *sp, notmuch_bool_t val)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputs (val ? "true" : "false", spj->stream);
> +}
> +
> +static void
> +json_null (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = json_begin_value (sp);
> +
> +    fputs ("null", spj->stream);
> +}
> +
> +static void
> +json_map_key (struct sprinter *sp, const char *key)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    json_string (sp, key);
> +    fputs (": ", spj->stream);
> +    spj->state->first = TRUE;
> +}
> +
> +static void
> +json_set_prefix (unused (struct sprinter *sp), unused (const char *name))
> +{
> +}
> +
> +static void
> +json_separator (struct sprinter *sp)
> +{
> +    struct sprinter_json *spj = (struct sprinter_json *) sp;
> +
> +    spj->insert_separator = TRUE;
> +}
> +
> +struct sprinter *
> +sprinter_json_create (const void *ctx, FILE *stream)
> +{
> +    static const struct sprinter_json template = {
> +	.vtable = {
> +	    .begin_map = json_begin_map,
> +	    .begin_list = json_begin_list,
> +	    .end = json_end,
> +	    .string = json_string,
> +	    .integer = json_integer,
> +	    .boolean = json_boolean,
> +	    .null = json_null,
> +	    .map_key = json_map_key,
> +	    .separator = json_separator,
> +	    .set_prefix = json_set_prefix,
> +	}
> +    };
> +    struct sprinter_json *res;
> +
> +    res = talloc (ctx, struct sprinter_json);
> +    if (! res)
> +	return NULL;
> +
> +    *res = template;
> +    res->stream = stream;
> +    return &res->vtable;
> +}
> diff --git a/sprinter-text-search.c b/sprinter-text-search.c
> new file mode 100644
> index 0000000..95ed9cb
> --- /dev/null
> +++ b/sprinter-text-search.c
> @@ -0,0 +1,190 @@
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <talloc.h>
> +#include "sprinter.h"
> +
> +/* "Structured printer" interface for unstructured text printing.
> + * This is at best a misuse of the interface, but it simplifies the code
> + * in notmuch-search.c considerably.
> + */
> +
> +struct sprinter_text_search {
> +    struct sprinter vtable;
> +    FILE *stream;
> +
> +    /* The current name or prefix to be printed with string/integer/boolean
> +     * data.
> +     */
> +    const char *current_name;

"current_prefix"?  Or maybe just "prefix"?  This is the only place
where you use the term "name" for this.

> +
> +    /* A flag to indicate if this is the first tag. Used in list of tags
> +     * for summary.
> +     */
> +    notmuch_bool_t first_tag;
> +};
> +
> +/* struct text_search_state { */
> +/*     struct text_search_state *parent; */
> +/* }; */

Left over scratch code?

> +
> +static notmuch_bool_t
> +current_context (struct sprinter_text_search *sptxt, const char *marker)
> +{
> +    return (sptxt->current_name != NULL
> +	    && ! strncmp (marker, sptxt->current_name, strlen (marker)));
> +}

All of this context stuff seems way more complicated than having a few
special cases in notmuch-search.c.  This is, in effect, just moving
this special casing from there to here, but since the text format is
highly irregular, attempting to generalize it is only going to
obfuscate it.

I think the simplest and most readable thing to do is to make all of
these functions no-ops (literally empty function bodies) except
text_search_separator and text_search_set_prefix, which should be like
you have them, and text_search_string:

static void
text_search_string (struct sprinter *sp, const char *val)
{
    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;

    if (sptxt->current_name)
	fprintf (sptxt->stream, "%s:", sptxt->current_name);
    print_sanitized_string (sptxt->stream, val);
}

For the summary output, you'll have to format the summary line in
notmuch-search.c, much like you did in v4, which was a much simpler
and more readable way to put together the text format lines.

> +
> +static void
> +print_sanitized_string (FILE *stream, const char *str)
> +{
> +    if (NULL == str)
> +	return;
> +
> +    for (; *str; str++) {
> +	if ((unsigned char) (*str) < 32)
> +	    fputc ('?', stream);
> +	else
> +	    fputc (*str, stream);
> +    }
> +}
> +
> +static void
> +text_search_begin_map (unused (struct sprinter *sp))
> +{
> +}
> +
> +static void
> +text_search_begin_list (struct sprinter *sp)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    if (current_context (sptxt, "tags")) {
> +	fputs (" (", sptxt->stream);
> +	sptxt->first_tag = TRUE;
> +    }
> +}
> +
> +static void
> +text_search_end (struct sprinter *sp)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    if (current_context (sptxt, "tags")) {
> +	fputc (')', sptxt->stream);
> +	sptxt->current_name = NULL;
> +    }
> +}
> +
> +static void
> +text_search_string (struct sprinter *sp, const char *val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    if (sptxt->current_name != NULL) {
> +	if (current_context (sptxt, "thread"))
> +	    fprintf ( sptxt->stream, "thread:%s ", val);
> +	else if (current_context (sptxt, "date_relative"))
> +	    fprintf ( sptxt->stream, "%12s ", val);
> +	else if (current_context (sptxt, "authors")) {
> +	    print_sanitized_string (sptxt->stream, val);
> +	    fputs ("; ", sptxt->stream);
> +	} else if (current_context (sptxt, "subject"))
> +	    print_sanitized_string (sptxt->stream, val);
> +	else if (current_context (sptxt, "tags")) {
> +	    if (! sptxt->first_tag)
> +		fputc (' ', sptxt->stream);
> +	    else
> +		sptxt->first_tag = FALSE;
> +
> +	    fputs (val, sptxt->stream);
> +	} else {
> +	    fputs (sptxt->current_name, sptxt->stream);
> +	    fputc (':', sptxt->stream);
> +	    fputs (val, sptxt->stream);
> +	}
> +    } else {
> +	fputs (val, sptxt->stream);
> +    }
> +}
> +
> +static void
> +text_search_integer (struct sprinter *sp, int val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    if (sptxt->current_name != NULL) {
> +	if (current_context (sptxt, "matched"))
> +	    fprintf ( sptxt->stream, "[%d/", val);
> +	else if (current_context (sptxt, "total"))
> +	    fprintf ( sptxt->stream, "%d] ", val);
> +    } else
> +	fprintf (sptxt->stream, "%d", val);
> +}
> +
> +static void
> +text_search_boolean (struct sprinter *sp, notmuch_bool_t val)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fputs (val ? "true" : "false", sptxt->stream);
> +}
> +
> +static void
> +text_search_null (struct sprinter *sp)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fputs ("null", sptxt->stream);
> +}
> +
> +static void
> +text_search_map_key (struct sprinter *sp, const char *key)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    sptxt->current_name = key;
> +}
> +
> +static void
> +text_search_separator (struct sprinter *sp)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    fputc ('\n', sptxt->stream);
> +}
> +
> +static void
> +text_search_set_prefix (struct sprinter *sp, const char *name)
> +{
> +    struct sprinter_text_search *sptxt = (struct sprinter_text_search *) sp;
> +
> +    sptxt->current_name = name;
> +}
> +
> +struct sprinter *
> +sprinter_text_search_create (const void *ctx, FILE *stream)
> +{
> +    static const struct sprinter_text_search template = {
> +	.vtable = {
> +	    .begin_map = text_search_begin_map,
> +	    .begin_list = text_search_begin_list,
> +	    .end = text_search_end,
> +	    .string = text_search_string,
> +	    .integer = text_search_integer,
> +	    .boolean = text_search_boolean,
> +	    .null = text_search_null,
> +	    .map_key = text_search_map_key,
> +	    .separator = text_search_separator,
> +	    .set_prefix = text_search_set_prefix,
> +	}
> +    };
> +    struct sprinter_text_search *res;
> +
> +    res = talloc (ctx, struct sprinter_text_search);
> +    if (! res)
> +	return NULL;
> +
> +    *res = template;
> +    res->stream = stream;
> +    return &res->vtable;
> +}
> diff --git a/sprinter.h b/sprinter.h
> index c9cd6a6..4241d65 100644
> --- a/sprinter.h
> +++ b/sprinter.h
> @@ -47,4 +47,12 @@ typedef struct sprinter {
>      void (*separator)(struct sprinter *);
>  } sprinter_t;
>  
> +/* Create a new unstructured printer that emits the default Text format for search. */

Wrap at 70 columns.

> +struct sprinter *
> +sprinter_text_search_create (const void *ctx, FILE *stream);
> +
> +/* Create a new structure printer that emits JSON. */
> +struct sprinter *
> +sprinter_json_create (const void *ctx, FILE *stream);
> +
>  #endif // NOTMUCH_SPRINTER_H