[PATCH v2 1/5] Adding an S-expression structured output printer.

Austin Clements aclements at csail.mit.edu
Tue Dec 4 11:14:34 PST 2012


On Tue, 04 Dec 2012, Peter Feigl <craven at gmx.net> wrote:
> This commit adds a structured output printer for Lisp
> S-Expressions. Later commits will use this printer in notmuch search,
> show and reply.
>
> The structure is the same as json, but:
> - arrays are written as lists: ("foo" "bar" "baaz" 1 2 3)
> - maps are written as a-lists: ((key "value") (other-key "other-value"))

I thought the plan was to use plists.  Or are we going to support both?

> - true is written as t
> - false is written as nil
> - null is written as nil
> ---
>  Makefile.local  |   1 +
>  sprinter-sexp.c | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  sprinter.h      |   4 +
>  3 files changed, 255 insertions(+)
>  create mode 100644 sprinter-sexp.c
>
> diff --git a/Makefile.local b/Makefile.local
> index 2b91946..0db1713 100644
> --- a/Makefile.local
> +++ b/Makefile.local
> @@ -270,6 +270,7 @@ notmuch_client_srcs =		\
>  	notmuch-tag.c		\
>  	notmuch-time.c		\
>  	sprinter-json.c		\
> +	sprinter-sexp.c		\
>  	sprinter-text.c		\
>  	query-string.c		\
>  	mime-node.c		\
> diff --git a/sprinter-sexp.c b/sprinter-sexp.c
> new file mode 100644
> index 0000000..6d6bbad
> --- /dev/null
> +++ b/sprinter-sexp.c
> @@ -0,0 +1,250 @@
> +/* notmuch - Not much of an email program, (just index and search)
> + *
> + * Copyright © 2012 Carl Worth

This should probably be your name.

> + *
> + * This program is free software: you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation, either version 3 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see http://www.gnu.org/licenses/ .
> + *
> + * Author: Carl Worth <cworth at cworth.org>

Same here.

> + */
> +
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <talloc.h>
> +#include "sprinter.h"
> +
> +struct sprinter_sexp {
> +    struct sprinter vtable;
> +    FILE *stream;
> +    /* Top of the state stack, or NULL if the printer is not currently
> +     * inside any aggregate types. */
> +    struct sexp_state *state;
> +
> +    /* A flag to signify that a separator should be inserted in the
> +     * output as soon as possible.
> +     */
> +    notmuch_bool_t insert_separator;
> +};
> +
> +struct sexp_state {
> +    struct sexp_state *parent;
> +
> +    /* True if nothing has been printed in this aggregate yet.
> +     * Suppresses the space before a value. */
> +    notmuch_bool_t first;
> +
> +    /* True if the state is a map state.
> +     * Used to add a space between key/value pairs. */
> +    notmuch_bool_t in_map;

Maybe in_alist?

> +
> +    /* The character that closes the current aggregate. */
> +    char close;

Given that the close character is always ')', why have this field?

> +};
> +
> +/* Helper function to set up the stream to print a value.  If this
> + * value follows another value, prints a space. */
> +static struct sprinter_sexp *
> +sexp_begin_value (struct sprinter *sp)
> +{
> +    struct sprinter_sexp *sps = (struct sprinter_sexp *) sp;
> +
> +    if (sps->state) {
> +	if (! sps->state->first) {
> +	    if (sps->insert_separator) {
> +		fputc ('\n', sps->stream);
> +		sps->insert_separator = FALSE;
> +	    } else {
> +		if (! sps->state->in_map)
> +		    fputc (' ', sps->stream);
> +	    }
> +	} else {
> +	    sps->state->first = FALSE;
> +	}
> +    }
> +    return sps;
> +}
> +
> +/* Helper function to begin an aggregate type.  Prints the open
> + * character and pushes a new state frame. */
> +static void
> +sexp_begin_aggregate (struct sprinter *sp, char open, char close)

The open and close arguments seem unnecessary here, since they're always
'(' and ')'.  Perhaps this should instead take in_map as an argument?

> +{
> +    struct sprinter_sexp *sps = sexp_begin_value (sp);
> +    struct sexp_state *state = talloc (sps, struct sexp_state);
> +    fputc (open, sps->stream);
> +    state->parent = sps->state;
> +    state->first = TRUE;
> +    state->in_map = FALSE;
> +    state->close = close;
> +    sps->state = state;
> +}
> +
> +static void
> +sexp_begin_map (struct sprinter *sp)
> +{
> +    struct sprinter_sexp *sps = (struct sprinter_sexp *) sp;
> +    sexp_begin_aggregate (sp, '(', ')');
> +    sps->state->in_map = TRUE;
> +}
> +
> +static void
> +sexp_begin_list (struct sprinter *sp)
> +{
> +    sexp_begin_aggregate (sp, '(', ')');
> +}
> +
> +static void
> +sexp_end (struct sprinter *sp)
> +{
> +    struct sprinter_sexp *sps = (struct sprinter_sexp *) sp;
> +    struct sexp_state *state = sps->state;
> +
> +    if (sps->state->in_map)
> +	fputc (')', sps->stream);
> +    fputc (sps->state->close, sps->stream);
> +    sps->state = state->parent;
> +    talloc_free (state);
> +    if (sps->state == NULL)
> +	fputc ('\n', sps->stream);
> +}
> +
> +static void
> +sexp_string_len_internal (struct sprinter *sp, const char *val, size_t len, notmuch_bool_t quote)
> +{
> +    static const char *const escapes[] = {
> +	['\"'] = "\\\"", ['\\'] = "\\\\", ['\b'] = "\\b",
> +	['\f'] = "\\f",  ['\n'] = "\\n",  ['\t'] = "\\t"

It's unfortunate that different Lisps have different string escaping
conventions.  All of these will work in Elisp.  R5RS only specifies \"
and \\ (anything else is unspecified, though at least MIT Scheme, Racket
R5RS, and Chicken support the others).  R6RS specifies all of these.  In
Common Lisp, \" and \\ work as expected, but a \ before any other
character is simply dropped by the reader (so "\n" is the same as "n").

Conveniently, in all of these, no characters other than " and \ actually
need escaping, so I'd be inclined to print any other character
literally.

Consumers will have to be sure to use a UTF-8 encoding when reading.

> +    };
> +    struct sprinter_sexp *sps = sexp_begin_value (sp);
> +
> +    if(quote)
> +	fputc ('"', sps->stream);
> +    for (; len; ++val, --len) {
> +	unsigned char ch = *val;
> +	if (ch < ARRAY_SIZE (escapes) && escapes[ch])
> +	    fputs (escapes[ch], sps->stream);
> +	else if (ch >= 32)
> +	    fputc (ch, sps->stream);
> +	else
> +	    fprintf (sps->stream, "\\u%04x", ch);

If we do have to include numeric character escapes, "\\%03o" would be
better.  As mentioned above, R5RS and Common Lisp have no means to do
this.  Even worse, R6RS doesn't specify octal escapes but does specify
"\xNN;" (note the semicolon), which isn't compatible with *anything*,
including most R5RS implementations.  In practice, though, most things
seem to accept the octal escape I suggested (confirmed in Elisp, MIT
Scheme, Racket R5RS, and Chicken).

> +    }
> +    if(quote)
> +	fputc ('"', sps->stream);
> +}
> +
> +static void
> +sexp_string_len (struct sprinter *sp, const char *val, size_t len)
> +{
> +    sexp_string_len_internal (sp, val, len, TRUE); /* print quoted */
> +}
> +
> +static void
> +sexp_symbol_len (struct sprinter *sp, const char *val, size_t len)
> +{
> +    sexp_string_len_internal (sp, val, len, FALSE); /* print unquoted */

I don't understand this.  The quoting rules for symbols are completely
different from the rules for strings.  It seems like it would be better
to print the symbol literally with fputs than to apply incorrect quoting
rules to it.  Even better would be to INTERNAL_ERROR if the symbol
contains any characters that might require escaping, though if someone
does introduce such a symbol, they'll probably find out quickly enough.

> +}
> +
> +static void
> +sexp_string (struct sprinter *sp, const char *val)
> +{
> +    if (val == NULL)
> +	val = "";
> +    sexp_string_len (sp, val, strlen (val));
> +}
> +
> +static void
> +sexp_symbol (struct sprinter *sp, const char *val)
> +{
> +    if (val == NULL)
> +	val = "";
> +    sexp_symbol_len (sp, val, strlen (val));
> +}
> +
> +static void
> +sexp_integer (struct sprinter *sp, int val)
> +{
> +    struct sprinter_sexp *sps = sexp_begin_value (sp);
> +
> +    fprintf (sps->stream, "%d", val);
> +}
> +
> +static void
> +sexp_boolean (struct sprinter *sp, notmuch_bool_t val)
> +{
> +    struct sprinter_sexp *sps = sexp_begin_value (sp);
> +
> +    fputs (val ? "t" : "nil", sps->stream);
> +}
> +
> +static void
> +sexp_null (struct sprinter *sp)
> +{
> +    struct sprinter_sexp *sps = sexp_begin_value (sp);
> +
> +    fputs ("nil", sps->stream);
> +}
> +
> +static void
> +sexp_map_key (struct sprinter *sp, const char *key)
> +{
> +    struct sprinter_sexp *sps = (struct sprinter_sexp *) sp;
> +
> +    if (sps->state->in_map && ! sps->state->first)
> +	fputs (") ", sps->stream);
> +    fputc ('(', sps->stream);
> +    sexp_symbol (sp, key);

Since this is the only use of sexp_symbol, perhaps the code should be
folded in?  At least sexp_symbol and sexp_symbol_len should be combined
into sexp_symbol.

> +    fputc (' ', sps->stream);
> +}
> +
> +static void
> +sexp_set_prefix (unused (struct sprinter *sp), unused (const char *name))
> +{
> +}
> +
> +static void
> +sexp_separator (struct sprinter *sp)
> +{
> +    struct sprinter_sexp *sps = (struct sprinter_sexp *) sp;
> +
> +    sps->insert_separator = TRUE;
> +}
> +
> +struct sprinter *
> +sprinter_sexp_create (const void *ctx, FILE *stream)
> +{
> +    static const struct sprinter_sexp template = {
> +	.vtable = {
> +	    .begin_map = sexp_begin_map,
> +	    .begin_list = sexp_begin_list,
> +	    .end = sexp_end,
> +	    .string = sexp_string,
> +	    .string_len = sexp_string_len,
> +	    .integer = sexp_integer,
> +	    .boolean = sexp_boolean,
> +	    .null = sexp_null,
> +	    .map_key = sexp_map_key,
> +	    .separator = sexp_separator,
> +	    .set_prefix = sexp_set_prefix,
> +	    .is_text_printer = FALSE,
> +	}
> +    };
> +    struct sprinter_sexp *res;
> +
> +    res = talloc (ctx, struct sprinter_sexp);
> +    if (! res)
> +	return NULL;
> +
> +    *res = template;
> +    res->stream = stream;
> +    return &res->vtable;
> +}
> diff --git a/sprinter.h b/sprinter.h
> index 912a526..59776a9 100644
> --- a/sprinter.h
> +++ b/sprinter.h
> @@ -70,4 +70,8 @@ sprinter_text_create (const void *ctx, FILE *stream);
>  struct sprinter *
>  sprinter_json_create (const void *ctx, FILE *stream);
>  
> +/* Create a new structure printer that emits S-Expressions. */
> +struct sprinter *
> +sprinter_sexp_create (const void *ctx, FILE *stream);
> +
>  #endif // NOTMUCH_SPRINTER_H
> -- 
> 1.8.0
>
> _______________________________________________
> notmuch mailing list
> notmuch at notmuchmail.org
> http://notmuchmail.org/mailman/listinfo/notmuch


More information about the notmuch mailing list