[PATCH v2 1/7] cli: allow query to come from stdin

Austin Clements amdragon at MIT.EDU
Sat Nov 24 09:41:34 PST 2012


Quoth markwalters1009 on Nov 24 at  1:20 pm:
> From: Mark Walters <markwalters1009 at gmail.com>
> 
> After this series there will be times when a caller will want to pass
> a very large query string to notmuch (eg a list of 10,000 message-ids)
> and this can exceed the size of ARG_MAX. Hence allow notmuch to take
> the query from stdin (if the query is -).
> ---
>  query-string.c |   41 +++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 41 insertions(+), 0 deletions(-)
> 
> diff --git a/query-string.c b/query-string.c
> index 6536512..b1fbdeb 100644
> --- a/query-string.c
> +++ b/query-string.c
> @@ -20,6 +20,44 @@
>  
>  #include "notmuch-client.h"
>  
> +/* Read a single query string from STDIN, using
> + * 'ctx' as the talloc owner for all allocations.
> + *
> + * This function returns NULL in case of insufficient memory or read
> + * errors.
> + */
> +static char *
> +query_string_from_stdin (void *ctx)
> +{
> +    char *query_string;
> +    char buf[4096];
> +    ssize_t remain;
> +
> +    query_string = talloc_strdup (ctx, "");
> +    if (query_string == NULL)
> +	return NULL;
> +
> +    for (;;) {
> +	remain = read (STDIN_FILENO, buf, sizeof(buf) - 1);
> +	if (remain == 0)
> +	    break;
> +	if (remain < 0) {
> +	    if (errno == EINTR)
> +		continue;
> +	    fprintf (stderr, "Error: reading from standard input: %s\n",
> +		     strerror (errno));

talloc_free (query_string) ?

> +	    return NULL;
> +	}
> +
> +	buf[remain] = '\0';
> +	query_string = talloc_strdup_append (query_string, buf);

Eliminate the NUL in buf and instead
 talloc_strndup_append (query_string, buf, remain) ?

Should there be some (large) bound on the size of the query string to
prevent runaway?

> +	if (query_string == NULL)

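Technically it would be good to talloc_free the old pointer here, too
(which means assigning the result of talloc_strdup_append to a
temporary first, since by this point query_string has already been
overwritten with NULL).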

> +	    return NULL;
> +    }
> +
> +    return query_string;
> +}
> +

This whole approach is O(n^2), since each talloc_strdup_append call has
to strlen the entire string accumulated so far, which might actually
matter for large query strings.  How about (tested, but only a little):

/* Upper bound, in bytes, on the buffer used to hold a query read from
 * stdin (including the terminating NUL). */
#define MAX_QUERY_STRING_LENGTH (16 * 1024 * 1024)

/* Read a single query string from STDIN, using 'ctx' as the talloc
 * owner for all allocations.
 *
 * Input is read until end-of-file into a buffer that starts at 1024
 * bytes and doubles whenever fewer than 512 bytes are free, so the
 * buffer is reallocated only O(log n) times. The buffer may grow up
 * to and including MAX_QUERY_STRING_LENGTH bytes; input that would
 * need a larger buffer is rejected with a message on stderr.
 *
 * On success the returned string is NUL-terminated.
 *
 * This function returns NULL in case of insufficient memory, read
 * errors, or an over-long query. Any partially filled buffer is freed
 * before NULL is returned.
 */
static char *
query_string_from_stdin (void *ctx)
{
    char *query_string = NULL, *new_qs;
    /* pos: bytes read so far; end: bytes currently allocated. */
    size_t pos = 0, end = 0, new_end;
    ssize_t got;

    for (;;) {
	if (end - pos < 512) {
	    /* Check the size we are about to allocate and reject only
	     * sizes strictly above the limit. Testing with '>=' would
	     * refuse a buffer of exactly MAX_QUERY_STRING_LENGTH and,
	     * because the size doubles, cap queries at half the
	     * advertised limit. */
	    new_end = MAX(end * 2, 1024);
	    if (new_end > MAX_QUERY_STRING_LENGTH) {
		fprintf (stderr, "Error: query too long\n");
		goto FAIL;
	    }
	    /* Keep the old pointer until the realloc is known to have
	     * succeeded so that FAIL can still free it. */
	    new_qs = talloc_realloc (ctx, query_string, char, new_end);
	    if (new_qs == NULL)
		goto FAIL;
	    query_string = new_qs;
	    end = new_end;
	}

	/* Leave one byte free for the terminating NUL. */
	got = read (STDIN_FILENO, query_string + pos, end - pos - 1);
	if (got == 0)
	    break;
	if (got < 0) {
	    /* Interrupted by a signal before any data arrived: retry. */
	    if (errno == EINTR)
		continue;
	    fprintf (stderr, "Error: reading from standard input: %s\n",
		     strerror (errno));
	    goto FAIL;
	}
	pos += got;
    }

    query_string[pos] = '\0';
    return query_string;

 FAIL:
    talloc_free (query_string);
    return NULL;
}

>  /* Construct a single query string from the passed arguments, using
>   * 'ctx' as the talloc owner for all allocations.
>   *
> @@ -35,6 +73,9 @@ query_string_from_args (void *ctx, int argc, char *argv[])
>      char *query_string;
>      int i;
>  
> +    if ((argc == 1) && (strcmp ("-", argv[0]) == 0))
> +	return query_string_from_stdin (ctx);
> +
>      query_string = talloc_strdup (ctx, "");
>      if (query_string == NULL)
>  	return NULL;


More information about the notmuch mailing list