[PATCH v3 3/4] cli: Extend the search command for --output={sender, recipients}

Mon Oct 13 12:00:58 PDT 2014

On Mon, Oct 13 2014, Michal Sojka <sojkam1 at fel.cvut.cz> wrote:

> The new outputs allow printing senders, recipients or both of matching
> messages. The --output option is converted from "keyword" argument to
> "flags" argument, which means that the user can use --output=sender and
> --output=recipients simultaneously, to print both. Other combinations
> produce an error.
>
> This code based on a patch from Jani Nikula.
> ---
>  completion/notmuch-completion.bash |   2 +-
>  completion/notmuch-completion.zsh  |   3 +-
>  doc/man1/notmuch-search.rst        |  22 +++++++-
>  notmuch-search.c                   | 110 ++++++++++++++++++++++++++++++++++---
>  test/T090-search-output.sh         |  64 +++++++++++++++++++++
>  5 files changed, 189 insertions(+), 12 deletions(-)
>
> diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash
> index 0571dc9..cfbd389 100644
> --- a/completion/notmuch-completion.bash
> +++ b/completion/notmuch-completion.bash
> @@ -294,7 +294,7 @@ _notmuch_search()
>  	    return
>  	    ;;
>  	--output)
> -	    COMPREPLY=( $( compgen -W "summary threads messages files tags" -- "${cur}" ) )
> +	    COMPREPLY=( $( compgen -W "summary threads messages files tags sender recipients" -- "${cur}" ) )
>  	    return
>  	    ;;
>  	--sort)
> diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh
> index 67a9aba..3e52a00 100644
> --- a/completion/notmuch-completion.zsh
> +++ b/completion/notmuch-completion.zsh
> @@ -52,7 +52,8 @@ _notmuch_search()
>    _arguments -s : \
>      '--max-threads=[display only the first x threads from the search results]:number of threads to show: ' \
>      '--first=[omit the first x threads from the search results]:number of threads to omit: ' \
> -    '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))'
> +    '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \
> +    '--output=[select what to output]:output:((summary threads messages files tags sender recipients))'
>  }
>  
>  _notmuch()
> diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst
> index 90160f2..c9d38b1 100644
> --- a/doc/man1/notmuch-search.rst
> +++ b/doc/man1/notmuch-search.rst
> @@ -35,7 +35,7 @@ Supported options for **search** include
>          intended for programs that invoke **notmuch(1)** internally. If
>          omitted, the latest supported version will be used.
>  
> -    ``--output=(summary|threads|messages|files|tags)``
> +    ``--output=(summary|threads|messages|files|tags|sender|recipients)``
>  
>          **summary**
>              Output a summary of each thread with any message matching
> @@ -78,6 +78,26 @@ Supported options for **search** include
>              by null characters (--format=text0), as a JSON array
>              (--format=json), or as an S-Expression list (--format=sexp).
>  
> +	**sender**
> +            Output all addresses from the *From* header that appear on
> +            any message matching the search terms, either one per line
> +            (--format=text), separated by null characters
> +            (--format=text0), as a JSON array (--format=json), or as
> +            an S-Expression list (--format=sexp).
> +
> +	    Note: Searching for **sender** should be much faster than
> +	    searching for **recipients**, because sender addresses are
> +	    cached directly in the database whereas other addresses
> +	    need to be fetched from message files.
> +
> +	**recipients**
> +            Like **sender** but for addresses from *To*, *Cc* and
> +	    *Bcc* headers.
> +
> +	This option can be given multiple times to combine different
> +	outputs. Curently, this is only supported for **sender** and
> +	**recipients** outputs.
> +
>      ``--sort=``\ (**newest-first**\ \|\ **oldest-first**)
>          This option can be used to present results in either
>          chronological order (**oldest-first**) or reverse chronological
> diff --git a/notmuch-search.c b/notmuch-search.c
> index 5ac2a26..74588f8 100644
> --- a/notmuch-search.c
> +++ b/notmuch-search.c
> @@ -23,11 +23,14 @@
>  #include "string-util.h"
>  
>  typedef enum {
> -    OUTPUT_SUMMARY,
> -    OUTPUT_THREADS,
> -    OUTPUT_MESSAGES,
> -    OUTPUT_FILES,
> -    OUTPUT_TAGS
> +    OUTPUT_SUMMARY	= 1 << 0,
> +    OUTPUT_THREADS	= 1 << 1,
> +    OUTPUT_MESSAGES	= 1 << 2,
> +    OUTPUT_FILES	= 1 << 3,
> +    OUTPUT_TAGS		= 1 << 4,
> +    OUTPUT_SENDER	= 1 << 5,
> +    OUTPUT_RECIPIENTS	= 1 << 6,
> +    OUTPUT_ADDRESSES	= OUTPUT_SENDER | OUTPUT_RECIPIENTS,

leftover, like mentioned below (this comment added just before sending)

>  } output_t;
>  
>  typedef struct {
> @@ -220,6 +223,67 @@ do_search_threads (search_options_t *o)
>      return 0;
>  }
>  
> +static void
> +print_address_list (const search_options_t *o, InternetAddressList *list)
> +{
> +    InternetAddress *address;
> +    int i;
> +
> +    for (i = 0; i < internet_address_list_length (list); i++) {
> +	address = internet_address_list_get_address (list, i);
> +	if (INTERNET_ADDRESS_IS_GROUP (address)) {
> +	    InternetAddressGroup *group;
> +	    InternetAddressList *group_list;
> +
> +	    group = INTERNET_ADDRESS_GROUP (address);
> +	    group_list = internet_address_group_get_members (group);
> +	    if (group_list == NULL)
> +		continue;
> +
> +	    print_address_list (o, group_list);
> +	} else {
> +	    InternetAddressMailbox *mailbox;
> +	    const char *name;
> +	    const char *addr;
> +	    char *full_address;
> +
> +	    mailbox = INTERNET_ADDRESS_MAILBOX (address);
> +
> +	    name = internet_address_get_name (address);
> +	    addr = internet_address_mailbox_get_addr (mailbox);
> +
> +	    if (name && *name)
> +		full_address = talloc_asprintf (o->format, "%s <%s>", name, addr);
> +	    else
> +		full_address = talloc_strdup (o->format, addr);
> +
> +	    if (!full_address) {

Apart from minor style issue like space after ! and the leftover ADDRESSES
parts (w/o that I would not have commented about !<SPC>) the first 3
patches look pretty good to me. I have not tested those yet.

But we keep to have some disagreement w/ unique/duplicate/filter-by
handling ;/

I (currently) rest the case of first/last/most common handling to just
how the --sort=(newest-first|oldest-first) affects the order...

Let's consider the following list of output if no /deduplication/ is done:

  John Doe <john at example.com>
  Dr. John Doe <john at example.com>
  John Doe <JOHN at EXAMPLE.COM>
  John Doe <john at doe.name>
  Dr. John Doe <john at doe.name>
  John Doe <JOHN at doe.name.example.com>
  John Doe <john at doe.name>
  Dr. John Doe <john at example.com>
  Dr. John Doe <john at example.com>
  Dr. John Doe <john at doe.name>
  John Doe <john at example.com>
  John Doe <john at doe.name.example.com>

To stir the pool a little more, this could be the output when
--duplicate=all (the default) is given.

With --duplicate=none the output could be (first match by unique
case-insensitive address):

  John Doe <john at example.com>
  John Doe <john at doe.name>
  John Doe <john at doe.name.example.com>

(many people may have the same name, but email address is unique per person
-- therefore I think 'none' limiting that just to John Doe <john at example.com>
would be too little)

and with --duplicate=address

  John Doe <john at example.com>
  Dr. John Doe <john at example.com>
  John Doe <john at doe.name>
  Dr. John Doe <john at doe.name>
  John Doe <JOHN at doe.name.example.com>

(from this output user can choose how the recipient is to be called
(like "pseudonyms" mentioned in id:20141010113202.GE28601 at TP_L520.localdomain )
when sending email)

and --duplicate=address-casesensitive

  John Doe <john at example.com>
  Dr. John Doe <john at example.com>
  John Doe <JOHN at EXAMPLE.COM>
  John Doe <john at doe.name>
  Dr. John Doe <john at doe.name>
  John Doe <JOHN at doe.name.example.com>
  John Doe <john at doe.name.example.com>

This reuse of --duplicate was thought out after Jani's irc mention of it.
This scheme would leave no room tho the filter-by=name suggestion -- for
completeness that would make this look:

  John Doe <john at example.com>
  Dr. John Doe <john at example.com>

This doesn't look too bad in this particular case but not having ability to
see all potential addresses (perhaps the only working address is now
hidden) isn't not much for general use. Well, maybe for some specific use
--duplicate=no-unique-addresses could be useful :O

Ok, this took an hour to get written to (w/ some interruptions). Healthy
criticism appreciated :D

Tomi

// stuff deleted before some 'ADDRESSES' leftover...

> @@ -455,18 +542,23 @@ notmuch_search_command (notmuch_config_t *config, int argc, char *argv[])
>      }
>  
>      switch (o.output) {
> -    default:
>      case OUTPUT_SUMMARY:
>      case OUTPUT_THREADS:
>  	ret = do_search_threads (&o);
>  	break;
>      case OUTPUT_MESSAGES:
> +    case OUTPUT_SENDER:
> +    case OUTPUT_RECIPIENTS:
> +    case OUTPUT_ADDRESSES:
>      case OUTPUT_FILES:
>  	ret = do_search_messages (&o);
>  	break;
>      case OUTPUT_TAGS:
>  	ret = do_search_tags (notmuch, o.format, o.query);
>  	break;
> +    default:
> +	fprintf (stderr, "Error: the combination of outputs is not supported.\n");
> +	ret = 1;
>      }
>  
>      notmuch_query_destroy (o.query);