[PATCH v3 6/6] lib: parse messages only once

Austin Clements amdragon at MIT.EDU
Mon Feb 3 13:40:03 PST 2014


Quoth Jani Nikula on Feb 03 at  9:51 pm:
> Use the previously parsed gmime message for indexing instead of
> running an extra parsing pass.
> 
> After this change, we'll only do unnecessary parsing of the message
> body for duplicates and non-messages. For regular non-duplicate
> messages, we have now shaved off an extra header parsing round during
> indexing.
> ---
>  lib/database.cc       |  2 +-
>  lib/index.cc          | 59 ++++++---------------------------------------------
>  lib/message-file.c    |  9 ++++++++
>  lib/notmuch-private.h | 16 ++++++++++++--
>  4 files changed, 30 insertions(+), 56 deletions(-)
> 
> diff --git a/lib/database.cc b/lib/database.cc
> index d1bea88..3a29fe7 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -2029,7 +2029,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
>  	    date = notmuch_message_file_get_header (message_file, "date");
>  	    _notmuch_message_set_header_values (message, date, from, subject);
>  
> -	    ret = _notmuch_message_index_file (message, filename);
> +	    ret = _notmuch_message_index_file (message, message_file);
>  	    if (ret)
>  		goto DONE;
>  	} else {
> diff --git a/lib/index.cc b/lib/index.cc
> index 976e49f..71397da 100644
> --- a/lib/index.cc
> +++ b/lib/index.cc
> @@ -425,52 +425,15 @@ _index_mime_part (notmuch_message_t *message,
>  
>  notmuch_status_t
>  _notmuch_message_index_file (notmuch_message_t *message,
> -			     const char *filename)
> +			     notmuch_message_file_t *message_file)
>  {
> -    GMimeStream *stream = NULL;
> -    GMimeParser *parser = NULL;
> -    GMimeMessage *mime_message = NULL;
> +    GMimeMessage *mime_message;
>      InternetAddressList *addresses;
> -    FILE *file = NULL;
>      const char *from, *subject;
> -    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
> -    static int initialized = 0;
> -    char from_buf[5];
> -    bool is_mbox = false;
> -
> -    if (! initialized) {
> -	g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
> -	initialized = 1;
> -    }
> -
> -    file = fopen (filename, "r");
> -    if (! file) {
> -	fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
> -	ret = NOTMUCH_STATUS_FILE_ERROR;
> -	goto DONE;
> -    }
> -
> -    /* Is this mbox? */
> -    if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
> -	strncmp (from_buf, "From ", 5) == 0)
> -	is_mbox = true;
> -    rewind (file);
> -
> -    /* Evil GMime steals my FILE* here so I won't fclose it. */
> -    stream = g_mime_stream_file_new (file);
> -
> -    parser = g_mime_parser_new_with_stream (stream);
> -    g_mime_parser_set_scan_from (parser, is_mbox);
>  
> -    mime_message = g_mime_parser_construct_message (parser);
> -
> -    if (is_mbox) {
> -	if (!g_mime_parser_eos (parser)) {
> -	    /* This is a multi-message mbox. */
> -	    ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
> -	    goto DONE;
> -	}
> -    }
> +    mime_message = notmuch_message_file_get_mime_message (message_file);
> +    if (! mime_message)
> +	return NOTMUCH_STATUS_FILE_NOT_EMAIL; /* more like internal error */

Are there situations other than forgetting to call
notmuch_message_file_parse that could cause this?  (Speaking of which,
where is notmuch_message_file_parse called?)

>  
>      from = g_mime_message_get_sender (mime_message);
>  
> @@ -491,15 +454,5 @@ _notmuch_message_index_file (notmuch_message_t *message,
>  
>      _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
>  
> -  DONE:
> -    if (mime_message)
> -	g_object_unref (mime_message);
> -
> -    if (parser)
> -	g_object_unref (parser);
> -
> -    if (stream)
> -	g_object_unref (stream);
> -
> -    return ret;
> +    return NOTMUCH_STATUS_SUCCESS;
>  }
> diff --git a/lib/message-file.c b/lib/message-file.c
> index 33f6468..99e1dc8 100644
> --- a/lib/message-file.c
> +++ b/lib/message-file.c
> @@ -250,6 +250,15 @@ mboxes is deprecated and may be removed in the future.\n", message->filename);
>      return NOTMUCH_STATUS_SUCCESS;
>  }
>  
> +GMimeMessage *
> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message)
> +{
> +    if (! message->parsed)
> +	return NULL;

This seems like another good opportunity to call the parser lazily and
hide notmuch_message_file_parse from the caller, rather than requiring
the caller to implement a particular call sequence (which I wasn't
even able to find above).  This might also clean up the error handling
in the call to notmuch_message_file_get_mime_message above.

> +
> +    return message->message;
> +}
> +
>  /* return NULL on errors, empty string for non-existing headers */
>  const char *
>  notmuch_message_file_get_header (notmuch_message_file_t *message,
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index 7277df1..7559521 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -46,6 +46,8 @@ NOTMUCH_BEGIN_DECLS
>  
>  #include <talloc.h>
>  
> +#include <gmime/gmime.h>
> +
>  #include "xutil.h"
>  #include "error_util.h"
>  
> @@ -320,9 +322,11 @@ notmuch_message_get_author (notmuch_message_t *message);
>  
>  /* index.cc */
>  
> +typedef struct _notmuch_message_file notmuch_message_file_t;
> +
>  notmuch_status_t
>  _notmuch_message_index_file (notmuch_message_t *message,
> -			     const char *filename);
> +			     notmuch_message_file_t *message_file);
>  
>  /* message-file.c */
>  
> @@ -330,7 +334,6 @@ _notmuch_message_index_file (notmuch_message_t *message,
>   * into the public interface in notmuch.h
>   */
>  
> -typedef struct _notmuch_message_file notmuch_message_file_t;
>  
>  /* Open a file containing a single email message.
>   *
> @@ -377,6 +380,15 @@ void
>  notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
>  					va_list va_headers);
>  
> +/* Get the gmime message of a parsed message file.
> + *
> + * Returns NULL if the message file has not been parsed.
> + *
> + * XXX: Would be nice to not have to expose GMimeMessage here.

Maybe just forward-declare struct GMimeMessage?  Then you also
wouldn't need to add the gmime #include.

> + */
> +GMimeMessage *
> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message);
> +
>  /* Get the value of the specified header from the message as a UTF-8 string.
>   *
>   * The header name is case insensitive.


More information about the notmuch mailing list