[PATCH v3 6/6] lib: parse messages only once
Austin Clements
amdragon at MIT.EDU
Mon Feb 3 13:40:03 PST 2014
Quoth Jani Nikula on Feb 03 at 9:51 pm:
> Use the previously parsed gmime message for indexing instead of
> running an extra parsing pass.
>
> After this change, we'll only do unnecessary parsing of the message
> body for duplicates and non-messages. For regular non-duplicate
> messages, we have now shaved off an extra header parsing round during
> indexing.
> ---
> lib/database.cc | 2 +-
> lib/index.cc | 59 ++++++---------------------------------------------
> lib/message-file.c | 9 ++++++++
> lib/notmuch-private.h | 16 ++++++++++++--
> 4 files changed, 30 insertions(+), 56 deletions(-)
>
> diff --git a/lib/database.cc b/lib/database.cc
> index d1bea88..3a29fe7 100644
> --- a/lib/database.cc
> +++ b/lib/database.cc
> @@ -2029,7 +2029,7 @@ notmuch_database_add_message (notmuch_database_t *notmuch,
> date = notmuch_message_file_get_header (message_file, "date");
> _notmuch_message_set_header_values (message, date, from, subject);
>
> - ret = _notmuch_message_index_file (message, filename);
> + ret = _notmuch_message_index_file (message, message_file);
> if (ret)
> goto DONE;
> } else {
> diff --git a/lib/index.cc b/lib/index.cc
> index 976e49f..71397da 100644
> --- a/lib/index.cc
> +++ b/lib/index.cc
> @@ -425,52 +425,15 @@ _index_mime_part (notmuch_message_t *message,
>
> notmuch_status_t
> _notmuch_message_index_file (notmuch_message_t *message,
> - const char *filename)
> + notmuch_message_file_t *message_file)
> {
> - GMimeStream *stream = NULL;
> - GMimeParser *parser = NULL;
> - GMimeMessage *mime_message = NULL;
> + GMimeMessage *mime_message;
> InternetAddressList *addresses;
> - FILE *file = NULL;
> const char *from, *subject;
> - notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
> - static int initialized = 0;
> - char from_buf[5];
> - bool is_mbox = false;
> -
> - if (! initialized) {
> - g_mime_init (GMIME_ENABLE_RFC2047_WORKAROUNDS);
> - initialized = 1;
> - }
> -
> - file = fopen (filename, "r");
> - if (! file) {
> - fprintf (stderr, "Error opening %s: %s\n", filename, strerror (errno));
> - ret = NOTMUCH_STATUS_FILE_ERROR;
> - goto DONE;
> - }
> -
> - /* Is this mbox? */
> - if (fread (from_buf, sizeof (from_buf), 1, file) == 1 &&
> - strncmp (from_buf, "From ", 5) == 0)
> - is_mbox = true;
> - rewind (file);
> -
> - /* Evil GMime steals my FILE* here so I won't fclose it. */
> - stream = g_mime_stream_file_new (file);
> -
> - parser = g_mime_parser_new_with_stream (stream);
> - g_mime_parser_set_scan_from (parser, is_mbox);
>
> - mime_message = g_mime_parser_construct_message (parser);
> -
> - if (is_mbox) {
> - if (!g_mime_parser_eos (parser)) {
> - /* This is a multi-message mbox. */
> - ret = NOTMUCH_STATUS_FILE_NOT_EMAIL;
> - goto DONE;
> - }
> - }
> + mime_message = notmuch_message_file_get_mime_message (message_file);
> + if (! mime_message)
> + return NOTMUCH_STATUS_FILE_NOT_EMAIL; /* more like internal error */
Are there situations other than forgetting to call
notmuch_message_file_parse that could cause this? (Speaking of which,
where is notmuch_message_file_parse called?)
>
> from = g_mime_message_get_sender (mime_message);
>
> @@ -491,15 +454,5 @@ _notmuch_message_index_file (notmuch_message_t *message,
>
> _index_mime_part (message, g_mime_message_get_mime_part (mime_message));
>
> - DONE:
> - if (mime_message)
> - g_object_unref (mime_message);
> -
> - if (parser)
> - g_object_unref (parser);
> -
> - if (stream)
> - g_object_unref (stream);
> -
> - return ret;
> + return NOTMUCH_STATUS_SUCCESS;
> }
> diff --git a/lib/message-file.c b/lib/message-file.c
> index 33f6468..99e1dc8 100644
> --- a/lib/message-file.c
> +++ b/lib/message-file.c
> @@ -250,6 +250,15 @@ mboxes is deprecated and may be removed in the future.\n", message->filename);
> return NOTMUCH_STATUS_SUCCESS;
> }
>
> +GMimeMessage *
> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message)
> +{
> + if (! message->parsed)
> + return NULL;
This seems like another good opportunity to call the parser lazily and
hide notmuch_message_file_parse from the caller, rather than requiring
the caller to implement a particular call sequence (which I wasn't
even able to find above). This might also clean up the error handling
in the call to notmuch_message_file_get_mime_message above.
> +
> + return message->message;
> +}
> +
> /* return NULL on errors, empty string for non-existing headers */
> const char *
> notmuch_message_file_get_header (notmuch_message_file_t *message,
> diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
> index 7277df1..7559521 100644
> --- a/lib/notmuch-private.h
> +++ b/lib/notmuch-private.h
> @@ -46,6 +46,8 @@ NOTMUCH_BEGIN_DECLS
>
> #include <talloc.h>
>
> +#include <gmime/gmime.h>
> +
> #include "xutil.h"
> #include "error_util.h"
>
> @@ -320,9 +322,11 @@ notmuch_message_get_author (notmuch_message_t *message);
>
> /* index.cc */
>
> +typedef struct _notmuch_message_file notmuch_message_file_t;
> +
> notmuch_status_t
> _notmuch_message_index_file (notmuch_message_t *message,
> - const char *filename);
> + notmuch_message_file_t *message_file);
>
> /* message-file.c */
>
> @@ -330,7 +334,6 @@ _notmuch_message_index_file (notmuch_message_t *message,
> * into the public interface in notmuch.h
> */
>
> -typedef struct _notmuch_message_file notmuch_message_file_t;
>
> /* Open a file containing a single email message.
> *
> @@ -377,6 +380,15 @@ void
> notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
> va_list va_headers);
>
> +/* Get the gmime message of a parsed message file.
> + *
> + * Returns NULL if the message file has not been parsed.
> + *
> + * XXX: Would be nice to not have to expose GMimeMessage here.
Maybe just forward-declare struct GMimeMessage? Then you also
wouldn't need to add the gmime #include.
> + */
> +GMimeMessage *
> +notmuch_message_file_get_mime_message (notmuch_message_file_t *message);
> +
> /* Get the value of the specified header from the message as a UTF-8 string.
> *
> * The header name is case insensitive.
More information about the notmuch
mailing list