[PATCH v2 8/8] new: Add scan support for mbox:// URIs

Ethan Glasser-Camp ethan.glasser.camp at gmail.com
Sun Jul 1 09:39:50 PDT 2012


This fixes the broken tests introduced by the last commit.

Signed-off-by: Ethan Glasser-Camp <ethan at betacantrips.com>
---
More text was added to clarify how mbox scanning works.

 notmuch-config.c |    4 +
 notmuch-new.c    |  304 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 303 insertions(+), 5 deletions(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index 387f855..e02b6a9 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -56,6 +56,10 @@ static const char new_config_comment[] =
     "\t	Each URL denotes a \"root\" which will be searched for mail files.\n"
     "\t	How this search is performed depends on the scheme of the URL (the\n"
     "\t	part before the first colon).\n"
+    "\n"
+    "\t\tmbox:///path	scans all subdirectories starting at path for mbox\n"
+    "\t\t		files, and scans all mbox files for all messages.\n"
+    "\n"
     "\t	The maildir located at database.path, above, will automatically be added.\n";
 
 static const char user_config_comment[] =
diff --git a/notmuch-new.c b/notmuch-new.c
index 5250562..061a1a8 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include <uriparser/Uri.h>
 
 #include <unistd.h>
 
@@ -653,14 +654,307 @@ add_files (notmuch_database_t *notmuch,
     return ret;
 }
 
+/* Scan an mbox file for messages.
+ *
+ * Every message found is handed to _add_message as a URI of the form
+ * mbox://PATH#OFFSET+LENGTH. OFFSET is the byte position just after
+ * the message's From_ line. LENGTH runs up to, but does not include,
+ * the byte in front of the next From_ line (or in front of end of
+ * file), which is expected to be the newline of the separating blank
+ * line.
+ *
+ * Message boundaries are lines starting with "From ". If the headers
+ * carry a usable Content-Length (mboxcl), the body is skipped with a
+ * seek instead of being scanned line by line.
+ *
+ * We assume that mboxes are append only -- this function does not
+ * check to see if messages have gone missing.
+ *
+ * The mtime of the mbox file is stored in a "directory" document in
+ * Xapian; here it is queued on state->directory_mtimes under the
+ * mbox://PATH URI. A file whose stored mtime equals its current mtime
+ * is not opened at all.
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS when the file was scanned or skipped
+ * as unchanged, NOTMUCH_STATUS_FILE_ERROR when it cannot be read or
+ * does not start with a From_ line, NOTMUCH_STATUS_OUT_OF_MEMORY when
+ * a URI cannot be allocated, and otherwise the first failing status
+ * from the database lookup or from _add_message.
+ */
+/* FIXME: a certain amount of this code appears in add_files_recursive,
+ * and could be refactored
+ */
+static notmuch_status_t
+add_messages_mbox_file (notmuch_database_t *notmuch,
+			const char *path,
+			add_files_state_t *state)
+{
+    static const char cl_header[] = "Content-Length: ";
+    const size_t cl_header_len = sizeof (cl_header) - 1;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+    struct stat st;
+    time_t fs_mtime, db_mtime, stat_time;
+    FILE *mbox = NULL;
+    /* line must start out NULL: getline allocates it, and DONE frees
+     * it even when we bail out before the first read. */
+    char *line = NULL, *path_uri = NULL, *message_uri = NULL, *end;
+    ssize_t line_len;
+    size_t line_size = 0;
+    /* ftell returns long, with -1 signalling failure. */
+    long offset, end_offset, content_length;
+    notmuch_directory_t *directory = NULL;
+    int is_headers;
+
+    if (stat (path, &st)) {
+	fprintf (stderr, "Error reading mbox file %s: %s\n",
+		 path, strerror (errno));
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    stat_time = time (NULL);
+    if (! S_ISREG (st.st_mode)) {
+	fprintf (stderr, "Error: %s is not a file.\n", path);
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    fs_mtime = st.st_mtime;
+
+    path_uri = talloc_asprintf (notmuch, "mbox://%s", path);
+    if (path_uri == NULL) {
+	ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+	goto DONE;
+    }
+
+    status = notmuch_database_get_directory (notmuch, path_uri, &directory);
+    if (status) {
+	ret = status;
+	goto DONE;
+    }
+    db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
+
+    /* Unchanged since the last scan: nothing to do. */
+    if (directory && db_mtime == fs_mtime)
+	goto DONE;
+
+    mbox = fopen (path, "r");
+    if (mbox == NULL) {
+	fprintf (stderr, "Error: couldn't open %s for reading.\n",
+		 path);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    line_len = getline (&line, &line_size, mbox);
+    if (line_len == -1) {
+	fprintf (stderr, "Error: reading from %s failed: %s\n",
+		 path, strerror (errno));
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    /* NOTE(review): the message says "Ignoring", yet an error status
+     * is returned -- confirm whether callers should really stop on a
+     * non-mbox file. */
+    if (strncmp (line, "From ", 5) != 0) {
+	fprintf (stderr, "Note: Ignoring non-mbox file: %s\n",
+		 path);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    /* Loop invariant: At the beginning of the loop, we have just read
+     * a From_ line, but haven't yet read any of the headers.
+     */
+    while (! feof (mbox)) {
+	is_headers = 1;
+	content_length = -1;
+
+	offset = ftell (mbox);
+	if (offset < 0) {
+	    fprintf (stderr, "Error: reading from %s failed: %s\n",
+		     path, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	}
+
+	/* Read lines until we either get to the next From_ header, or
+	 * we run out of file. Every failed read leaves the loop at
+	 * once so that a stale line buffer is never inspected.
+	 */
+	do {
+	    /* Get the offset before we read, in case we got another From_ header. */
+	    end_offset = ftell (mbox);
+
+	    line_len = getline (&line, &line_size, mbox);
+	    if (line_len == -1)
+		break;
+
+	    if (is_headers &&
+		strncasecmp (line, cl_header, cl_header_len) == 0) {
+		/* Only a non-negative, parseable value may be used
+		 * for seeking; anything else falls back to scanning. */
+		errno = 0;
+		content_length = strtol (line + cl_header_len, &end, 10);
+		if (errno != 0 || end == line + cl_header_len ||
+		    content_length < 0)
+		    content_length = -1;
+	    } else if (is_headers && strcmp (line, "\n") == 0) {
+		is_headers = 0;
+		/* If we got a content_length, skip the message body. */
+		if (content_length != -1) {
+		    if (fseek (mbox, content_length, SEEK_CUR) != 0) {
+			fprintf (stderr, "Error: reading from %s failed: %s\n",
+				 path, strerror (errno));
+			ret = NOTMUCH_STATUS_FILE_ERROR;
+			goto DONE;
+		    }
+
+		    line_len = getline (&line, &line_size, mbox);
+		    if (line_len == -1) {
+			/* The body ran up to the end of the file. */
+			end_offset = ftell (mbox);
+			break;
+		    }
+
+		    /* We should be at the end of the message.  Sanity
+		     * check: there should be a blank line, and then
+		     * another From_ header. */
+		    if (strcmp (line, "\n") != 0) {
+			fprintf (stderr, "Warning: message with Content-Length not "
+				 "immediately followed by blank line (%ld)\n", offset);
+		    }
+
+		    end_offset = ftell (mbox);
+		    line_len = getline (&line, &line_size, mbox);
+		    if (line_len == -1)
+			break;
+
+		    if (strncmp (line, "From ", 5) != 0) {
+			fprintf (stderr, "Warning: message with Content-Length not "
+				 "immediately followed by another message (%ld)\n", offset);
+			fprintf (stderr, "Line was: %s", line);
+		    }
+		}
+	    }
+	} while (strncmp (line, "From ", 5) != 0);
+
+	/* getline reports end of file and read errors the same way. */
+	if (ferror (mbox)) {
+	    fprintf (stderr, "Error: reading from %s failed: %s\n",
+		     path, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	}
+
+	/* A From_ line directly followed by another From_ line, or by
+	 * the end of the file, carries no message. */
+	if (end_offset - 1 <= offset)
+	    continue;
+
+	/* end_offset is now after the \n but before the From_. */
+	message_uri = talloc_asprintf (notmuch, "mbox://%s#%ld+%ld",
+				       path, offset, (end_offset - 1) - offset);
+	if (message_uri == NULL) {
+	    ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+	    goto DONE;
+	}
+
+	_report_before_adding_file (state, message_uri);
+
+	status = _add_message (state, notmuch, message_uri);
+	if (status) {
+	    ret = status;
+	    goto DONE;
+	}
+
+	_report_added_file (state);
+
+	talloc_free (message_uri);
+	message_uri = NULL;
+    }
+
+    /* This is the same precaution we take in maildir. */
+    if (fs_mtime != stat_time)
+	_filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime;
+
+DONE:
+    if (mbox)
+	fclose (mbox);
+    free (line);
+    if (path_uri)
+	talloc_free (path_uri);
+    if (message_uri)
+	talloc_free (message_uri);
+    if (directory)
+	notmuch_directory_destroy (directory);
+
+    return ret;
+}
+
+/*
+ * Examine path recursively as follows:
+ *
+ * - Recurse on each subdirectory, as in add_files.
+ *
+ * - Call add_messages_mbox_file on every regular file.
+ *
+ * Directories named ".", ".." and ".notmuch" are not descended into,
+ * and entries matched by _entry_in_ignore_list are neither descended
+ * into nor scanned.
+ *
+ * Stops at the first failure and returns its status:
+ * NOTMUCH_STATUS_FILE_ERROR when path or one of its entries cannot
+ * be examined, NOTMUCH_STATUS_OUT_OF_MEMORY when a path cannot be
+ * allocated, or whatever the recursive call or
+ * add_messages_mbox_file reported. Returns NOTMUCH_STATUS_SUCCESS
+ * otherwise, including when the scan was cut short by an interrupt.
+ */
+/* FIXME: this is almost entirely bits-and-pieces from
+ * add_files_recursive and could do with a refactor */
+static notmuch_status_t
+add_files_mbox (notmuch_database_t *notmuch,
+		const char *path,
+		add_files_state_t *state)
+{
+    struct dirent **fs_entries = NULL;
+    struct dirent *entry = NULL;
+    char *next = NULL;
+    int num_fs_entries = 0, i, entry_type;
+    struct stat st;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+
+    if (stat (path, &st)) {
+	fprintf (stderr, "Error reading directory %s: %s\n",
+		 path, strerror (errno));
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    num_fs_entries = scandir (path, &fs_entries, 0,
+			      dirent_sort_inode);
+
+    if (num_fs_entries == -1) {
+	fprintf (stderr, "Error opening directory %s: %s\n",
+		 path, strerror (errno));
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    /* Pass 1: Recurse into all sub-directories. */
+    for (i = 0; i < num_fs_entries; i++) {
+	if (interrupted)
+	    break;
+
+	entry = fs_entries[i];
+
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    /* Be pessimistic, e.g. so we don't lose lots of mail just
+	     * because a user broke a symlink. */
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    /* Leave through DONE so the scandir result is released. */
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	} else if (entry_type != S_IFDIR) {
+	    continue;
+	}
+
+	/* Ignore special directories to avoid infinite recursion.
+	 * Also ignore the .notmuch directory and files/directories
+	 * the user has configured to be ignored.
+	 */
+	if (strcmp (entry->d_name, ".") == 0 ||
+	    strcmp (entry->d_name, "..") == 0 ||
+	    strcmp (entry->d_name, ".notmuch") == 0 ||
+	    _entry_in_ignore_list (entry->d_name, state))
+	{
+	    continue;
+	}
+
+	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	if (next == NULL) {
+	    ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+	    goto DONE;
+	}
+
+	status = add_files_mbox (notmuch, next, state);
+	if (status) {
+	    ret = status;
+	    goto DONE;
+	}
+	talloc_free (next);
+	next = NULL;
+    }
+
+    /* Pass 2: Scan every regular file in this directory as an mbox. */
+    for (i = 0; i < num_fs_entries; i++)
+    {
+	if (interrupted)
+	    break;
+
+	entry = fs_entries[i];
+
+	/* Ignore files & directories user has configured to be ignored */
+	if (_entry_in_ignore_list (entry->d_name, state))
+	    continue;
+
+	/* Only add regular files (and symlinks to regular files). */
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	} else if (entry_type != S_IFREG) {
+	    continue;
+	}
+
+	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	if (next == NULL) {
+	    ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+	    goto DONE;
+	}
+
+	status = add_messages_mbox_file (notmuch, next, state);
+	talloc_free (next);
+	next = NULL;
+
+	if (status) {
+	    ret = status;
+	    goto DONE;
+	}
+    }
+
+DONE:
+    if (next)
+	talloc_free (next);
+    /* scandir allocates both the array and each of its entries. */
+    if (fs_entries) {
+	for (i = 0; i < num_fs_entries; i++)
+	    free (fs_entries[i]);
+	free (fs_entries);
+    }
+    return ret;
+}
+
 /* Call out to the appropriate add_files function, based on the URI. */
+/* Only the mbox scheme is handled here: the path of an mbox:///path
+ * URI is passed to add_files_mbox and its status is returned. A URI
+ * with any other scheme, or one that cannot be parsed, scans nothing
+ * and yields NOTMUCH_STATUS_SUCCESS. An mbox URI without a
+ * '/'-introduced path yields NOTMUCH_STATUS_FILE_ERROR.
+ */
 static notmuch_status_t
-add_files_uri (unused(notmuch_database_t *notmuch),
-	       unused(const char *uri),
-	       unused(add_files_state_t *state))
+add_files_uri (notmuch_database_t *notmuch,
+	       const char *uri,
+	       add_files_state_t *state)
 {
-    /* Stub for now */
-    return NOTMUCH_STATUS_SUCCESS;
+    static const char mbox_scheme[] = "mbox";
+    const size_t mbox_scheme_len = sizeof (mbox_scheme) - 1;
+    UriUriA parsed;
+    UriParserStateA parser;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    const char *first_segment;
+
+    parser.uri = &parsed;
+    if (uriParseUriA (&parser, uri) != URI_SUCCESS) {
+	fprintf (stderr, "Warning: could not parse URI %s\n", uri);
+	goto DONE;
+    }
+
+    /* Compare the whole scheme: a mere prefix such as "mb", or a
+     * missing scheme, must not be taken for "mbox". */
+    if (parsed.scheme.first == NULL ||
+	(size_t) (parsed.scheme.afterLast - parsed.scheme.first) != mbox_scheme_len ||
+	strncmp (parsed.scheme.first, mbox_scheme, mbox_scheme_len) != 0)
+	goto DONE;
+
+    /* The path handed on is the tail of uri, starting at the '/' in
+     * front of the first path segment; make sure that '/' exists
+     * before stepping back onto it. */
+    first_segment = parsed.pathHead ? parsed.pathHead->text.first : NULL;
+    if (first_segment == NULL || first_segment <= uri ||
+	first_segment[-1] != '/') {
+	fprintf (stderr, "Error: mbox URI %s has no absolute path\n", uri);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    ret = add_files_mbox (notmuch, first_segment - 1, state);
+
+DONE:
+    uriFreeUriMembersA (&parsed);
+    return ret;
 }
 
 static void
-- 
1.7.9.5



More information about the notmuch mailing list