[RFC PATCH 14/14] new: Add scan support for mbox:// URIs

Ethan Glasser-Camp ethan.glasser.camp at gmail.com
Mon Jun 25 13:51:57 PDT 2012


A lot of code is duplicated from the maildir scanner. I don't think I
handled all of the errors correctly, and no progress is reported yet.

Signed-off-by: Ethan Glasser-Camp <ethan at betacantrips.com>
---
 notmuch-new.c |  299 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 289 insertions(+), 10 deletions(-)

diff --git a/notmuch-new.c b/notmuch-new.c
index 1bf4e25..36fee34 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -19,6 +19,7 @@
  */
 
 #include "notmuch-client.h"
+#include <uriparser/Uri.h>
 
 #include <unistd.h>
 
@@ -239,16 +240,6 @@ _entry_in_ignore_list (const char *entry, add_files_state_t *state)
     return FALSE;
 }
 
-/* Call out to the appropriate add_files function, based on the URI. */
-static notmuch_status_t
-add_files_uri (unused(notmuch_database_t *notmuch),
-	       unused(const char *uri),
-	       unused(add_files_state_t *state))
-{
-    /* Stub for now */
-    return NOTMUCH_STATUS_SUCCESS;
-}
-
 /* Progress-reporting function.
  *
  * Can be used by any mailstore-crawling function that wants to alert
@@ -674,6 +665,294 @@ add_files (notmuch_database_t *notmuch,
     return ret;
 }
 
+/* Scan an mbox file for messages.
+ *
+ * We assume that mboxes grow monotonically only.
+ *
+ * The mtime of the mbox file is stored in a "directory" document in
+ * Xapian, looked up under the URI "mbox://<path>". When a stored mtime
+ * equals the file's current mtime, the file is not opened at all.
+ *
+ * Otherwise the file must begin with a "From " line. A message ends
+ * at the next line starting with "From ", or, when its headers carry
+ * a Content-Length (mboxcl), after skipping that many body bytes.
+ * Every message is passed to _add_message() as
+ * "mbox://<path>#<offset>+<length>".
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS on success (including the unchanged
+ * case), NOTMUCH_STATUS_FILE_ERROR when the file cannot be stat'ed,
+ * opened or read, or does not look like an mbox, or the status
+ * reported by notmuch_database_get_directory().
+ */
+static notmuch_status_t
+add_messages_mbox_file (notmuch_database_t *notmuch,
+			const char *path,
+			add_files_state_t *state)
+{
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+    struct stat st;
+    time_t fs_mtime, db_mtime, stat_time;
+    FILE *mbox = NULL;
+    /* line must start out NULL: getline() allocates it on first use,
+     * and the cleanup code frees it on every path. */
+    char *line = NULL, *path_uri = NULL, *message_uri = NULL;
+    ssize_t line_len;
+    size_t line_size = 0;
+    /* ftell() and strtol() both yield long; keep the type so that the
+     * printf-style conversions below match their arguments. */
+    long offset, end_offset, content_length;
+    notmuch_directory_t *directory = NULL;
+    int is_headers;
+
+    if (stat (path, &st)) {
+	fprintf (stderr, "Error reading mbox file %s: %s\n",
+		 path, strerror (errno));
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    stat_time = time (NULL);
+    if (! S_ISREG (st.st_mode)) {
+	fprintf (stderr, "Error: %s is not a file.\n", path);
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    fs_mtime = st.st_mtime;
+
+    path_uri = talloc_asprintf (notmuch, "mbox://%s", path);
+    status = notmuch_database_get_directory (notmuch, path_uri, &directory);
+    if (status) {
+	ret = status;
+	goto DONE;
+    }
+    db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
+
+    /* Unchanged since the last scan: nothing to do. */
+    if (directory && db_mtime == fs_mtime) {
+	goto DONE;
+    }
+
+    mbox = fopen (path, "r");
+    if (mbox == NULL) {
+	fprintf (stderr, "Error: couldn't open %s for reading.\n",
+		 path);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    line_len = getline (&line, &line_size, mbox);
+
+    if (line_len == -1) {
+	fprintf (stderr, "Error: reading from %s failed: %s\n",
+		 path, strerror (errno));
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    if (strncmp (line, "From ", 5) != 0) {
+	fprintf (stderr, "Note: Ignoring non-mbox file: %s\n",
+		 path);
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    /* Loop invariant: At the beginning of the loop, we have just read
+     * a From_ line, but haven't yet read any of the headers.
+     */
+    while (! feof (mbox)) {
+	is_headers = 1;
+	content_length = -1;
+
+	offset = ftell (mbox);
+	if (offset < 0) {
+	    fprintf (stderr, "Error: reading from %s failed: %s\n",
+		     path, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	}
+
+	/* Read lines until we either get to the next From_ header, or
+	 * we find a Content-Length header (mboxcl) and we run out of headers.
+	 */
+	do {
+	    /* Get the offset before we read, in case we got another From_ header. */
+	    end_offset = ftell (mbox);
+
+	    line_len = getline (&line, &line_size, mbox);
+	    /* On EOF or error the buffer contents are not meaningful,
+	     * so never inspect them. */
+	    if (line_len == -1)
+		break;
+
+	    /* Check to see if this line is a content-length header,
+	     * or the end of the headers. */
+	    if (is_headers && strncasecmp (line, "Content-Length: ",
+					   strlen ("Content-Length: ")) == 0)
+		content_length = strtol (line + strlen ("Content-Length: "),
+					 NULL, 10);
+
+	    if (is_headers && strcmp (line, "\n") == 0) {
+		is_headers = 0;
+		/* If we got a usable content_length, skip the message
+		 * body. Negative values are treated as absent: seeking
+		 * backwards would re-read the same data. */
+		if (content_length >= 0) {
+		    if (fseek (mbox, content_length, SEEK_CUR) != 0) {
+			fprintf (stderr, "Error: reading from %s failed: %s\n",
+				 path, strerror (errno));
+			ret = NOTMUCH_STATUS_FILE_ERROR;
+			goto DONE;
+		    }
+		    line_len = getline (&line, &line_size, mbox);
+
+		    /* We should be at the end of the message.  Sanity
+		     * check: there should be a blank line, and then
+		     * another From_ header. */
+		    if (line_len != -1 && strcmp (line, "\n") != 0) {
+			fprintf (stderr, "Warning: message with Content-Length not "
+				 "immediately followed by blank line (%ld)\n", offset);
+		    }
+
+		    end_offset = ftell (mbox);
+		    if (line_len != -1)
+			line_len = getline (&line, &line_size, mbox);
+
+		    if (line_len != -1 && strncmp (line, "From ", 5) != 0) {
+			fprintf (stderr, "Warning: message with Content-Length not "
+				 "immediately followed by another message (%ld)\n", offset);
+			fprintf (stderr, "Line was: %s", line);
+		    }
+		}
+	    }
+
+	} while (line_len != -1 && strncmp (line, "From ", 5) != 0);
+
+	/* getline() failing without EOF is a real read (or allocation)
+	 * failure; bail out instead of looping on it forever. */
+	if (line_len == -1 && ! feof (mbox)) {
+	    fprintf (stderr, "Error: reading from %s failed: %s\n",
+		     path, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	}
+
+	/* A From_ line followed directly by EOF or by another From_
+	 * line carries no message; do not emit a non-positive length. */
+	if (end_offset - 1 <= offset)
+	    continue;
+
+	/* end_offset is now after the \n but before the From_. */
+	message_uri = talloc_asprintf (notmuch, "mbox://%s#%ld+%ld",
+				       path, offset, (end_offset - 1) - offset);
+	/* NOTE(review): the status is still not acted upon; which
+	 * _add_message() results should abort the scan cannot be told
+	 * from this function alone. */
+	status = _add_message (state, notmuch, message_uri);
+	talloc_free (message_uri);
+	message_uri = NULL;
+    }
+
+    /* Do not record an mtime equal to the time of the stat() call. */
+    if (fs_mtime != stat_time)
+	_filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime;
+
+DONE:
+    free (line);
+    if (mbox)
+	fclose (mbox);
+    if (path_uri)
+	talloc_free (path_uri);
+    if (message_uri)
+	talloc_free (message_uri);
+    if (directory)
+	notmuch_directory_destroy (directory);
+
+    return ret;
+}
+
+/*
+ * Examine path recursively as follows:
+ *
+ * - Recurse on each subdirectory, as in add_files.
+ *
+ * - Call add_messages_mbox_file on every entry that dirent_type
+ *   reports as a regular file.
+ *
+ * Entries named ".", ".." and ".notmuch" are never descended into,
+ * and entries matching the user's ignore list are skipped in both
+ * passes. Scanning stops early when the global "interrupted" flag is
+ * set.
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS, NOTMUCH_STATUS_FILE_ERROR when the
+ * directory or one of its entries cannot be read, or the first
+ * non-zero status returned by a recursive call or by
+ * add_messages_mbox_file.
+ */
+static notmuch_status_t
+add_files_mbox (notmuch_database_t *notmuch,
+		const char *path,
+		add_files_state_t *state)
+{
+    struct dirent **fs_entries = NULL;
+    struct dirent *entry = NULL;
+    char *next = NULL;
+    int num_fs_entries = 0, i, entry_type;
+    struct stat st;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+
+    if (stat (path, &st)) {
+	fprintf (stderr, "Error reading directory %s: %s\n",
+		 path, strerror (errno));
+	return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    num_fs_entries = scandir (path, &fs_entries, 0,
+			      dirent_sort_inode);
+
+    if (num_fs_entries == -1) {
+	fprintf (stderr, "Error opening directory %s: %s\n",
+		 path, strerror (errno));
+	ret = NOTMUCH_STATUS_FILE_ERROR;
+	goto DONE;
+    }
+
+    /* Pass 1: Recurse into all sub-directories. */
+    for (i = 0; i < num_fs_entries; i++) {
+	if (interrupted)
+	    break;
+
+	entry = fs_entries[i];
+
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    /* Be pessimistic, e.g. so we don't lose lots of mail just
+	     * because a user broke a symlink. */
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    /* Leave through DONE so the scandir results are released. */
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	} else if (entry_type != S_IFDIR) {
+	    continue;
+	}
+
+	/* Ignore special directories to avoid infinite recursion.
+	 * Also ignore the .notmuch directory and files/directories
+	 * the user has configured to be ignored.
+	 */
+	if (strcmp (entry->d_name, ".") == 0 ||
+	    strcmp (entry->d_name, "..") == 0 ||
+	    strcmp (entry->d_name, ".notmuch") == 0 ||
+	    _entry_in_ignore_list (entry->d_name, state))
+	{
+	    continue;
+	}
+
+	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	status = add_files_mbox (notmuch, next, state);
+	if (status) {
+	    ret = status;
+	    goto DONE;
+	}
+	talloc_free (next);
+	next = NULL;
+    }
+
+    /* Pass 2: Scan every regular file in this directory as an mbox. */
+    for (i = 0; i < num_fs_entries; i++)
+    {
+	if (interrupted)
+	    break;
+
+	entry = fs_entries[i];
+
+	/* Ignore files & directories user has configured to be ignored */
+	if (_entry_in_ignore_list (entry->d_name, state))
+	    continue;
+
+	/* Only add regular files (and symlinks to regular files). */
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    ret = NOTMUCH_STATUS_FILE_ERROR;
+	    goto DONE;
+	} else if (entry_type != S_IFREG) {
+	    continue;
+	}
+
+	next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+	status = add_messages_mbox_file (notmuch, next, state);
+	talloc_free (next);
+	next = NULL;
+
+	if (status) {
+	    ret = status;
+	    goto DONE;
+	}
+    }
+
+DONE:
+    if (next)
+	talloc_free (next);
+    /* scandir() allocates the array and each entry with malloc();
+     * both have to be released by the caller. */
+    if (fs_entries) {
+	for (i = 0; i < num_fs_entries; i++)
+	    free (fs_entries[i]);
+	free (fs_entries);
+    }
+    return ret;
+}
+
+/* Call out to the appropriate add_files function, based on the URI.
+ *
+ * Only the "mbox" scheme is dispatched, to add_files_mbox. A URI that
+ * cannot be parsed, or an mbox URI without a path, is reported on
+ * stderr and skipped. URIs with any other scheme (or no scheme) are
+ * skipped silently.
+ *
+ * Returns the status of add_files_mbox for mbox URIs, and
+ * NOTMUCH_STATUS_SUCCESS in every other case.
+ */
+static notmuch_status_t
+add_files_uri (notmuch_database_t *notmuch,
+	       const char *uri,
+	       add_files_state_t *state)
+{
+    UriUriA parsed;
+    UriParserStateA parser;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    const char *path;
+    size_t scheme_len;
+
+    parser.uri = &parsed;
+    if (uriParseUriA (&parser, uri) != URI_SUCCESS) {
+	fprintf (stderr, "Warning: could not parse URI %s, skipping.\n", uri);
+	goto DONE;
+    }
+
+    /* A URI without a scheme has no text range to compare against. */
+    if (parsed.scheme.first == NULL)
+	goto DONE;
+
+    /* Compare the whole scheme: a bare strncmp bounded by the scheme's
+     * own length would also accept "", "m", "mb" and "mbo". */
+    scheme_len = parsed.scheme.afterLast - parsed.scheme.first;
+    if (scheme_len == strlen ("mbox") &&
+	strncmp (parsed.scheme.first, "mbox", scheme_len) == 0) {
+	if (parsed.pathHead == NULL || parsed.pathHead->text.first == NULL) {
+	    fprintf (stderr, "Warning: mbox URI %s has no path, skipping.\n",
+		     uri);
+	    goto DONE;
+	}
+
+	/* The first path segment starts just after the '/' that
+	 * introduces the path; step back over it so an absolute path
+	 * is passed on. Only do so when that byte really is a '/'. */
+	path = parsed.pathHead->text.first;
+	if (path > uri && path[-1] == '/')
+	    path--;
+
+	ret = add_files_mbox (notmuch, path, state);
+	goto DONE;
+    }
+
+DONE:
+    /* NOTE(review): assumes uriParseUriA leaves parsed in a state that
+     * is safe to free after a failed parse, as the original code did;
+     * confirm against the uriparser documentation. */
+    uriFreeUriMembersA (&parsed);
+    return ret;
+}
+
 static void
 setup_progress_printing_timer (void)
 {
-- 
1.7.9.5



More information about the notmuch mailing list