[PATCH v2 8/8] new: Add scan support for mbox:// URIs
Ethan Glasser-Camp
ethan.glasser.camp at gmail.com
Sun Jul 1 09:39:50 PDT 2012
This fixes the broken tests introduced by the last commit.
Signed-off-by: Ethan Glasser-Camp <ethan at betacantrips.com>
---
More text was added to clarify how mbox scanning works.
notmuch-config.c | 4 +
notmuch-new.c | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 303 insertions(+), 5 deletions(-)
diff --git a/notmuch-config.c b/notmuch-config.c
index 387f855..e02b6a9 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -56,6 +56,10 @@ static const char new_config_comment[] =
"\t Each URL denotes a \"root\" which will be searched for mail files.\n"
"\t How this search is performed depends on the scheme of the URL (the\n"
"\t part before the first colon).\n"
+ "\n"
+ "\t\tmbox:///path scans all subdirectories starting at path for mbox\n"
+ "\t\t files, and scans all mbox files for all messages.\n"
+ "\n"
"\t The maildir located at database.path, above, will automatically be added.\n";
static const char user_config_comment[] =
diff --git a/notmuch-new.c b/notmuch-new.c
index 5250562..061a1a8 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -19,6 +19,7 @@
*/
#include "notmuch-client.h"
+#include <uriparser/Uri.h>
#include <unistd.h>
@@ -653,14 +654,307 @@ add_files (notmuch_database_t *notmuch,
return ret;
}
+/* Scan an mbox file for messages.
+ *
+ * We assume that mboxes are append only -- this function does not
+ * check to see if messages have gone missing.
+ *
+ * The mtime of the mbox file is stored in a "directory" document in
+ * Xapian.
+ *
+ * Every message found is handed to _add_message as a URI of the form
+ * mbox://<path>#<offset>+<length>, where <offset> is the file position
+ * just after the message's From_ line.
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS when the file was scanned, or skipped
+ * because its mtime equals the stored one; NOTMUCH_STATUS_FILE_ERROR
+ * when the file cannot be read or does not start with a From_ line;
+ * otherwise the first non-zero status returned by
+ * notmuch_database_get_directory or _add_message.
+ */
+/* FIXME: a certain amount of this code appears in add_files_recursive,
+ * and could be refactored
+ */
+static notmuch_status_t
+add_messages_mbox_file (notmuch_database_t *notmuch,
+                        const char *path,
+                        add_files_state_t *state)
+{
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+    struct stat st;
+    time_t fs_mtime, db_mtime, stat_time;
+    /* Everything released at DONE starts out NULL, so that an early
+     * exit never frees or closes an uninitialized pointer.  getline
+     * also requires the buffer pointer to be NULL (or malloc'ed). */
+    FILE *mbox = NULL;
+    char *line = NULL, *path_uri = NULL, *message_uri = NULL;
+    ssize_t line_len;
+    size_t offset, end_offset, line_size = 0;
+    notmuch_directory_t *directory = NULL;
+    long content_length = -1;
+    int is_headers;
+
+    if (stat (path, &st)) {
+        fprintf (stderr, "Error reading mbox file %s: %s\n",
+                 path, strerror (errno));
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    stat_time = time (NULL);
+    if (! S_ISREG (st.st_mode)) {
+        fprintf (stderr, "Error: %s is not a file.\n", path);
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    fs_mtime = st.st_mtime;
+
+    path_uri = talloc_asprintf (notmuch, "mbox://%s", path);
+    status = notmuch_database_get_directory (notmuch, path_uri, &directory);
+    if (status) {
+        ret = status;
+        goto DONE;
+    }
+    db_mtime = directory ? notmuch_directory_get_mtime (directory) : 0;
+
+    /* Same mtime as recorded by the previous scan: nothing to do. */
+    if (directory && db_mtime == fs_mtime) {
+        goto DONE;
+    }
+
+    mbox = fopen (path, "r");
+    if (mbox == NULL) {
+        fprintf (stderr, "Error: couldn't open %s for reading.\n",
+                 path);
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    line_len = getline (&line, &line_size, mbox);
+
+    if (line_len == -1) {
+        fprintf (stderr, "Error: reading from %s failed: %s\n",
+                 path, strerror (errno));
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* NOTE(review): the message says "Ignoring", yet the error status
+     * returned here makes add_files_mbox stop scanning the remaining
+     * entries -- confirm that this is intended. */
+    if (strncmp (line, "From ", 5) != 0) {
+        fprintf (stderr, "Note: Ignoring non-mbox file: %s\n",
+                 path);
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Loop invariant: At the beginning of the loop, we have just read
+     * a From_ line, but haven't yet read any of the headers.
+     */
+    while (! feof (mbox) && ! ferror (mbox)) {
+        is_headers = 1;
+        offset = ftell (mbox);
+        content_length = -1;
+
+        /* Read lines until we either get to the next From_ header, or
+         * we find a Content-Length header (mboxcl) and we run out of headers.
+         */
+        do {
+            /* Get the offset before we read, in case we got another From_ header. */
+            end_offset = ftell (mbox);
+
+            line_len = getline (&line, &line_size, mbox);
+
+            /* EOF or read error: the buffer holds no new line, so do
+             * not examine it again. */
+            if (line_len == -1)
+                break;
+
+            /* Check to see if this line is a content-length header,
+             * or the end of the headers. */
+            if (is_headers && strncasecmp (line, "Content-Length: ",
+                                           strlen ("Content-Length: ")) == 0)
+                content_length = strtol (line + strlen ("Content-Length: "),
+                                         NULL, 10);
+
+            if (is_headers && strlen (line) == 1 && *line == '\n') {
+                is_headers = 0;
+                /* If we got a usable content_length, skip the message
+                 * body.  Negative values are not trusted: seeking
+                 * backwards would rescan data we have already read. */
+                if (content_length >= 0) {
+                    fseek (mbox, content_length, SEEK_CUR);
+                    line_len = getline (&line, &line_size, mbox);
+
+                    /* We should be at the end of the message. Sanity
+                     * check: there should be a blank line, and then
+                     * another From_ header. */
+                    if (line_len != -1 &&
+                        (strlen (line) != 1 || *line != '\n')) {
+                        fprintf (stderr, "Warning: message with Content-Length not "
+                                 "immediately followed by blank line (%zu)\n", offset);
+                    }
+
+                    end_offset = ftell (mbox);
+                    line_len = getline (&line, &line_size, mbox);
+
+                    if (line_len != -1 && strncmp (line, "From ", 5) != 0) {
+                        fprintf (stderr, "Warning: message with Content-Length not "
+                                 "immediately followed by another message (%zu)\n", offset);
+                        fprintf (stderr, "Line was: %s", line);
+                    }
+
+                    if (line_len == -1)
+                        break;
+                }
+            }
+
+        } while (! feof (mbox) && strncmp (line, "From ", 5) != 0);
+
+        /* A From_ line directly followed by another From_ line or by
+         * the end of the file delimits no message; the length computed
+         * below would wrap around.  The outer condition ends the loop
+         * on EOF or error; otherwise a new From_ line has just been
+         * consumed and the loop invariant holds. */
+        if (end_offset <= offset)
+            continue;
+
+        /* end_offset is now after the \n but before the From_. */
+        message_uri = talloc_asprintf (notmuch, "mbox://%s#%zu+%zu",
+                                       path, offset, (end_offset - 1) - offset);
+
+        _report_before_adding_file (state, message_uri);
+
+        status = _add_message (state, notmuch, message_uri);
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+
+        _report_added_file (state);
+
+        talloc_free (message_uri);
+        message_uri = NULL;
+    }
+
+    /* Do not record an mtime for a file we could not read to the end,
+     * so that the next run scans it again. */
+    if (ferror (mbox)) {
+        fprintf (stderr, "Error: reading from %s failed.\n", path);
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* This is the same precaution we take in maildir. */
+    if (fs_mtime != stat_time)
+        _filename_list_add (state->directory_mtimes, path_uri)->mtime = fs_mtime;
+
+DONE:
+    if (mbox)
+        fclose (mbox);
+    free (line);
+    if (path_uri)
+        talloc_free (path_uri);
+    if (message_uri)
+        talloc_free (message_uri);
+    if (directory)
+        notmuch_directory_destroy (directory);
+
+    return ret;
+}
+
+/*
+ * Examine path recursively as follows:
+ *
+ * - Recurse on each subdirectory, as in add_files.
+ *
+ * - Call add_messages_mbox_file on every entry that dirent_type
+ *   reports as a regular file.
+ *
+ * Entries matched by _entry_in_ignore_list are skipped in both passes.
+ * Scanning stops at the first error (or when interrupted is set), and
+ * the status of that first error is returned.
+ */
+/* FIXME: this is almost entirely bits-and-pieces from
+ * add_files_recursive and could do with a refactor */
+static notmuch_status_t
+add_files_mbox (notmuch_database_t *notmuch,
+                const char *path,
+                add_files_state_t *state)
+{
+    struct dirent **fs_entries = NULL;
+    struct dirent *entry = NULL;
+    char *next = NULL;
+    int num_fs_entries = 0, i, entry_type;
+    struct stat st;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS, status;
+
+    if (stat (path, &st)) {
+        fprintf (stderr, "Error reading directory %s: %s\n",
+                 path, strerror (errno));
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    num_fs_entries = scandir (path, &fs_entries, 0,
+                              dirent_sort_inode);
+
+    if (num_fs_entries == -1) {
+        fprintf (stderr, "Error opening directory %s: %s\n",
+                 path, strerror (errno));
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Pass 1: Recurse into all sub-directories. */
+    for (i = 0; i < num_fs_entries; i++) {
+        if (interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        entry_type = dirent_type (path, entry);
+        if (entry_type == -1) {
+            /* Be pessimistic, e.g. so we don't lose lots of mail just
+             * because a user broke a symlink. */
+            fprintf (stderr, "Error reading file %s/%s: %s\n",
+                     path, entry->d_name, strerror (errno));
+            /* Leave through DONE so the scandir results are freed. */
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        } else if (entry_type != S_IFDIR) {
+            continue;
+        }
+
+        /* Ignore special directories to avoid infinite recursion.
+         * Also ignore the .notmuch directory and files/directories
+         * the user has configured to be ignored.
+         */
+        if (strcmp (entry->d_name, ".") == 0 ||
+            strcmp (entry->d_name, "..") == 0 ||
+            strcmp (entry->d_name, ".notmuch") == 0 ||
+            _entry_in_ignore_list (entry->d_name, state))
+        {
+            continue;
+        }
+
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+        status = add_files_mbox (notmuch, next, state);
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+        talloc_free (next);
+        next = NULL;
+    }
+
+    /* Pass 2: Scan every regular file in this directory as an mbox. */
+    for (i = 0; i < num_fs_entries; i++)
+    {
+        if (interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        /* Ignore files & directories user has configured to be ignored */
+        if (_entry_in_ignore_list (entry->d_name, state))
+            continue;
+
+        /* Only add regular files (and symlinks to regular files). */
+        entry_type = dirent_type (path, entry);
+        if (entry_type == -1) {
+            fprintf (stderr, "Error reading file %s/%s: %s\n",
+                     path, entry->d_name, strerror (errno));
+            /* Leave through DONE so the scandir results are freed. */
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        } else if (entry_type != S_IFREG) {
+            continue;
+        }
+
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+        status = add_messages_mbox_file (notmuch, next, state);
+        talloc_free (next);
+        next = NULL;
+
+        if (status) {
+            ret = status;
+            goto DONE;
+        }
+    }
+
+DONE:
+    if (next)
+        talloc_free (next);
+    /* scandir allocates both the array and each entry with malloc;
+     * the caller has to free them. */
+    if (fs_entries) {
+        for (i = 0; i < num_fs_entries; i++)
+            free (fs_entries[i]);
+        free (fs_entries);
+    }
+    return ret;
+}
+
/* Call out to the appropriate add_files function, based on the URI. */
+/* Only the "mbox" scheme is handled here; a URI that fails to parse
+ * or that uses any other scheme is skipped and reported as success.
+ */
static notmuch_status_t
-add_files_uri (unused(notmuch_database_t *notmuch),
- unused(const char *uri),
- unused(add_files_state_t *state))
+add_files_uri (notmuch_database_t *notmuch,
+               const char *uri,
+               add_files_state_t *state)
{
- /* Stub for now */
- return NOTMUCH_STATUS_SUCCESS;
+    UriUriA parsed;
+    UriParserStateA parser;
+    notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
+    size_t scheme_len;
+
+    parser.uri = &parsed;
+    if (uriParseUriA (&parser, uri) != URI_SUCCESS)
+        goto DONE;
+
+    /* A URI without a scheme cannot select a scanner. */
+    if (parsed.scheme.first == NULL)
+        goto DONE;
+
+    /* Compare the whole scheme: bounding strncmp by the URI's scheme
+     * length alone would also accept any prefix of "mbox", such as
+     * "m" or "mb". */
+    scheme_len = (size_t) (parsed.scheme.afterLast - parsed.scheme.first);
+    if (scheme_len == strlen ("mbox") &&
+        strncmp (parsed.scheme.first, "mbox", scheme_len) == 0) {
+        if (parsed.pathHead == NULL || parsed.pathHead->text.first == NULL) {
+            fprintf (stderr, "Error: mbox URI %s has no path.\n", uri);
+            ret = NOTMUCH_STATUS_FILE_ERROR;
+            goto DONE;
+        }
+
+        /* Step back one character from the first path segment --
+         * presumably onto the '/' that precedes it in uri, so that an
+         * absolute path is passed on (TODO confirm). */
+        ret = add_files_mbox (notmuch, parsed.pathHead->text.first - 1, state);
+        goto DONE;
+    }
+
+DONE:
+    uriFreeUriMembersA (&parsed);
+    return ret;
}
static void
--
1.7.9.5
More information about the notmuch
mailing list