[notmuch] [PATCH 2/4] Convert mailstore abstraction
Michal Sojka
sojkam1 at fel.cvut.cz
Thu Mar 18 08:39:38 PDT 2010
The code for detection of new files in the mailstore and their
addition to the database is moved from notmuch-new.c to
lib/mailstore-files.c, where it is called by the abstract mailstore
interface.
The code was changed to allow the progress reporting function to be
implemented outside of the notmuch library.
Signed-off-by: Michal Sojka <sojkam1 at fel.cvut.cz>
---
lib/mailstore-files.c | 590 +++++++++++++++++++++++++++++++++++++++++++++++
notmuch-new.c | 611 ++-----------------------------------------------
2 files changed, 615 insertions(+), 586 deletions(-)
diff --git a/lib/mailstore-files.c b/lib/mailstore-files.c
index 92d7f5d..ace2664 100644
--- a/lib/mailstore-files.c
+++ b/lib/mailstore-files.c
@@ -20,9 +20,596 @@
* Michal Sojka <sojkam1 at fel.cvut.cz>
*/
+#define _GNU_SOURCE /* For asprintf() */
#include "notmuch.h"
#include "mailstore-private.h"
+#include <dirent.h>
+/* One element of a singly-linked list of filenames. 'next' is NULL on
+ * the last element (see _filename_list_add). */
+typedef struct _filename_node {
+ char *filename;
+ struct _filename_node *next;
+} _filename_node_t;
+
+/* A list of filenames that supports appending at the end.
+ *
+ * 'head' is the first node (NULL while the list is empty). 'tail' is
+ * the address of the pointer that the next appended node gets stored
+ * into: &head for an empty list, otherwise &last->next. */
+typedef struct _filename_list {
+ _filename_node_t *head;
+ _filename_node_t **tail;
+} _filename_list_t;
+
+/* Per-run private state of this mail store, reachable through the
+ * 'priv' member of the indexing context. add_files_recursive fills
+ * both lists with absolute paths; index_new walks them afterwards to
+ * remove the corresponding entries from the database. */
+typedef struct _indexing_context_priv {
+ _filename_list_t *removed_files;
+ _filename_list_t *removed_directories;
+} _indexing_context_priv_t;
+
+/* Allocate an empty filename list as a talloc child of 'ctx'.
+ *
+ * Returns the new list, or NULL if the allocation fails. */
+static _filename_list_t *
+_filename_list_create (const void *ctx)
+{
+    _filename_list_t *fresh = talloc (ctx, _filename_list_t);
+
+    if (fresh != NULL) {
+        /* Empty list: the append slot is the head pointer itself. */
+        fresh->head = NULL;
+        fresh->tail = &fresh->head;
+    }
+
+    return fresh;
+}
+
+/* Append a copy of 'filename' to the end of 'list'.
+ *
+ * Both the node and the copied string are talloc children of 'list'.
+ * If either allocation fails (which also happens when 'filename' is
+ * NULL, e.g. because the caller's own talloc_asprintf failed), a
+ * message is printed to stderr and the list is left unchanged instead
+ * of dereferencing a NULL pointer. */
+static void
+_filename_list_add (_filename_list_t *list,
+                    const char *filename)
+{
+    _filename_node_t *node = talloc (list, _filename_node_t);
+
+    if (node == NULL) {
+        fprintf (stderr, "Error: Out of memory while recording a filename.\n");
+        return;
+    }
+
+    node->filename = talloc_strdup (list, filename);
+    if (node->filename == NULL) {
+        fprintf (stderr, "Error: Out of memory while recording a filename.\n");
+        talloc_free (node);
+        return;
+    }
+    node->next = NULL;
+
+    /* Link the node into the current append slot, then move the slot
+     * to the new node's 'next' pointer. */
+    *(list->tail) = node;
+    list->tail = &node->next;
+}
+
+/* Apply the tags given to every newly added message: "inbox" first,
+ * then "unread". */
+static void
+tag_inbox_and_unread (notmuch_message_t *message)
+{
+    static const char *const initial_tags[] = { "inbox", "unread" };
+    unsigned int t;
+
+    for (t = 0; t < sizeof (initial_tags) / sizeof (initial_tags[0]); t++)
+        notmuch_message_add_tag (message, initial_tags[t]);
+}
+
+/* scandir() comparison callback: order directory entries by inode
+ * number.
+ *
+ * Returns a negative value, zero, or a positive value when the inode
+ * of *a is less than, equal to, or greater than the inode of *b.
+ * Equal inodes (e.g. hard links) must compare as 0; reporting "greater"
+ * in both argument orders would give the sort an inconsistent
+ * ordering. */
+static int
+dirent_sort_inode (const struct dirent **a, const struct dirent **b)
+{
+    ino_t left = (*a)->d_ino;
+    ino_t right = (*b)->d_ino;
+
+    return (left > right) - (left < right);
+}
+
+/* scandir() comparison callback: order directory entries by name,
+ * using plain strcmp() on d_name. */
+static int
+dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
+{
+    const char *lhs = (*a)->d_name;
+    const char *rhs = (*b)->d_name;
+
+    return strcmp (lhs, rhs);
+}
+
+/* Decide whether a directory listing looks like a Maildir.
+ *
+ * Counts the entries named "new", "cur" or "tmp" whose type is either
+ * a directory or unknown; entries of any other type are ignored.
+ *
+ * Returns 1 once three such entries have been seen, 0 otherwise.
+ */
+static int
+_entries_resemble_maildir (struct dirent **entries, int count)
+{
+    static const char *const maildir_names[] = { "new", "cur", "tmp" };
+    int idx, matches = 0;
+
+    for (idx = 0; idx < count; idx++) {
+        const struct dirent *candidate = entries[idx];
+        unsigned int k;
+
+        if (candidate->d_type != DT_DIR && candidate->d_type != DT_UNKNOWN)
+            continue;
+
+        for (k = 0; k < sizeof (maildir_names) / sizeof (maildir_names[0]); k++) {
+            if (strcmp (candidate->d_name, maildir_names[k]) != 0)
+                continue;
+
+            matches++;
+            if (matches == 3)
+                return 1;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+
+/* Examine 'path' recursively as follows:
+ *
+ * o Ask the filesystem for the mtime of 'path' (fs_mtime)
+ * o Ask the database for its timestamp of 'path' (db_mtime)
+ *
+ * o Ask the filesystem for files and directories within 'path'
+ * (via scandir and stored in fs_entries)
+ * o Ask the database for files and directories within 'path'
+ * (db_files and db_subdirs)
+ *
+ * o Pass 1: For each directory in fs_entries, recursively call into
+ * this same function.
+ *
+ * o Pass 2: If 'fs_mtime' > 'db_mtime', then walk fs_entries
+ * simultaneously with db_files and db_subdirs. Look for one of
+ * three interesting cases:
+ *
+ * 1. Regular file in fs_entries and not in db_files
+ * This is a new file to add_message into the database.
+ *
+ * 2. Filename in db_files not in fs_entries.
+ * This is a file that has been removed from the mail store.
+ *
+ * 3. Directory in db_subdirs not in fs_entries
+ * This is a directory that has been removed from the mail store.
+ *
+ * Note that the addition of a directory is not interesting here,
+ * since that will have been taken care of in pass 1. Also, we
+ * don't immediately act on file/directory removal since we must
+ * ensure that in the case of a rename that the new filename is
+ * added before the old filename is removed, (so that no
+ * information is lost from the database).
+ *
+ * o Tell the database to update its time of 'path' to 'fs_mtime'
+ */
+/* Scan 'path' and everything below it: add messages for regular files
+ * the database does not know yet, and queue files and directories
+ * that vanished from the filesystem in the lists held by state->priv
+ * (they are only removed from the database later, by the caller).
+ *
+ * Returns NOTMUCH_STATUS_SUCCESS, or an error status if 'path' cannot
+ * be read or if adding a message fails fatally.
+ *
+ * Cleanup releases every dirent returned by scandir. If the scan is
+ * interrupted, no "left-over" database entries are queued for removal
+ * and the stored mtime is not updated, because an incomplete walk
+ * cannot tell which database entries are really gone. */
+static notmuch_status_t
+add_files_recursive (notmuch_mailstore_t *mailstore,
+                     const char *path,
+                     notmuch_indexing_context_t *state)
+{
+    struct dirent *entry = NULL;
+    char *next = NULL;
+    time_t fs_mtime, db_mtime;
+    notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
+    notmuch_message_t *message = NULL;
+    struct dirent **fs_entries = NULL;
+    int i, num_fs_entries;
+    notmuch_directory_t *directory;
+    notmuch_filenames_t *db_files = NULL;
+    notmuch_filenames_t *db_subdirs = NULL;
+    struct stat st;
+    notmuch_bool_t is_maildir, new_directory;
+    _indexing_context_priv_t *priv = state->priv;
+    notmuch_database_t *notmuch = mailstore->notmuch;
+
+    if (stat (path, &st)) {
+        fprintf (stderr, "Error reading directory %s: %s\n",
+                 path, strerror (errno));
+        return NOTMUCH_STATUS_FILE_ERROR;
+    }
+
+    /* This is not an error since we may have recursed based on a
+     * symlink to a regular file, not a directory, and we don't know
+     * that until this stat. */
+    if (! S_ISDIR (st.st_mode))
+        return NOTMUCH_STATUS_SUCCESS;
+
+    fs_mtime = st.st_mtime;
+
+    directory = notmuch_database_get_directory (notmuch, path);
+    db_mtime = notmuch_directory_get_mtime (directory);
+
+    if (db_mtime == 0) {
+        new_directory = TRUE;
+        db_files = NULL;
+        db_subdirs = NULL;
+    } else {
+        new_directory = FALSE;
+        db_files = notmuch_directory_get_child_files (directory);
+        db_subdirs = notmuch_directory_get_child_directories (directory);
+    }
+
+    /* If the database knows about this directory, then we sort based
+     * on strcmp to match the database sorting. Otherwise, we can do
+     * inode-based sorting for faster filesystem operation. */
+    num_fs_entries = scandir (path, &fs_entries, 0,
+                              new_directory ?
+                              dirent_sort_inode : dirent_sort_strcmp_name);
+
+    if (num_fs_entries == -1) {
+        fprintf (stderr, "Error opening directory %s: %s\n",
+                 path, strerror (errno));
+        ret = NOTMUCH_STATUS_FILE_ERROR;
+        goto DONE;
+    }
+
+    /* Pass 1: Recurse into all sub-directories. */
+    is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries);
+
+    for (i = 0; i < num_fs_entries; i++) {
+        if (state->interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        /* We only want to descend into directories.
+         * But symlinks can be to directories too, of course.
+         *
+         * And if the filesystem doesn't tell us the file type in the
+         * scandir results, then it might be a directory (and if not,
+         * then we'll stat and return immediately in the next level of
+         * recursion). */
+        if (entry->d_type != DT_DIR &&
+            entry->d_type != DT_LNK &&
+            entry->d_type != DT_UNKNOWN)
+        {
+            continue;
+        }
+
+        /* Ignore special directories to avoid infinite recursion.
+         * Also ignore the .notmuch directory and any "tmp" directory
+         * that appears within a maildir.
+         */
+        /* XXX: Eventually we'll want more sophistication to let the
+         * user specify files to be ignored. */
+        if (strcmp (entry->d_name, ".") == 0 ||
+            strcmp (entry->d_name, "..") == 0 ||
+            (is_maildir && strcmp (entry->d_name, "tmp") == 0) ||
+            strcmp (entry->d_name, ".notmuch") ==0)
+        {
+            continue;
+        }
+
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+        status = add_files_recursive (mailstore, next, state);
+        if (status && ret == NOTMUCH_STATUS_SUCCESS)
+            ret = status;
+        talloc_free (next);
+        next = NULL;
+    }
+
+    /* If this directory hasn't been modified since the last
+     * "notmuch new", then we can skip the second pass entirely. */
+    if (fs_mtime <= db_mtime)
+        goto DONE;
+
+    /* Pass 2: Scan for new files, removed files, and removed directories. */
+    for (i = 0; i < num_fs_entries; i++)
+    {
+        if (state->interrupted)
+            break;
+
+        entry = fs_entries[i];
+
+        /* Check if we've walked past any names in db_files or
+         * db_subdirs. If so, these have been deleted. */
+        while (notmuch_filenames_valid (db_files) &&
+               strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0)
+        {
+            char *absolute = talloc_asprintf (priv->removed_files,
+                                              "%s/%s", path,
+                                              notmuch_filenames_get (db_files));
+
+            _filename_list_add (priv->removed_files, absolute);
+
+            notmuch_filenames_move_to_next (db_files);
+        }
+
+        while (notmuch_filenames_valid (db_subdirs) &&
+               strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0)
+        {
+            const char *filename = notmuch_filenames_get (db_subdirs);
+
+            if (strcmp (filename, entry->d_name) < 0)
+            {
+                char *absolute = talloc_asprintf (priv->removed_directories,
+                                                  "%s/%s", path, filename);
+
+                _filename_list_add (priv->removed_directories, absolute);
+            }
+
+            notmuch_filenames_move_to_next (db_subdirs);
+        }
+
+        /* If we're looking at a symlink, we only want to add it if it
+         * links to a regular file, (and not to a directory, say).
+         *
+         * Similarly, if the file is of unknown type (due to filesystem
+         * limitations), then we also need to look closer.
+         *
+         * In either case, a stat does the trick.
+         */
+        if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {
+            int err;
+
+            next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+            err = stat (next, &st);
+            talloc_free (next);
+            next = NULL;
+
+            /* Don't emit an error for a link pointing nowhere, since
+             * the directory-traversal pass will have already done
+             * that. */
+            if (err)
+                continue;
+
+            if (! S_ISREG (st.st_mode))
+                continue;
+        } else if (entry->d_type != DT_REG) {
+            continue;
+        }
+
+        /* Don't add a file that we've added before. */
+        if (notmuch_filenames_valid (db_files) &&
+            strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0)
+        {
+            notmuch_filenames_move_to_next (db_files);
+            continue;
+        }
+
+        /* We're now looking at a regular file that doesn't yet exist
+         * in the database, so add it. */
+        next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
+
+        state->processed_files++;
+
+        if (state->verbose) {
+            if (state->output_is_a_tty)
+                printf("\r\033[K");
+
+            printf ("%i/%i: %s",
+                    state->processed_files,
+                    state->total_files,
+                    next);
+
+            putchar((state->output_is_a_tty) ? '\r' : '\n');
+            fflush (stdout);
+        }
+
+        status = notmuch_database_add_message (notmuch, next, &message);
+        switch (status) {
+        /* success */
+        case NOTMUCH_STATUS_SUCCESS:
+            state->added_messages++;
+            tag_inbox_and_unread (message);
+            break;
+        /* Non-fatal issues (go on to next file) */
+        case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
+            /* Stay silent on this one. */
+            break;
+        case NOTMUCH_STATUS_FILE_NOT_EMAIL:
+            fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
+                     next);
+            break;
+        /* Fatal issues. Don't process anymore. */
+        case NOTMUCH_STATUS_READ_ONLY_DATABASE:
+        case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
+        case NOTMUCH_STATUS_OUT_OF_MEMORY:
+            fprintf (stderr, "Error: %s. Halting processing.\n",
+                     notmuch_status_to_string (status));
+            ret = status;
+            goto DONE;
+        default:
+        case NOTMUCH_STATUS_FILE_ERROR:
+        case NOTMUCH_STATUS_NULL_POINTER:
+        case NOTMUCH_STATUS_TAG_TOO_LONG:
+        case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
+        case NOTMUCH_STATUS_LAST_STATUS:
+            INTERNAL_ERROR ("add_message returned unexpected value: %d", status);
+            goto DONE;
+        }
+
+        if (message) {
+            notmuch_message_destroy (message);
+            message = NULL;
+        }
+
+        /* The flag is raised elsewhere (e.g. from a timer signal
+         * handler); clear it before reporting so one request yields
+         * one report. */
+        if (state->print_progress &&
+            state->print_progress_cb) {
+            state->print_progress = 0;
+            state->print_progress_cb (state);
+        }
+
+        talloc_free (next);
+        next = NULL;
+    }
+
+    /* An interrupted walk has not looked at every filesystem entry, so
+     * the names still pending in db_files/db_subdirs are not known to
+     * be deleted. Queueing them for removal would drop messages that
+     * still exist; leave them (and the stored mtime) untouched so the
+     * next run rescans this directory. */
+    if (state->interrupted)
+        goto DONE;
+
+    /* Now that we've walked the whole filesystem list, anything left
+     * over in the database lists has been deleted. */
+    while (notmuch_filenames_valid (db_files))
+    {
+        char *absolute = talloc_asprintf (priv->removed_files,
+                                          "%s/%s", path,
+                                          notmuch_filenames_get (db_files));
+
+        _filename_list_add (priv->removed_files, absolute);
+
+        notmuch_filenames_move_to_next (db_files);
+    }
+
+    while (notmuch_filenames_valid (db_subdirs))
+    {
+        char *absolute = talloc_asprintf (priv->removed_directories,
+                                          "%s/%s", path,
+                                          notmuch_filenames_get (db_subdirs));
+
+        _filename_list_add (priv->removed_directories, absolute);
+
+        notmuch_filenames_move_to_next (db_subdirs);
+    }
+
+    status = notmuch_directory_set_mtime (directory, fs_mtime);
+    if (status && ret == NOTMUCH_STATUS_SUCCESS)
+        ret = status;
+
+DONE:
+    if (next)
+        talloc_free (next);
+    /* scandir allocates each dirent separately as well as the array
+     * holding them, so release all of them (num_fs_entries is -1 when
+     * scandir failed, in which case the loop does not run). 'entry'
+     * only aliases one of these elements and must not be freed on its
+     * own. */
+    if (fs_entries) {
+        for (i = 0; i < num_fs_entries; i++)
+            free (fs_entries[i]);
+        free (fs_entries);
+    }
+    if (db_subdirs)
+        notmuch_filenames_destroy (db_subdirs);
+    if (db_files)
+        notmuch_filenames_destroy (db_files);
+    if (directory)
+        notmuch_directory_destroy (directory);
+
+    return ret;
+}
+
+/* XXX: This should be merged with the add_files function since it
+ * shares a lot of logic with it. */
+/* Recursively count all regular files in path and all sub-directories
+ * of path. The result is added to *count (which should be
+ * initialized to zero by the top-level caller before calling
+ * count_files).
+ *
+ * 'mailstore' is unused by this implementation. Counting stops early
+ * once *interrupted becomes non-zero. Entries that cannot be stat'ed
+ * are skipped, and every dirent returned by scandir is released
+ * before returning. */
+static void
+count_files (notmuch_mailstore_t *mailstore,
+             const char *path, int *count,
+             volatile sig_atomic_t *interrupted)
+{
+    struct dirent *entry;
+    char *next;
+    struct stat st;
+    struct dirent **fs_entries = NULL;
+    int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode);
+    int i;
+
+    (void)mailstore;
+    if (num_fs_entries == -1) {
+        fprintf (stderr, "Warning: failed to open directory %s: %s\n",
+                 path, strerror (errno));
+        goto DONE;
+    }
+
+    for (i = 0; !*interrupted && i < num_fs_entries; i++) {
+        entry = fs_entries[i];
+
+        /* Ignore special directories to avoid infinite recursion.
+         * Also ignore the .notmuch directory.
+         */
+        /* XXX: Eventually we'll want more sophistication to let the
+         * user specify files to be ignored. */
+        if (strcmp (entry->d_name, ".") == 0 ||
+            strcmp (entry->d_name, "..") == 0 ||
+            strcmp (entry->d_name, ".notmuch") == 0)
+        {
+            continue;
+        }
+
+        if (asprintf (&next, "%s/%s", path, entry->d_name) == -1) {
+            next = NULL;
+            fprintf (stderr, "Error descending from %s to %s: Out of memory\n",
+                     path, entry->d_name);
+            continue;
+        }
+
+        /* If stat fails (e.g. a dangling symlink) 'st' holds no valid
+         * data for this entry, so it is neither counted nor descended
+         * into. */
+        if (stat (next, &st) == -1) {
+            free (next);
+            continue;
+        }
+
+        if (S_ISREG (st.st_mode)) {
+            *count = *count + 1;
+            if (*count % 1000 == 0) {
+                printf ("Found %d files so far.\r", *count);
+                fflush (stdout);
+            }
+        } else if (S_ISDIR (st.st_mode)) {
+            count_files (mailstore, next, count, interrupted);
+        }
+
+        free (next);
+    }
+
+DONE:
+    /* scandir allocates each dirent separately as well as the array
+     * holding them; release them all (the loop does not run when
+     * scandir failed and returned -1). */
+    if (fs_entries) {
+        for (i = 0; i < num_fs_entries; i++)
+            free (fs_entries[i]);
+        free (fs_entries);
+    }
+}
+
+/* Remove from the database every filename recorded directly under
+ * 'path', then do the same for each sub-directory the database lists
+ * for 'path'.
+ *
+ * For each removed filename, *renamed_files is incremented when
+ * notmuch_database_remove_message reports
+ * NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID, otherwise *removed_files is.
+ * 'ctx' is the talloc context used for the temporary path strings. */
+static void
+_remove_directory (void *ctx,
+                   notmuch_database_t *notmuch,
+                   const char *path,
+                   int *renamed_files,
+                   int *removed_files)
+{
+    notmuch_directory_t *dir_doc;
+    notmuch_filenames_t *child_files, *child_dirs;
+
+    dir_doc = notmuch_database_get_directory (notmuch, path);
+
+    /* First the files directly inside this directory. */
+    child_files = notmuch_directory_get_child_files (dir_doc);
+    while (notmuch_filenames_valid (child_files)) {
+        char *full_path;
+        notmuch_status_t result;
+        int *counter;
+
+        full_path = talloc_asprintf (ctx, "%s/%s", path,
+                                     notmuch_filenames_get (child_files));
+        result = notmuch_database_remove_message (notmuch, full_path);
+
+        counter = (result == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID)
+            ? renamed_files : removed_files;
+        *counter = *counter + 1;
+
+        talloc_free (full_path);
+        notmuch_filenames_move_to_next (child_files);
+    }
+
+    /* Then recurse into every sub-directory known to the database. */
+    child_dirs = notmuch_directory_get_child_directories (dir_doc);
+    while (notmuch_filenames_valid (child_dirs)) {
+        char *full_path;
+
+        full_path = talloc_asprintf (ctx, "%s/%s", path,
+                                     notmuch_filenames_get (child_dirs));
+        _remove_directory (ctx, notmuch, full_path, renamed_files, removed_files);
+
+        talloc_free (full_path);
+        notmuch_filenames_move_to_next (child_dirs);
+    }
+
+    notmuch_directory_destroy (dir_doc);
+}
+
+/* 'index_new' hook of the "files" mail store.
+ *
+ * Scans 'path' with add_files_recursive, then removes from the
+ * database the files and directories that the scan queued as no
+ * longer present. indexing_ctx->removed_files and ->renamed_files are
+ * reset after the scan and then count the removals.
+ *
+ * Returns the status of the scan, or NOTMUCH_STATUS_OUT_OF_MEMORY if
+ * the private state cannot be allocated. indexing_ctx->priv is only
+ * valid during the call; it is reset to NULL before returning so that
+ * no pointer to the freed state is left behind. */
+static notmuch_private_status_t
+index_new(notmuch_mailstore_t *mailstore, const char* path,
+          notmuch_indexing_context_t *indexing_ctx)
+{
+    _indexing_context_priv_t *priv;
+    _filename_node_t *f;
+    notmuch_status_t status, ret;
+    notmuch_database_t *notmuch = mailstore->notmuch;
+
+    priv = talloc(NULL, _indexing_context_priv_t);
+    indexing_ctx->priv = priv;
+    if (priv == NULL)
+        return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+    priv->removed_files = _filename_list_create (priv);
+    priv->removed_directories = _filename_list_create (priv);
+    /* Both lists are dereferenced below and by the scan, so bail out
+     * before using them if either could not be created. */
+    if (priv->removed_files == NULL || priv->removed_directories == NULL) {
+        ret = NOTMUCH_STATUS_OUT_OF_MEMORY;
+        goto DONE;
+    }
+
+    ret = add_files_recursive(mailstore, path, indexing_ctx);
+
+    /* Removals are processed only after the whole scan so that, for a
+     * rename, the new filename is added before the old one is
+     * removed. */
+    indexing_ctx->removed_files = 0;
+    indexing_ctx->renamed_files = 0;
+    for (f = priv->removed_files->head; f; f = f->next) {
+        status = notmuch_database_remove_message (notmuch, f->filename);
+        if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID)
+            indexing_ctx->renamed_files++;
+        else
+            indexing_ctx->removed_files++;
+    }
+
+    for (f = priv->removed_directories->head; f; f = f->next) {
+        _remove_directory (priv, notmuch, f->filename,
+                           &indexing_ctx->renamed_files,
+                           &indexing_ctx->removed_files);
+    }
+
+DONE:
+    /* Freeing 'priv' also frees both lists, which are its talloc
+     * children. */
+    indexing_ctx->priv = NULL;
+    talloc_free(priv);
+
+    return ret;
+}
static FILE *
open_file(notmuch_mailstore_t *mailstore, const char *filename)
@@ -39,5 +626,8 @@ open_file(notmuch_mailstore_t *mailstore, const char *filename)
/* Original notmuch mail store */
struct _notmuch_mailstore mailstore_files = {
.type = "files",
+ /* Hooks implemented in this file. */
+ .count_files = count_files,
+ .index_new = index_new,
+ /* No sync_tags hook is installed for this mail store. */
+ .sync_tags = NULL,
.open_file = open_file,
};
diff --git a/notmuch-new.c b/notmuch-new.c
index 2d0ba6c..ede0859 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -22,35 +22,12 @@
#include <unistd.h>
-typedef struct _filename_node {
- char *filename;
- struct _filename_node *next;
-} _filename_node_t;
-
-typedef struct _filename_list {
- _filename_node_t *head;
- _filename_node_t **tail;
-} _filename_list_t;
-
-typedef struct {
- int output_is_a_tty;
- int verbose;
-
- int total_files;
- int processed_files;
- int added_messages;
- struct timeval tv_start;
-
- _filename_list_t *removed_files;
- _filename_list_t *removed_directories;
-} add_files_state_t;
-
-static volatile sig_atomic_t do_add_files_print_progress = 0;
+notmuch_indexing_context_t *indexing_ctx;
static void
handle_sigalrm (unused (int signal))
{
- do_add_files_print_progress = 1;
+ indexing_ctx->print_progress = 1;
}
static volatile sig_atomic_t interrupted;
@@ -62,46 +39,11 @@ handle_sigint (unused (int sig))
static char msg[] = "Stopping... \n";
ignored = write(2, msg, sizeof(msg)-1);
- interrupted = 1;
-}
-
-static _filename_list_t *
-_filename_list_create (const void *ctx)
-{
- _filename_list_t *list;
-
- list = talloc (ctx, _filename_list_t);
- if (list == NULL)
- return NULL;
-
- list->head = NULL;
- list->tail = &list->head;
-
- return list;
-}
-
-static void
-_filename_list_add (_filename_list_t *list,
- const char *filename)
-{
- _filename_node_t *node = talloc (list, _filename_node_t);
-
- node->filename = talloc_strdup (list, filename);
- node->next = NULL;
-
- *(list->tail) = node;
- list->tail = &node->next;
+ indexing_ctx->interrupted = 1;
}
static void
-tag_inbox_and_unread (notmuch_message_t *message)
-{
- notmuch_message_add_tag (message, "inbox");
- notmuch_message_add_tag (message, "unread");
-}
-
-static void
-add_files_print_progress (add_files_state_t *state)
+add_files_print_progress (notmuch_indexing_context_t *state)
{
struct timeval tv_now;
double elapsed_overall, rate_overall;
@@ -128,388 +70,13 @@ add_files_print_progress (add_files_state_t *state)
fflush (stdout);
}
-static int
-dirent_sort_inode (const struct dirent **a, const struct dirent **b)
-{
- return ((*a)->d_ino < (*b)->d_ino) ? -1 : 1;
-}
-
-static int
-dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
-{
- return strcmp ((*a)->d_name, (*b)->d_name);
-}
-
-/* Test if the directory looks like a Maildir directory.
- *
- * Search through the array of directory entries to see if we can find all
- * three subdirectories typical for Maildir, that is "new", "cur", and "tmp".
- *
- * Return 1 if the directory looks like a Maildir and 0 otherwise.
- */
-static int
-_entries_resemble_maildir (struct dirent **entries, int count)
-{
- int i, found = 0;
-
- for (i = 0; i < count; i++) {
- if (entries[i]->d_type != DT_DIR && entries[i]->d_type != DT_UNKNOWN)
- continue;
-
- if (strcmp(entries[i]->d_name, "new") == 0 ||
- strcmp(entries[i]->d_name, "cur") == 0 ||
- strcmp(entries[i]->d_name, "tmp") == 0)
- {
- found++;
- if (found == 3)
- return 1;
- }
- }
-
- return 0;
-}
-
-/* Examine 'path' recursively as follows:
- *
- * o Ask the filesystem for the mtime of 'path' (fs_mtime)
- * o Ask the database for its timestamp of 'path' (db_mtime)
- *
- * o Ask the filesystem for files and directories within 'path'
- * (via scandir and stored in fs_entries)
- * o Ask the database for files and directories within 'path'
- * (db_files and db_subdirs)
- *
- * o Pass 1: For each directory in fs_entries, recursively call into
- * this same function.
- *
- * o Pass 2: If 'fs_mtime' > 'db_mtime', then walk fs_entries
- * simultaneously with db_files and db_subdirs. Look for one of
- * three interesting cases:
- *
- * 1. Regular file in fs_entries and not in db_files
- * This is a new file to add_message into the database.
- *
- * 2. Filename in db_files not in fs_entries.
- * This is a file that has been removed from the mail store.
- *
- * 3. Directory in db_subdirs not in fs_entries
- * This is a directory that has been removed from the mail store.
- *
- * Note that the addition of a directory is not interesting here,
- * since that will have been taken care of in pass 1. Also, we
- * don't immediately act on file/directory removal since we must
- * ensure that in the case of a rename that the new filename is
- * added before the old filename is removed, (so that no
- * information is lost from the database).
- *
- * o Tell the database to update its time of 'path' to 'fs_mtime'
- */
-static notmuch_status_t
-add_files_recursive (notmuch_database_t *notmuch,
- const char *path,
- add_files_state_t *state)
-{
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char *next = NULL;
- time_t fs_mtime, db_mtime;
- notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
- notmuch_message_t *message = NULL;
- struct dirent **fs_entries = NULL;
- int i, num_fs_entries;
- notmuch_directory_t *directory;
- notmuch_filenames_t *db_files = NULL;
- notmuch_filenames_t *db_subdirs = NULL;
- struct stat st;
- notmuch_bool_t is_maildir, new_directory;
-
- if (stat (path, &st)) {
- fprintf (stderr, "Error reading directory %s: %s\n",
- path, strerror (errno));
- return NOTMUCH_STATUS_FILE_ERROR;
- }
-
- /* This is not an error since we may have recursed based on a
- * symlink to a regular file, not a directory, and we don't know
- * that until this stat. */
- if (! S_ISDIR (st.st_mode))
- return NOTMUCH_STATUS_SUCCESS;
-
- fs_mtime = st.st_mtime;
-
- directory = notmuch_database_get_directory (notmuch, path);
- db_mtime = notmuch_directory_get_mtime (directory);
-
- if (db_mtime == 0) {
- new_directory = TRUE;
- db_files = NULL;
- db_subdirs = NULL;
- } else {
- new_directory = FALSE;
- db_files = notmuch_directory_get_child_files (directory);
- db_subdirs = notmuch_directory_get_child_directories (directory);
- }
-
- /* If the database knows about this directory, then we sort based
- * on strcmp to match the database sorting. Otherwise, we can do
- * inode-based sorting for faster filesystem operation. */
- num_fs_entries = scandir (path, &fs_entries, 0,
- new_directory ?
- dirent_sort_inode : dirent_sort_strcmp_name);
-
- if (num_fs_entries == -1) {
- fprintf (stderr, "Error opening directory %s: %s\n",
- path, strerror (errno));
- ret = NOTMUCH_STATUS_FILE_ERROR;
- goto DONE;
- }
-
- /* Pass 1: Recurse into all sub-directories. */
- is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries);
-
- for (i = 0; i < num_fs_entries; i++) {
- if (interrupted)
- break;
-
- entry = fs_entries[i];
-
- /* We only want to descend into directories.
- * But symlinks can be to directories too, of course.
- *
- * And if the filesystem doesn't tell us the file type in the
- * scandir results, then it might be a directory (and if not,
- * then we'll stat and return immediately in the next level of
- * recursion). */
- if (entry->d_type != DT_DIR &&
- entry->d_type != DT_LNK &&
- entry->d_type != DT_UNKNOWN)
- {
- continue;
- }
-
- /* Ignore special directories to avoid infinite recursion.
- * Also ignore the .notmuch directory and any "tmp" directory
- * that appears within a maildir.
- */
- /* XXX: Eventually we'll want more sophistication to let the
- * user specify files to be ignored. */
- if (strcmp (entry->d_name, ".") == 0 ||
- strcmp (entry->d_name, "..") == 0 ||
- (is_maildir && strcmp (entry->d_name, "tmp") == 0) ||
- strcmp (entry->d_name, ".notmuch") ==0)
- {
- continue;
- }
-
- next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
- status = add_files_recursive (notmuch, next, state);
- if (status && ret == NOTMUCH_STATUS_SUCCESS)
- ret = status;
- talloc_free (next);
- next = NULL;
- }
-
- /* If this directory hasn't been modified since the last
- * "notmuch new", then we can skip the second pass entirely. */
- if (fs_mtime <= db_mtime)
- goto DONE;
-
- /* Pass 2: Scan for new files, removed files, and removed directories. */
- for (i = 0; i < num_fs_entries; i++)
- {
- if (interrupted)
- break;
-
- entry = fs_entries[i];
-
- /* Check if we've walked past any names in db_files or
- * db_subdirs. If so, these have been deleted. */
- while (notmuch_filenames_valid (db_files) &&
- strcmp (notmuch_filenames_get (db_files), entry->d_name) < 0)
- {
- char *absolute = talloc_asprintf (state->removed_files,
- "%s/%s", path,
- notmuch_filenames_get (db_files));
-
- _filename_list_add (state->removed_files, absolute);
-
- notmuch_filenames_move_to_next (db_files);
- }
-
- while (notmuch_filenames_valid (db_subdirs) &&
- strcmp (notmuch_filenames_get (db_subdirs), entry->d_name) <= 0)
- {
- const char *filename = notmuch_filenames_get (db_subdirs);
-
- if (strcmp (filename, entry->d_name) < 0)
- {
- char *absolute = talloc_asprintf (state->removed_directories,
- "%s/%s", path, filename);
-
- _filename_list_add (state->removed_directories, absolute);
- }
-
- notmuch_filenames_move_to_next (db_subdirs);
- }
-
- /* If we're looking at a symlink, we only want to add it if it
- * links to a regular file, (and not to a directory, say).
- *
- * Similarly, if the file is of unknown type (due to filesytem
- * limitations), then we also need to look closer.
- *
- * In either case, a stat does the trick.
- */
- if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {
- int err;
-
- next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
- err = stat (next, &st);
- talloc_free (next);
- next = NULL;
-
- /* Don't emit an error for a link pointing nowhere, since
- * the directory-traversal pass will have already done
- * that. */
- if (err)
- continue;
-
- if (! S_ISREG (st.st_mode))
- continue;
- } else if (entry->d_type != DT_REG) {
- continue;
- }
-
- /* Don't add a file that we've added before. */
- if (notmuch_filenames_valid (db_files) &&
- strcmp (notmuch_filenames_get (db_files), entry->d_name) == 0)
- {
- notmuch_filenames_move_to_next (db_files);
- continue;
- }
-
- /* We're now looking at a regular file that doesn't yet exist
- * in the database, so add it. */
- next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
-
- state->processed_files++;
-
- if (state->verbose) {
- if (state->output_is_a_tty)
- printf("\r\033[K");
-
- printf ("%i/%i: %s",
- state->processed_files,
- state->total_files,
- next);
-
- putchar((state->output_is_a_tty) ? '\r' : '\n');
- fflush (stdout);
- }
-
- status = notmuch_database_add_message (notmuch, next, &message);
- switch (status) {
- /* success */
- case NOTMUCH_STATUS_SUCCESS:
- state->added_messages++;
- tag_inbox_and_unread (message);
- break;
- /* Non-fatal issues (go on to next file) */
- case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID:
- /* Stay silent on this one. */
- break;
- case NOTMUCH_STATUS_FILE_NOT_EMAIL:
- fprintf (stderr, "Note: Ignoring non-mail file: %s\n",
- next);
- break;
- /* Fatal issues. Don't process anymore. */
- case NOTMUCH_STATUS_READ_ONLY_DATABASE:
- case NOTMUCH_STATUS_XAPIAN_EXCEPTION:
- case NOTMUCH_STATUS_OUT_OF_MEMORY:
- fprintf (stderr, "Error: %s. Halting processing.\n",
- notmuch_status_to_string (status));
- ret = status;
- goto DONE;
- default:
- case NOTMUCH_STATUS_FILE_ERROR:
- case NOTMUCH_STATUS_NULL_POINTER:
- case NOTMUCH_STATUS_TAG_TOO_LONG:
- case NOTMUCH_STATUS_UNBALANCED_FREEZE_THAW:
- case NOTMUCH_STATUS_LAST_STATUS:
- INTERNAL_ERROR ("add_message returned unexpected value: %d", status);
- goto DONE;
- }
-
- if (message) {
- notmuch_message_destroy (message);
- message = NULL;
- }
-
- if (do_add_files_print_progress) {
- do_add_files_print_progress = 0;
- add_files_print_progress (state);
- }
-
- talloc_free (next);
- next = NULL;
- }
-
- /* Now that we've walked the whole filesystem list, anything left
- * over in the database lists has been deleted. */
- while (notmuch_filenames_valid (db_files))
- {
- char *absolute = talloc_asprintf (state->removed_files,
- "%s/%s", path,
- notmuch_filenames_get (db_files));
-
- _filename_list_add (state->removed_files, absolute);
-
- notmuch_filenames_move_to_next (db_files);
- }
-
- while (notmuch_filenames_valid (db_subdirs))
- {
- char *absolute = talloc_asprintf (state->removed_directories,
- "%s/%s", path,
- notmuch_filenames_get (db_subdirs));
-
- _filename_list_add (state->removed_directories, absolute);
-
- notmuch_filenames_move_to_next (db_subdirs);
- }
-
- if (! interrupted) {
- status = notmuch_directory_set_mtime (directory, fs_mtime);
- if (status && ret == NOTMUCH_STATUS_SUCCESS)
- ret = status;
- }
-
- DONE:
- if (next)
- talloc_free (next);
- if (entry)
- free (entry);
- if (dir)
- closedir (dir);
- if (fs_entries)
- free (fs_entries);
- if (db_subdirs)
- notmuch_filenames_destroy (db_subdirs);
- if (db_files)
- notmuch_filenames_destroy (db_files);
- if (directory)
- notmuch_directory_destroy (directory);
-
- return ret;
-}
-
/* This is the top-level entry point for add_files. It does a couple
* of error checks, sets up the progress-printing timer and then calls
* into the recursive function. */
static notmuch_status_t
-add_files (notmuch_database_t *notmuch,
+add_files (notmuch_mailstore_t *mailstore,
const char *path,
- add_files_state_t *state)
+ notmuch_indexing_context_t *state) /* FIXME: rename */
{
notmuch_status_t status;
struct sigaction action;
@@ -517,6 +84,9 @@ add_files (notmuch_database_t *notmuch,
notmuch_bool_t timer_is_active = FALSE;
struct stat st;
+ state->print_progress = 0;
+ state->print_progress_cb = add_files_print_progress;
+
if (state->output_is_a_tty && ! debugger_is_active () && ! state->verbose) {
/* Setup our handler for SIGALRM */
memset (&action, 0, sizeof (struct sigaction));
@@ -546,7 +116,7 @@ add_files (notmuch_database_t *notmuch,
return NOTMUCH_STATUS_FILE_ERROR;
}
- status = add_files_recursive (notmuch, path, state);
+ status = notmuch_mailstore_index_new (mailstore, path, state);
if (timer_is_active) {
/* Now stop the timer. */
@@ -564,80 +134,11 @@ add_files (notmuch_database_t *notmuch,
return status;
}
-/* XXX: This should be merged with the add_files function since it
- * shares a lot of logic with it. */
-/* Recursively count all regular files in path and all sub-directories
- * of path. The result is added to *count (which should be
- * initialized to zero by the top-level caller before calling
- * count_files). */
-static void
-count_files (const char *path, int *count)
-{
- struct dirent *entry = NULL;
- char *next;
- struct stat st;
- struct dirent **fs_entries = NULL;
- int num_fs_entries = scandir (path, &fs_entries, 0, dirent_sort_inode);
- int i = 0;
-
- if (num_fs_entries == -1) {
- fprintf (stderr, "Warning: failed to open directory %s: %s\n",
- path, strerror (errno));
- goto DONE;
- }
-
- while (!interrupted) {
- if (i == num_fs_entries)
- break;
-
- entry = fs_entries[i++];
-
- /* Ignore special directories to avoid infinite recursion.
- * Also ignore the .notmuch directory.
- */
- /* XXX: Eventually we'll want more sophistication to let the
- * user specify files to be ignored. */
- if (strcmp (entry->d_name, ".") == 0 ||
- strcmp (entry->d_name, "..") == 0 ||
- strcmp (entry->d_name, ".notmuch") == 0)
- {
- continue;
- }
-
- if (asprintf (&next, "%s/%s", path, entry->d_name) == -1) {
- next = NULL;
- fprintf (stderr, "Error descending from %s to %s: Out of memory\n",
- path, entry->d_name);
- continue;
- }
-
- stat (next, &st);
-
- if (S_ISREG (st.st_mode)) {
- *count = *count + 1;
- if (*count % 1000 == 0) {
- printf ("Found %d files so far.\r", *count);
- fflush (stdout);
- }
- } else if (S_ISDIR (st.st_mode)) {
- count_files (next, count);
- }
-
- free (next);
- }
-
- DONE:
- if (entry)
- free (entry);
- if (fs_entries)
- free (fs_entries);
-}
-
static void
upgrade_print_progress (void *closure,
double progress)
{
- add_files_state_t *state = closure;
+ notmuch_indexing_context_t *state = closure;
printf ("Upgrading database: %.2f%% complete", progress * 100.0);
@@ -659,55 +160,12 @@ upgrade_print_progress (void *closure,
fflush (stdout);
}
-/* Recursively remove all filenames from the database referring to
- * 'path' (or to any of its children). */
-static void
-_remove_directory (void *ctx,
- notmuch_database_t *notmuch,
- const char *path,
- int *renamed_files,
- int *removed_files)
-{
- notmuch_directory_t *directory;
- notmuch_filenames_t *files, *subdirs;
- notmuch_status_t status;
- char *absolute;
-
- directory = notmuch_database_get_directory (notmuch, path);
-
- for (files = notmuch_directory_get_child_files (directory);
- notmuch_filenames_valid (files);
- notmuch_filenames_move_to_next (files))
- {
- absolute = talloc_asprintf (ctx, "%s/%s", path,
- notmuch_filenames_get (files));
- status = notmuch_database_remove_message (notmuch, absolute);
- if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID)
- *renamed_files = *renamed_files + 1;
- else
- *removed_files = *removed_files + 1;
- talloc_free (absolute);
- }
-
- for (subdirs = notmuch_directory_get_child_directories (directory);
- notmuch_filenames_valid (subdirs);
- notmuch_filenames_move_to_next (subdirs))
- {
- absolute = talloc_asprintf (ctx, "%s/%s", path,
- notmuch_filenames_get (subdirs));
- _remove_directory (ctx, notmuch, absolute, renamed_files, removed_files);
- talloc_free (absolute);
- }
-
- notmuch_directory_destroy (directory);
-}
-
int
notmuch_new_command (void *ctx, int argc, char *argv[])
{
notmuch_config_t *config;
notmuch_database_t *notmuch;
- add_files_state_t add_files_state;
+ notmuch_indexing_context_t add_files_state; /* FIXME: Rename */
double elapsed;
struct timeval tv_now;
int ret = 0;
@@ -715,10 +173,10 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
const char *db_path;
char *dot_notmuch_path;
struct sigaction action;
- _filename_node_t *f;
- int renamed_files, removed_files;
- notmuch_status_t status;
int i;
+ notmuch_mailstore_t *mailstore;
+
+ indexing_ctx = &add_files_state;
add_files_state.verbose = 0;
add_files_state.output_is_a_tty = isatty (fileno (stdout));
@@ -737,6 +195,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
return 1;
db_path = notmuch_config_get_database_path (config);
+ mailstore = notmuch_config_get_mailstore (config);
dot_notmuch_path = talloc_asprintf (ctx, "%s/%s", db_path, ".notmuch");
@@ -744,7 +203,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
int count;
count = 0;
- count_files (db_path, &count);
+ notmuch_mailstore_count_files (mailstore, db_path, &count, &interrupted);
if (interrupted)
return 1;
@@ -777,6 +236,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
/* Setup our handler for SIGINT. We do this after having
* potentially done a database upgrade we this interrupt handler
* won't support. */
+ add_files_state.interrupted = 0;
memset (&action, 0, sizeof (struct sigaction));
action.sa_handler = handle_sigint;
sigemptyset (&action.sa_mask);
@@ -790,28 +250,7 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
add_files_state.added_messages = 0;
gettimeofday (&add_files_state.tv_start, NULL);
- add_files_state.removed_files = _filename_list_create (ctx);
- add_files_state.removed_directories = _filename_list_create (ctx);
-
- ret = add_files (notmuch, db_path, &add_files_state);
-
- removed_files = 0;
- renamed_files = 0;
- for (f = add_files_state.removed_files->head; f; f = f->next) {
- status = notmuch_database_remove_message (notmuch, f->filename);
- if (status == NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID)
- renamed_files++;
- else
- removed_files++;
- }
-
- for (f = add_files_state.removed_directories->head; f; f = f->next) {
- _remove_directory (ctx, notmuch, f->filename,
- &renamed_files, &removed_files);
- }
-
- talloc_free (add_files_state.removed_files);
- talloc_free (add_files_state.removed_directories);
+ ret = add_files (mailstore, db_path, &add_files_state);
gettimeofday (&tv_now, NULL);
elapsed = notmuch_time_elapsed (add_files_state.tv_start,
@@ -839,16 +278,16 @@ notmuch_new_command (void *ctx, int argc, char *argv[])
printf ("No new mail.");
}
- if (removed_files) {
+ if (add_files_state.removed_files) {
printf (" Removed %d %s.",
- removed_files,
- removed_files == 1 ? "message" : "messages");
+ add_files_state.removed_files,
+ add_files_state.removed_files == 1 ? "message" : "messages");
}
- if (renamed_files) {
+ if (add_files_state.renamed_files) {
printf (" Detected %d file %s.",
- renamed_files,
- renamed_files == 1 ? "rename" : "renames");
+ add_files_state.renamed_files,
+ add_files_state.renamed_files == 1 ? "rename" : "renames");
}
printf ("\n");
--
1.7.0
More information about the notmuch mailing list