[PATCH v2 2/2] new: Centralize file type stat-ing logic
    Austin Clements 
    amdragon at MIT.EDU
       
    Mon May  7 15:20:40 PDT 2012
    
    
  
This moves our logic to get a file's type into one function.  This has
several benefits: we can support OSes and file systems that do not
provide dirent.d_type or always return DT_UNKNOWN, complex
symlink-handling logic has been replaced by a simple stat fall-through
in one place, and the error message for un-stat-able file is more
accurate (previously, the error always mentioned directories, even
though a broken symlink is not a directory).
---
 notmuch-new.c |  103 +++++++++++++++++++++++++++++++++++----------------------
 test/new      |    2 +-
 2 files changed, 64 insertions(+), 41 deletions(-)
diff --git a/notmuch-new.c b/notmuch-new.c
index cb720cc..8955677 100644
--- a/notmuch-new.c
+++ b/notmuch-new.c
@@ -154,6 +154,48 @@ dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
     return strcmp ((*a)->d_name, (*b)->d_name);
 }
 
+/* Return the type of a directory entry relative to path as a stat(2)
+ * mode.  Like stat, this follows symlinks.  Returns -1 and sets errno
+ * if the file's type cannot be determined (which includes dangling
+ * symlinks).
+ */
+static int
+dirent_type (const char *path, const struct dirent *entry)
+{
+    struct stat statbuf;
+    char *abspath;
+    int err, saved_errno;
+
+#ifdef _DIRENT_HAVE_D_TYPE
+    /* Mapping from d_type to stat mode_t.  We omit DT_LNK so that
+     * we'll fall through to stat and get the real file type. */
+    static const mode_t modes[] = {
+	[DT_BLK]  = S_IFBLK,
+	[DT_CHR]  = S_IFCHR,
+	[DT_DIR]  = S_IFDIR,
+	[DT_FIFO] = S_IFIFO,
+	[DT_REG]  = S_IFREG,
+	[DT_SOCK] = S_IFSOCK
+    };
+    if (entry->d_type < ARRAY_SIZE(modes) && modes[entry->d_type])
+	return modes[entry->d_type];
+#endif
+
+    abspath = talloc_asprintf (NULL, "%s/%s", path, entry->d_name);
+    if (!abspath) {
+	errno = ENOMEM;
+	return -1;
+    }
+    err = stat(abspath, &statbuf);
+    saved_errno = errno;
+    talloc_free (abspath);
+    if (err < 0) {
+	errno = saved_errno;
+	return -1;
+    }
+    return statbuf.st_mode & S_IFMT;
+}
+
 /* Test if the directory looks like a Maildir directory.
  *
  * Search through the array of directory entries to see if we can find all
@@ -162,12 +204,12 @@ dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)
  * Return 1 if the directory looks like a Maildir and 0 otherwise.
  */
 static int
-_entries_resemble_maildir (struct dirent **entries, int count)
+_entries_resemble_maildir (const char *path, struct dirent **entries, int count)
 {
     int i, found = 0;
 
     for (i = 0; i < count; i++) {
-	if (entries[i]->d_type != DT_DIR && entries[i]->d_type != DT_UNKNOWN)
+	if (dirent_type (path, entries[i]) != S_IFDIR)
 	    continue;
 
 	if (strcmp(entries[i]->d_name, "new") == 0 ||
@@ -250,7 +292,7 @@ add_files_recursive (notmuch_database_t *notmuch,
     notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;
     notmuch_message_t *message = NULL;
     struct dirent **fs_entries = NULL;
-    int i, num_fs_entries;
+    int i, num_fs_entries, entry_type;
     notmuch_directory_t *directory;
     notmuch_filenames_t *db_files = NULL;
     notmuch_filenames_t *db_subdirs = NULL;
@@ -317,7 +359,7 @@ add_files_recursive (notmuch_database_t *notmuch,
     }
 
     /* Pass 1: Recurse into all sub-directories. */
-    is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries);
+    is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries);
 
     for (i = 0; i < num_fs_entries; i++) {
 	if (interrupted)
@@ -325,17 +367,16 @@ add_files_recursive (notmuch_database_t *notmuch,
 
 	entry = fs_entries[i];
 
-	/* We only want to descend into directories.
-	 * But symlinks can be to directories too, of course.
-	 *
-	 * And if the filesystem doesn't tell us the file type in the
-	 * scandir results, then it might be a directory (and if not,
-	 * then we'll stat and return immediately in the next level of
-	 * recursion). */
-	if (entry->d_type != DT_DIR &&
-	    entry->d_type != DT_LNK &&
-	    entry->d_type != DT_UNKNOWN)
-	{
+	/* We only want to descend into directories (and symlinks to
+	 * directories). */
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    /* Be pessimistic, e.g. so we don't lose lots of mail just
+	     * because a user broke a symlink. */
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    return NOTMUCH_STATUS_FILE_ERROR;
+	} else if (entry_type != S_IFDIR) {
 	    continue;
 	}
 
@@ -425,31 +466,13 @@ add_files_recursive (notmuch_database_t *notmuch,
 	    notmuch_filenames_move_to_next (db_subdirs);
 	}
 
-	/* If we're looking at a symlink, we only want to add it if it
-	 * links to a regular file, (and not to a directory, say).
-	 *
-	 * Similarly, if the file is of unknown type (due to filesystem
-	 * limitations), then we also need to look closer.
-	 *
-	 * In either case, a stat does the trick.
-	 */
-	if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {
-	    int err;
-
-	    next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);
-	    err = stat (next, &st);
-	    talloc_free (next);
-	    next = NULL;
-
-	    /* Don't emit an error for a link pointing nowhere, since
-	     * the directory-traversal pass will have already done
-	     * that. */
-	    if (err)
-		continue;
-
-	    if (! S_ISREG (st.st_mode))
-		continue;
-	} else if (entry->d_type != DT_REG) {
+	/* Only add regular files (and symlinks to regular files). */
+	entry_type = dirent_type (path, entry);
+	if (entry_type == -1) {
+	    fprintf (stderr, "Error reading file %s/%s: %s\n",
+		     path, entry->d_name, strerror (errno));
+	    return NOTMUCH_STATUS_FILE_ERROR;
+	} else if (entry_type != S_IFREG) {
 	    continue;
 	}
 
diff --git a/test/new b/test/new
index 26253db..e3900f5 100755
--- a/test/new
+++ b/test/new
@@ -140,7 +140,7 @@ test_begin_subtest "Broken symlink aborts"
 ln -s does-not-exist "${MAIL_DIR}/broken"
 output=$(NOTMUCH_NEW 2>&1)
 test_expect_equal "$output" \
-"Error reading directory /run/shm/nm/tmp.new/mail/broken: No such file or directory
+"Error reading file /run/shm/nm/tmp.new/mail/broken: No such file or directory
 Note: A fatal error was encountered: Something went wrong trying to read or write a file
 No new mail."
 rm "${MAIL_DIR}/broken"
-- 
1.7.10
    
    
More information about the notmuch
mailing list