<p><br>
On May 7, 2012 9:10 PM, "Austin Clements" &lt;<a href="mailto:amdragon@mit.edu">amdragon@mit.edu</a>&gt; wrote:<br>
><br>
> This moves our logic to get a file's type into one function. This has<br>
> several benefits: we can support OSes and file systems that do not<br>
> provide dirent.d_type or always return DT_UNKNOWN, complex<br>
> symlink-handling logic has been replaced by a simple stat fall-through<br>
> in one place, and the error message for un-stat-able file is more<br>
> accurate (previously, the error always mentioned directories, even<br>
> though a broken symlink is not a directory).</p>
<p>Please find a quick drive-by review below.</p>
<p>J.</p>
<p>> ---<br>
> notmuch-new.c | 99 ++++++++++++++++++++++++++++++++++-----------------------<br>
> test/new | 2 +-<br>
> 2 files changed, 60 insertions(+), 41 deletions(-)<br>
><br>
> diff --git a/notmuch-new.c b/notmuch-new.c<br>
> index cb720cc..cf2580e 100644<br>
> --- a/notmuch-new.c<br>
> +++ b/notmuch-new.c<br>
> @@ -154,6 +154,44 @@ dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)<br>
> return strcmp ((*a)->d_name, (*b)->d_name);<br>
> }<br>
><br>
> +/* Return the type of a directory entry relative to path as a stat(2)<br>
> + * mode. Like stat, this follows symlinks. Returns -1 and sets errno<br>
> + * if the file's type cannot be determined (which includes dangling<br>
> + * symlinks).<br>
> + */<br>
> +static int<br>
> +dirent_type (const char *path, const struct dirent *entry)<br>
> +{<br>
> + struct stat statbuf;<br>
> + char *abspath;<br>
> + int err;<br>
> +<br>
> +#ifdef _DIRENT_HAVE_D_TYPE<br>
> + /* Mapping from d_type to stat mode_t. We omit DT_LNK so that<br>
> + * we'll fall through to stat and get the real file type. */<br>
> + static const mode_t modes[] = {<br>
> + [DT_BLK] = S_IFBLK,<br>
> + [DT_CHR] = S_IFCHR,<br>
> + [DT_DIR] = S_IFDIR,<br>
> + [DT_FIFO] = S_IFIFO,<br>
> + [DT_REG] = S_IFREG,<br>
> + [DT_SOCK] = S_IFSOCK<br>
> + };<br>
> + if (entry->d_type < sizeof(modes)/sizeof(modes[0]) &&</p>
<p>Use ARRAY_SIZE() here instead of open-coding the sizeof division.</p>
<p>> + modes[entry->d_type])<br>
> + return modes[entry->d_type];<br>
> +#endif<br>
> +<br>
> + abspath = talloc_asprintf (NULL, "%s/%s", path, entry->d_name);<br>
> + if (!abspath)<br>
> + return -1;</p>
<p>Does talloc set errno in this case? I suspect not.</p>
<p>> + err = stat(abspath, &statbuf);<br>
> + talloc_free (abspath);</p>
<p>This likely breaks your promise about errno. You can't rely on talloc_free() not to call some function that overwrites errno between the failed stat() and the return.</p>
<p>> + if (err < 0)<br>
> + return -1;<br>
> + return statbuf.st_mode & S_IFMT;<br>
> +}<br>
> +<br>
> /* Test if the directory looks like a Maildir directory.<br>
> *<br>
> * Search through the array of directory entries to see if we can find all<br>
> @@ -162,12 +200,12 @@ dirent_sort_strcmp_name (const struct dirent **a, const struct dirent **b)<br>
> * Return 1 if the directory looks like a Maildir and 0 otherwise.<br>
> */<br>
> static int<br>
> -_entries_resemble_maildir (struct dirent **entries, int count)<br>
> +_entries_resemble_maildir (const char *path, struct dirent **entries, int count)<br>
> {<br>
> int i, found = 0;<br>
><br>
> for (i = 0; i < count; i++) {<br>
> - if (entries[i]->d_type != DT_DIR && entries[i]->d_type != DT_UNKNOWN)<br>
> + if (dirent_type (path, entries[i]) != S_IFDIR)<br>
> continue;<br>
><br>
> if (strcmp(entries[i]->d_name, "new") == 0 ||<br>
> @@ -250,7 +288,7 @@ add_files_recursive (notmuch_database_t *notmuch,<br>
> notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS;<br>
> notmuch_message_t *message = NULL;<br>
> struct dirent **fs_entries = NULL;<br>
> - int i, num_fs_entries;<br>
> + int i, num_fs_entries, entry_type;<br>
> notmuch_directory_t *directory;<br>
> notmuch_filenames_t *db_files = NULL;<br>
> notmuch_filenames_t *db_subdirs = NULL;<br>
> @@ -317,7 +355,7 @@ add_files_recursive (notmuch_database_t *notmuch,<br>
> }<br>
><br>
> /* Pass 1: Recurse into all sub-directories. */<br>
> - is_maildir = _entries_resemble_maildir (fs_entries, num_fs_entries);<br>
> + is_maildir = _entries_resemble_maildir (path, fs_entries, num_fs_entries);<br>
><br>
> for (i = 0; i < num_fs_entries; i++) {<br>
> if (interrupted)<br>
> @@ -325,17 +363,16 @@ add_files_recursive (notmuch_database_t *notmuch,<br>
><br>
> entry = fs_entries[i];<br>
><br>
> - /* We only want to descend into directories.<br>
> - * But symlinks can be to directories too, of course.<br>
> - *<br>
> - * And if the filesystem doesn't tell us the file type in the<br>
> - * scandir results, then it might be a directory (and if not,<br>
> - * then we'll stat and return immediately in the next level of<br>
> - * recursion). */<br>
> - if (entry->d_type != DT_DIR &&<br>
> - entry->d_type != DT_LNK &&<br>
> - entry->d_type != DT_UNKNOWN)<br>
> - {<br>
> + /* We only want to descend into directories (and symlinks to<br>
> + * directories). */<br>
> + entry_type = dirent_type (path, entry);<br>
> + if (entry_type == -1) {<br>
> + /* Be pessimistic, e.g. so we don't loose lots of mail</p>
<p>s/loose/lose/ ?</p>
<p>> + * just because a user broke a symlink. */<br>
> + fprintf (stderr, "Error reading file %s/%s: %s\n",<br>
> + path, entry->d_name, strerror (errno));</p>
<p>You can't trust errno here, as explained above.</p>
<p>> + return NOTMUCH_STATUS_FILE_ERROR;<br>
> + } else if (entry_type != S_IFDIR) {<br>
> continue;<br>
> }<br>
><br>
> @@ -425,31 +462,13 @@ add_files_recursive (notmuch_database_t *notmuch,<br>
> notmuch_filenames_move_to_next (db_subdirs);<br>
> }<br>
><br>
> - /* If we're looking at a symlink, we only want to add it if it<br>
> - * links to a regular file, (and not to a directory, say).<br>
> - *<br>
> - * Similarly, if the file is of unknown type (due to filesystem<br>
> - * limitations), then we also need to look closer.<br>
> - *<br>
> - * In either case, a stat does the trick.<br>
> - */<br>
> - if (entry->d_type == DT_LNK || entry->d_type == DT_UNKNOWN) {<br>
> - int err;<br>
> -<br>
> - next = talloc_asprintf (notmuch, "%s/%s", path, entry->d_name);<br>
> - err = stat (next, &st);<br>
> - talloc_free (next);<br>
> - next = NULL;<br>
> -<br>
> - /* Don't emit an error for a link pointing nowhere, since<br>
> - * the directory-traversal pass will have already done<br>
> - * that. */<br>
> - if (err)<br>
> - continue;<br>
> -<br>
> - if (! S_ISREG (st.st_mode))<br>
> - continue;<br>
> - } else if (entry->d_type != DT_REG) {<br>
> + /* Only add regular files (and symlinks to regular files). */<br>
> + entry_type = dirent_type (path, entry);<br>
> + if (entry_type == -1) {<br>
> + fprintf (stderr, "Error reading file %s/%s: %s\n",<br>
> + path, entry->d_name, strerror (errno));</p>
<p>Ditto.</p>
<p>> + return NOTMUCH_STATUS_FILE_ERROR;<br>
> + } else if (entry_type != S_IFREG) {<br>
> continue;<br>
> }<br>
><br>
> diff --git a/test/new b/test/new<br>
> index 26253db..e3900f5 100755<br>
> --- a/test/new<br>
> +++ b/test/new<br>
> @@ -140,7 +140,7 @@ test_begin_subtest "Broken symlink aborts"<br>
> ln -s does-not-exist "${MAIL_DIR}/broken"<br>
> output=$(NOTMUCH_NEW 2>&1)<br>
> test_expect_equal "$output" \<br>
> -"Error reading directory /run/shm/nm/tmp.new/mail/broken: No such file or directory<br>
> +"Error reading file /run/shm/nm/tmp.new/mail/broken: No such file or directory<br>
> Note: A fatal error was encountered: Something went wrong trying to read or write a file<br>
> No new mail."<br>
> rm "${MAIL_DIR}/broken"<br>
> --<br>
> 1.7.10<br>
><br>
> _______________________________________________<br>
> notmuch mailing list<br>
> <a href="mailto:notmuch@notmuchmail.org">notmuch@notmuchmail.org</a><br>
> <a href="http://notmuchmail.org/mailman/listinfo/notmuch">http://notmuchmail.org/mailman/listinfo/notmuch</a><br>
</p>