[PATCH 2/2] Improve heuristic for guessing best from address in replies
Dirk Hohndel
hohndel at infradead.org
Fri Apr 16 13:51:42 PDT 2010
We now look at Envelope-To: and Original-To: headers
Then concat all of the Received headers and walk through them to find
either a "for email at add.res" clause or a host in a known domain.
This should deal with most of the fetchmail and mail hoster induced
pain (and failure) of the old heuristic.
Signed-off-by: Dirk Hohndel <hohndel at infradead.org>
---
notmuch-reply.c | 125 +++++++++++++++++++++++++++++++++++++++++--------------
1 files changed, 94 insertions(+), 31 deletions(-)
diff --git a/notmuch-reply.c b/notmuch-reply.c
index 230cacc..78d3914 100644
--- a/notmuch-reply.c
+++ b/notmuch-reply.c
@@ -305,33 +305,95 @@ add_recipients_from_message (GMimeMessage *reply,
static const char *
guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message)
{
- const char *received,*primary;
- char **other;
- char *by,*mta,*ptr,*token;
+ const char *received,*primary,*by;
+ char **other,*tohdr;
+ char *mta,*ptr,*token;
char *domain=NULL;
char *tld=NULL;
const char *delim=". \t";
size_t i,other_len;
- received = notmuch_message_get_header (message, "received");
- by = strstr (received, " by ");
- if (by && *(by+4)) {
- /* sadly, the format of Received: headers is a bit inconsistent,
- * depending on the MTA used. So we try to extract just the MTA
- * here by removing leading whitespace and assuming that the MTA
- * name ends at the next whitespace
- * we test for *(by+4) to be non-'\0' to make sure there's something
- * there at all - and then assume that the first whitespace delimited
- * token that follows is the last receiving server
+ const char *to_headers[] = {"Envelope-to", "X-Original-To"};
+
+ primary = notmuch_config_get_user_primary_email (config);
+ other = notmuch_config_get_user_other_email (config, &other_len);
+
+ /* sadly, there is no standard way to find out to which email
+ * address a mail was delivered - what is in the headers depends
+ * on the MTAs used along the way. So we are trying a number of
+ * heuristics which hopefully will answer this question.
+
+ * We only got here if none of the users email addresses are in
+ * the To: or Cc: header. From here we try the following in order:
+ * 1) check for an Envelope-to: header
+ * 2) check for an X-Original-To: header
+ * 3) check for a (for <email at add.res>) clause in Received: headers
+ * 4) check for the domain part of known email addresses in the
+ * 'by' part of Received headers
+ * If none of these work, we give up and return NULL
+ */
+ for (i = 0; i < sizeof(to_headers)/sizeof(*to_headers); i++) {
+ tohdr = xstrdup(notmuch_message_get_header (message, to_headers[i]));
+ if (tohdr && *tohdr) {
+ /* tohdr is potentialy a list of email addresses, so here we
+ * check if one of the email addresses is a substring of tohdr
+ */
+ if (strcasestr(tohdr, primary)) {
+ free(tohdr);
+ return primary;
+ }
+ for (i = 0; i < other_len; i++)
+ if (strcasestr (tohdr, other[i])) {
+ free(tohdr);
+ return other[i];
+ }
+ free(tohdr);
+ }
+ }
+
+ /* We get the concatenated Received: headers and search from the
+ * front (last Received: header added) and try to extract from
+ * them indications to which email address this message was
+ * delivered.
+ */
+ received = notmuch_message_get_concat_header (message, "received");
+ /* First we look for a " for <email at add.res>" in the received
+ * header
+ */
+ ptr = strstr (received, " for ");
+ if (ptr) {
+ /* the text following is potentialy a list of email addresses,
+ * so again we check if one of the email addresses is a
+ * substring of ptr
*/
- mta = strdup (by+4);
- if (mta == NULL)
- return NULL;
+ if (strcasestr(ptr, primary)) {
+ return primary;
+ }
+ for (i = 0; i < other_len; i++)
+ if (strcasestr (ptr, other[i])) {
+ return other[i];
+ }
+ }
+ /* Finally, we parse all the " by MTA ..." headers to guess the
+ * email address that this was originally delivered to.
+ * We extract just the MTA here by removing leading whitespace and
+ * assuming that the MTA name ends at the next whitespace.
+ * We test for *(by+4) to be non-'\0' to make sure there's
+ * something there at all - and then assume that the first
+ * whitespace delimited token that follows is the receiving
+ * system in this step of the receive chain
+ */
+ by = received;
+ while((by = strstr (by, " by ")) != NULL) {
+ by += 4;
+ if (*by == '\0')
+ break;
+ mta = xstrdup (by);
token = strtok(mta," \t");
if (token == NULL)
- return NULL;
+ break;
/* Now extract the last two components of the MTA host name
- * as domain and tld
+ * as domain and tld.
*/
while ((ptr = strsep (&token, delim)) != NULL) {
if (*ptr == '\0')
@@ -341,23 +403,24 @@ guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message
}
if (domain) {
- /* recombine domain and tld and look for it among the configured
- * email addresses
+ /* Recombine domain and tld and look for it among the configured
+ * email addresses.
+ * This time we have a known domain name and nothing else - so
+ * the test is the other way around: we check if this is a
+ * substring of one of the email addresses.
*/
*(tld-1) = '.';
- primary = notmuch_config_get_user_primary_email (config);
- if (strcasestr (primary, domain)) {
- free (mta);
- return primary;
+
+ if (strcasestr(primary, domain)) {
+ free(mta);
+ return primary;
+ }
+ for (i = 0; i < other_len; i++)
+ if (strcasestr (other[i],domain)) {
+ free(mta);
+ return other[i];
}
- other = notmuch_config_get_user_other_email (config, &other_len);
- for (i = 0; i < other_len; i++)
- if (strcasestr (other[i], domain)) {
- free (mta);
- return other[i];
- }
}
-
free (mta);
}
--
1.6.6.1
More information about the notmuch
mailing list