[PATCH] Rearchitect the way the Received: header is concatenated

Dirk Hohndel hohndel at infradead.org
Fri Apr 23 22:08:31 PDT 2010


The previous implementation misunderstood the way the message file
was handled. Under certain circumstances this could cause SEGVs as
we were trying to keep reading from the file after it was closed.

Now we treat the Received: header as special and always concatenate
it when parsing the headers.

Signed-off-by: Dirk Hohndel <hohndel at infradead.org>
---
 lib/message-file.c    |   53 ++++++++++++++++-----
 lib/notmuch-private.h |    3 +
 lib/notmuch.h         |   16 ++++++
 notmuch-reply.c       |  125 +++++++++++++++++++++++++++++++++++++------------
 4 files changed, 154 insertions(+), 43 deletions(-)

diff --git a/lib/message-file.c b/lib/message-file.c
index 0c152a3..75b3990 100644
--- a/lib/message-file.c
+++ b/lib/message-file.c
@@ -209,16 +209,23 @@ copy_header_unfolding (header_value_closure_t *value,
 
 /* As a special-case, a value of NULL for header_desired will force
  * the entire header to be parsed if it is not parsed already. This is
- * used by the _notmuch_message_file_get_headers_end function. */
+ * used by the _notmuch_message_file_get_headers_end function.
+ * Another special case is the Received: header. For this header we
+ * want to concatenate all instances of the header instead of just
+ * hashing the first instance as we use this when analyzing the path
+ * the mail has taken from sender to recipient.
+ */
 const char *
 notmuch_message_file_get_header (notmuch_message_file_t *message,
 				 const char *header_desired)
 {
     int contains;
-    char *header, *decoded_value;
+    char *header, *decoded_value, *header_sofar, *combined_header;
     const char *s, *colon;
-    int match;
+    int match, newhdr, hdrsofar, received;
     static int initialized = 0;
+
+    received = (strcmp(header_desired,"received") == 0);
 
     if (! initialized) {
 	g_mime_init (0);
@@ -312,22 +319,34 @@ notmuch_message_file_get_header (notmuch_message_file_t *message,
 
 	NEXT_HEADER_LINE (&message->value);
 
-	if (header_desired == 0)
+	if (header_desired == NULL)
 	    match = 0;
 	else
 	    match = (strcasecmp (header, header_desired) == 0);
 
 	decoded_value = g_mime_utils_header_decode_text (message->value.str);
-	if (g_hash_table_lookup (message->headers, header) == NULL) {
-	    /* Only insert if we don't have a value for this header, yet.
-	     * This way we always return the FIRST instance of any header
-	     * we search for
-	     * FIXME: we should be returning ALL instances of a header
-	     *        or at least provide a way to iterate over them
-	     */
-	    g_hash_table_insert (message->headers, header, decoded_value);
+	header_sofar = (char *)g_hash_table_lookup (message->headers, header);
+	if (strcmp(header,"received") == 0) {
+	    if (header_sofar == NULL) {
+		/* Only insert if we don't have a value for this header, yet. */
+		g_hash_table_insert (message->headers, header, decoded_value);
+	    } else {
+		/* the caller wants them all concatenated */
+		newhdr = strlen(decoded_value);
+		hdrsofar = strlen(header_sofar);
+		combined_header = xmalloc(hdrsofar + newhdr + 2);
+		strncpy(combined_header,header_sofar,hdrsofar);
+		*(combined_header+hdrsofar) = ' ';
+		strncpy(combined_header+hdrsofar+1,decoded_value,newhdr+1);
+		g_hash_table_insert (message->headers, header, combined_header);
+	    }
+	} else {
+	    if (header_sofar == NULL) {
+		/* Only insert if we don't have a value for this header, yet. */
+		g_hash_table_insert (message->headers, header, decoded_value);
+	    }
 	}
-	if (match)
+	if (match && !received)
 	    return decoded_value;
     }
 
@@ -347,6 +366,14 @@ notmuch_message_file_get_header (notmuch_message_file_t *message,
 	message->value.len = 0;
     }
 
+    /* For the Received: header we actually might end up here even
+     * though we found the header (as we force continued parsing
+     * in that case). So let's check if that's the header we were
+     * looking for and return the value that we found (if any)
+     */
+    if (received)
+	return (char *)g_hash_table_lookup (message->headers, "received");
+
     /* We've parsed all headers and never found the one we're looking
      * for. It's probably just not there, but let's check that we
      * didn't make a mistake preventing us from seeing it. */
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 94cce1b..69298e7 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -334,6 +334,9 @@ notmuch_message_file_restrict_headersv (notmuch_message_file_t *message,
  *
  * The header name is case insensitive.
  *
+ * The "received" header is special - for it all received headers in
+ * the message are concatenated
+ *
  * The returned value is owned by the notmuch message and is valid
  * only until the message is closed. The caller should copy it if
  * needing to modify the value or to hold onto it for longer.
diff --git a/lib/notmuch.h b/lib/notmuch.h
index bae48a6..c3fe8d7 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -788,6 +788,22 @@ notmuch_message_get_date  (notmuch_message_t *message);
 const char *
 notmuch_message_get_header (notmuch_message_t *message, const char *header);
 
+/* Get the concatenated value of all instances of the specified header
+ * from 'message'.
+ *
+ * The value will be read from the actual message file, not from the
+ * notmuch database. The header name is case insensitive.
+ *
+ * The returned string belongs to the message so should not be
+ * modified or freed by the caller (nor should it be referenced after
+ * the message is destroyed).
+ *
+ * Returns an empty string ("") if the message does not contain a
+ * header line matching 'header'. Returns NULL if any error occurs.
+ */
+const char *
+notmuch_message_get_concat_header (notmuch_message_t *message, const char *header);
+
 /* Get the tags for 'message', returning a notmuch_tags_t object which
  * can be used to iterate over all tags.
  *
diff --git a/notmuch-reply.c b/notmuch-reply.c
index 230cacc..3a9186e 100644
--- a/notmuch-reply.c
+++ b/notmuch-reply.c
@@ -305,33 +305,97 @@ add_recipients_from_message (GMimeMessage *reply,
 static const char *
 guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message)
 {
-    const char *received,*primary;
-    char **other;
-    char *by,*mta,*ptr,*token;
+    const char *received,*primary,*by;
+    char **other,*tohdr;
+    char *mta,*ptr,*token;
     char *domain=NULL;
     char *tld=NULL;
     const char *delim=". \t";
     size_t i,other_len;
 
+    const char *to_headers[] = {"Envelope-to", "X-Original-To"};
+
+    primary = notmuch_config_get_user_primary_email (config);
+    other = notmuch_config_get_user_other_email (config, &other_len);
+
+    /* sadly, there is no standard way to find out to which email
+     * address a mail was delivered - what is in the headers depends
+     * on the MTAs used along the way. So we are trying a number of
+     * heuristics which hopefully will answer this question.
+
+     * We only got here if none of the users email addresses are in
+     * the To: or Cc: header. From here we try the following in order:
+     * 1) check for an Envelope-to: header
+     * 2) check for an X-Original-To: header
+     * 3) check for a (for <email at add.res>) clause in Received: headers
+     * 4) check for the domain part of known email addresses in the
+     *    'by' part of Received headers
+     * If none of these work, we give up and return NULL
+     */
+    for (i = 0; i < sizeof(to_headers)/sizeof(*to_headers); i++) {
+	tohdr = xstrdup(notmuch_message_get_header (message, to_headers[i]));
+	if (tohdr && *tohdr) {
+	    /* tohdr is potentialy a list of email addresses, so here we
+	     * check if one of the email addresses is a substring of tohdr
+	     */
+	    if (strcasestr(tohdr, primary)) {
+		free(tohdr);
+		return primary;
+	    }
+	    for (i = 0; i < other_len; i++)
+		if (strcasestr (tohdr, other[i])) {
+		    free(tohdr);
+		    return other[i];
+		}
+	    free(tohdr);
+	}
+    }
+
+    /* We get the concatenated Received: headers and search from the
+     * front (last Received: header added) and try to extract from
+     * them indications to which email address this message was
+     * delivered.
+     * The Received: header is special in our get_header function
+     * and is always concated.
+     */
     received = notmuch_message_get_header (message, "received");
-    by = strstr (received, " by ");
-    if (by && *(by+4)) {
-	/* sadly, the format of Received: headers is a bit inconsistent,
-	 * depending on the MTA used. So we try to extract just the MTA
-	 * here by removing leading whitespace and assuming that the MTA
-	 * name ends at the next whitespace
-	 * we test for *(by+4) to be non-'\0' to make sure there's something
-	 * there at all - and then assume that the first whitespace delimited
-	 * token that follows is the last receiving server
+    /* First we look for a " for <email at add.res>" in the received
+     * header
+     */
+    ptr = strstr (received, " for ");
+    if (ptr) {
+	/* the text following is potentialy a list of email addresses,
+	 * so again we check if one of the email addresses is a
+	 * substring of ptr
 	 */
-	mta = strdup (by+4);
-	if (mta == NULL)
-	    return NULL;
+	if (strcasestr(ptr, primary)) {
+	    return primary;
+	}
+	for (i = 0; i < other_len; i++)
+	    if (strcasestr (ptr, other[i])) {
+		return other[i];
+	    }
+    }
+    /* Finally, we parse all the " by MTA ..." headers to guess the
+     * email address that this was originally delivered to.
+     * We extract just the MTA here by removing leading whitespace and
+     * assuming that the MTA name ends at the next whitespace.
+     * We test for *(by+4) to be non-'\0' to make sure there's
+     * something there at all - and then assume that the first
+     * whitespace delimited token that follows is the receiving
+     * system in this step of the receive chain
+     */
+    by = received;
+    while((by = strstr (by, " by ")) != NULL) {
+	by += 4;
+	if (*by == '\0')
+	    break;
+	mta = xstrdup (by);
 	token = strtok(mta," \t");
 	if (token == NULL)
-	    return NULL;
+	    break;
 	/* Now extract the last two components of the MTA host name
-	 * as domain and tld
+	 * as domain and tld.
 	 */
 	while ((ptr = strsep (&token, delim)) != NULL) {
 	    if (*ptr == '\0')
@@ -341,23 +405,24 @@ guess_from_received_header (notmuch_config_t *config, notmuch_message_t *message
 	}
 
 	if (domain) {
-	    /* recombine domain and tld and look for it among the configured
-	     * email addresses
+	    /* Recombine domain and tld and look for it among the configured
+	     * email addresses.
+	     * This time we have a known domain name and nothing else - so
+	     * the test is the other way around: we check if this is a
+	     * substring of one of the email addresses.
 	     */
 	    *(tld-1) = '.';
-	    primary = notmuch_config_get_user_primary_email (config);
-	    if (strcasestr (primary, domain)) {
-		free (mta);
-		return primary;
+
+	    if (strcasestr(primary, domain)) {
+		free(mta);
+	    return primary;
+	}
+	for (i = 0; i < other_len; i++)
+	    if (strcasestr (other[i],domain)) {
+		free(mta);
+		return other[i];
 	    }
-	    other = notmuch_config_get_user_other_email (config, &other_len);
-	    for (i = 0; i < other_len; i++)
-		if (strcasestr (other[i], domain)) {
-		    free (mta);
-		    return other[i];
-		}
 	}
-
 	free (mta);
     }
 
-- 
1.6.6.1


-- 
Dirk Hohndel
Intel Open Source Technology Center


More information about the notmuch mailing list