[PATCH 2/2] lib: detect mislabeled Windows-1252 parts
Sebastian Poeplau
sebastian.poeplau at eurecom.fr
Tue Aug 7 05:48:22 PDT 2018
Use GMime's Windows charset filter to detect text parts that are labeled
iso-8859-* but actually contain Windows-specific characters, and decode
them with the correct (Windows) charset instead.
---
notmuch-show.c | 30 ++++++++++++++++++++++++++++--
test/T300-encoding.sh | 1 -
2 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/notmuch-show.c b/notmuch-show.c
index 1072ea55..c3a3783a 100644
--- a/notmuch-show.c
+++ b/notmuch-show.c
@@ -272,6 +272,7 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
GMimeContentType *content_type = g_mime_object_get_content_type (GMIME_OBJECT (part));
GMimeStream *stream_filter = NULL;
GMimeFilter *crlf_filter = NULL;
+ GMimeFilter *windows_filter = NULL;
GMimeDataWrapper *wrapper;
const char *charset;
@@ -282,13 +283,37 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
if (stream_out == NULL)
return;
+ charset = g_mime_object_get_content_type_parameter (part, "charset");
+ charset = charset ? g_mime_charset_canon_name (charset) : NULL;
+ wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+ if (wrapper && charset && !g_ascii_strncasecmp (charset, "iso-8859-", 9)) {
+ GMimeStream *null_stream = NULL;
+ GMimeStream *null_stream_filter = NULL;
+
+ /* Check for mislabeled Windows encoding */
+ null_stream = g_mime_stream_null_new ();
+ null_stream_filter = g_mime_stream_filter_new (null_stream);
+ windows_filter = g_mime_filter_windows_new (charset);
+ g_mime_stream_filter_add(GMIME_STREAM_FILTER (null_stream_filter),
+ windows_filter);
+ g_mime_data_wrapper_write_to_stream (wrapper, null_stream_filter);
+ charset = g_mime_filter_windows_real_charset(
+ (GMimeFilterWindows *) windows_filter);
+
+ if (null_stream_filter)
+ g_object_unref (null_stream_filter);
+ if (null_stream)
+ g_object_unref (null_stream);
+ /* Keep a reference to windows_filter in order to prevent the
+ * charset string from being deallocated. */
+ }
+
stream_filter = g_mime_stream_filter_new (stream_out);
crlf_filter = g_mime_filter_crlf_new (false, false);
g_mime_stream_filter_add(GMIME_STREAM_FILTER (stream_filter),
crlf_filter);
g_object_unref (crlf_filter);
- charset = g_mime_object_get_content_type_parameter (part, "charset");
if (charset) {
GMimeFilter *charset_filter;
charset_filter = g_mime_filter_charset_new (charset, "UTF-8");
@@ -313,11 +338,12 @@ show_text_part_content (GMimeObject *part, GMimeStream *stream_out,
}
}
- wrapper = g_mime_part_get_content_object (GMIME_PART (part));
if (wrapper && stream_filter)
g_mime_data_wrapper_write_to_stream (wrapper, stream_filter);
if (stream_filter)
g_object_unref(stream_filter);
+ if (windows_filter)
+ g_object_unref (windows_filter);
}
static const char*
diff --git a/test/T300-encoding.sh b/test/T300-encoding.sh
index 4a6bfd2f..1e9d2a3d 100755
--- a/test/T300-encoding.sh
+++ b/test/T300-encoding.sh
@@ -45,7 +45,6 @@ output=$(notmuch search id:${gen_msg_id} 2>&1 | notmuch_show_sanitize)
test_expect_equal "$output" "thread:0000000000000005 2001-01-05 [1/1] Notmuch Test Suite; encodedword withoutspace (inbox unread)"
test_begin_subtest "Mislabeled Windows-1252 encoding"
-test_subtest_known_broken
add_message '[content-type]="text/plain; charset=iso-8859-1"' \
"[body]=$'This text contains \x93Windows-1252\x94 character codes.'"
cat <<EOF > EXPECTED
--
2.18.0
More information about the notmuch
mailing list