From 5db4a1ee1c096b40585f932406d0c2db07546eb0 Mon Sep 17 00:00:00 2001 From: Haw Loeung Date: Fri, 22 Jun 2018 13:41:28 +1000 Subject: [PATCH 1/5] Backport regex used to match encoded-word strings --- Lib/email/header.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index 2cf870fd575f9a..9cf6caa4882db8 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -35,12 +35,11 @@ =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + (?P<encoding>[qQbB]) # either a "q" or a "b", case insensitive \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - (?=[ \t]|$) # whitespace or the end of the string - ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + ''', re.VERBOSE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. 
From 383ec0b40fa584bbe86fe5b667948047a07e3a28 Mon Sep 17 00:00:00 2001 From: Haw Loeung Date: Fri, 22 Jun 2018 16:52:19 +1000 Subject: [PATCH 2/5] Backport updated unit test as well --- Lib/email/test/test_email.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 4b4dee3d34644b..7a99208bd777a7 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1649,10 +1649,12 @@ def test_whitespace_eater_unicode_2(self): hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog') - def test_rfc2047_without_whitespace(self): + def test_rfc2047_missing_whitespace(self): s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' dh = decode_header(s) - self.assertEqual(dh, [(s, None)]) + self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), + (b'rg', None), (b'\xe5', 'iso-8859-1'), + (b'sbord', None)]) def test_rfc2047_with_whitespace(self): s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' From 9610a9940b00c11b79248e2e4de3e5f092daae32 Mon Sep 17 00:00:00 2001 From: Haw Loeung Date: Sat, 18 May 2019 11:42:35 +1000 Subject: [PATCH 3/5] Fixed backported unit test --- Lib/email/test/test_email_renamed.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 5a41701271c529..7a439b0e7cd843 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -1586,7 +1586,9 @@ def test_whitespace_eater_unicode_2(self): def test_rfc2047_missing_whitespace(self): s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' dh = decode_header(s) - self.assertEqual(dh, [(s, None)]) + self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'), + (b'rg', None), (b'\xe5', 'iso-8859-1'), + (b'sbord', None)]) def test_rfc2047_with_whitespace(self): s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' 
From 8f38b36f84f93f8ce1660cdffc37022c54923b50 Mon Sep 17 00:00:00 2001 From: Haw Loeung Date: Wed, 29 May 2019 19:29:39 +1000 Subject: [PATCH 4/5] Don't try to change too much, make it clear which bit of the regex is the issue --- Lib/email/header.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index 9cf6caa4882db8..f28ee437f621b0 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -35,11 +35,11 @@ =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? - (?P<encoding>[qQbB]) # either a "q" or a "b", case insensitive + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - ''', re.VERBOSE | re.MULTILINE) + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. From 0be62101d681c1f45f05ba52a0d820bc5feb4501 Mon Sep 17 00:00:00 2001 From: Haw Loeung Date: Wed, 29 May 2019 19:34:41 +1000 Subject: [PATCH 5/5] Remove noise --- Lib/email/header.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index f28ee437f621b0..f9a769a4d7d2ff 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -35,7 +35,7 @@ =\? # literal =? (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? - (?P<encoding>[qb]) # either a "q" or a "b", case insensitive + (?P<encoding>[qb]) # either a "q" or a "b", case insensitive \? # literal ? (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?=