forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Stop incorrectly RFC 2047 encoding non-ASCII email addresses
Email generators had been incorrectly flattening non-ASCII email addresses to RFC 2047 encoded-word format, leaving them undeliverable. (RFC 2047 prohibits use of encoded-word in an addr-spec.) This change raises a ValueError when attempting to flatten an EmailMessage with a non-ASCII addr-spec and a policy with utf8=False. (Exception: If the non-ASCII address originated from parsing a message, it will be flattened as originally parsed, without error.) Non-ASCII email addresses are supported when using a policy with utf8=True (such as email.policy.SMTPUTF8) under RFCs 6531 and 6532. Non-ASCII email address domains (but not localparts) can also be used with non-SMTPUTF8 policies by encoding the domain as an IDNA A-label. (The email package does not perform this encoding, because it cannot know whether the caller wants IDNA 2003, IDNA 2008, or some other variant such as UTS #46.)
- Loading branch information
Showing
3 changed files
with
75 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import io | ||
import re | ||
import textwrap | ||
import unittest | ||
from email import message_from_string, message_from_bytes | ||
|
@@ -288,6 +289,28 @@ def test_keep_long_encoded_newlines(self): | |
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), self.typ(expected)) | ||
|
||
def test_non_ascii_addr_spec_raises(self):
    # RFC2047 encoded-word is not permitted in any part of an addr-spec,
    # so flattening a non-ASCII address with utf8=False must raise
    # instead of emitting an undeliverable encoded address.
    # (See also test_non_ascii_addr_spec_preserved below.)
    g = self.genclass(self.ioclass(), policy=self.policy.clone(utf8=False))
    # Cover a non-ASCII local part, a non-ASCII domain, both at once,
    # and addresses carrying an ASCII or non-ASCII display name.
    cases = [
        'wők@example.com',
        'wok@exàmple.com',
        'wők@exàmple.com',
        '"Name, for display" <wők@example.com>',
        'Näyttönimi <wők@example.com>',
    ]
    for address in cases:
        with self.subTest(address=address):
            msg = EmailMessage()
            msg['To'] = address
            # The message quotes only the addr-spec (never the display
            # name); escape it because it is matched as a regex.
            expected_error = re.escape(
                "Non-ASCII address requires policy with utf8=True:"
                " '{}'".format(msg['To'].addresses[0].addr_spec)
            )
            with self.assertRaisesRegex(ValueError, expected_error):
                g.flatten(msg)
|
||
|
||
class TestGenerator(TestGeneratorBase, TestEmailBase): | ||
|
||
|
@@ -432,12 +455,12 @@ def test_cte_type_7bit_transforms_8bit_cte(self): | |
|
||
def test_smtputf8_policy(self): | ||
msg = EmailMessage() | ||
msg['From'] = "Páolo <főo@bar.com>" | ||
msg['From'] = "Páolo <főo@bàr.com>" | ||
msg['To'] = 'Dinsdale' | ||
msg['Subject'] = 'Nudge nudge, wink, wink \u1F609' | ||
msg.set_content("oh là là, know what I mean, know what I mean?") | ||
expected = textwrap.dedent("""\ | ||
From: Páolo <főo@bar.com> | ||
From: Páolo <főo@bàr.com> | ||
To: Dinsdale | ||
Subject: Nudge nudge, wink, wink \u1F609 | ||
Content-Type: text/plain; charset="utf-8" | ||
|
@@ -472,6 +495,37 @@ def test_smtp_policy(self): | |
g.flatten(msg) | ||
self.assertEqual(s.getvalue(), expected) | ||
|
||
def test_non_ascii_addr_spec_preserved(self):
    # A defective non-ASCII addr-spec parsed from the original
    # message is left unchanged when flattening.
    # (See also test_non_ascii_addr_spec_raises above.)
    # The header is deliberately long: the expected output shows it
    # being re-folded after the first address, with the raw UTF-8
    # bytes of both addr-specs carried through untouched.
    source = (
        'To: jörg@example.com, "But a long name still works with refold_source" <jörg@example.com>'
    ).encode()
    expected = (
        b'To: j\xc3\xb6rg@example.com,\n'
        b' "But a long name still works with refold_source" <j\xc3\xb6rg@example.com>\n'
        b'\n'
    )
    msg = message_from_bytes(source, policy=policy.default)
    s = io.BytesIO()
    g = BytesGenerator(s, policy=policy.default)
    g.flatten(msg)
    self.assertEqual(s.getvalue(), expected)
|
||
def test_idna_encoding_preserved(self):
    # Nothing tries to decode a pre-encoded IDNA domain.
    msg = EmailMessage()
    msg["To"] = Address(
        username='jörg',
        domain='☕.example'.encode('idna').decode()  # IDNA 2003
    )
    # 'xn--53h' is the A-label produced above for U+2615; it must reach
    # the output verbatim while the local part stays raw UTF-8
    # (the generator policy below has utf8=True).
    expected = 'To: jörg@xn--53h.example\n\n'.encode()
    s = io.BytesIO()
    g = BytesGenerator(s, policy=policy.default.clone(utf8=True))
    g.flatten(msg)
    self.assertEqual(s.getvalue(), expected)
|
||
|
||
if __name__ == '__main__': | ||
unittest.main() |