diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index ec2215a5e5f33c..876b8d7ad1ce84 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -95,8 +95,16 @@ NLSET = {'\n', '\r'} SPECIALSNL = SPECIALS | NLSET + +def escape_for_quotes(value): + """Escape dquote and backslash for use within a quoted-string.""" + return str(value).replace('\\', '\\\\').replace('"', '\\"') + + def quote_string(value): - return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' + escaped = escape_for_quotes(value) + return f'"{escaped}"' + # Match a RFC 2047 word, looks like =?utf-8?q?someword?= rfc2047_matcher = re.compile(r''' @@ -2905,6 +2913,14 @@ def _refold_parse_tree(parse_tree, *, policy): if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. newparts = list(part) + if part.token_type == 'bare-quoted-string': + # Restore the quotes and escape contents. + dquote = ValueTerminal('"', 'ptext') + newparts = ( + [dquote] + + [ValueTerminal(escape_for_quotes(p), 'ptext') + for p in newparts] + + [dquote]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 5413319a414a62..efd1695711d7c1 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -3082,13 +3082,40 @@ def test_address_list_with_list_separator_after_fold(self): self._test(parser.get_address_list(to)[0], f'{a},\n =?utf-8?q?H=C3=BCbsch?= Kaktus \n') - a = '.' * 79 + a = '.' * 79 # ('.' is a special, so must be in quoted-string.) to = f'"{a}" , "Hübsch Kaktus" ' self._test(parser.get_address_list(to)[0], - f'{a}\n' + f'"{a}"\n' ' , =?utf-8?q?H=C3=BCbsch?= Kaktus ' '\n') + def test_address_list_with_specials_in_long_quoted_string(self): + # Regression for gh-80222. + policy = self.policy.clone(max_line_length=40) + cases = [ + # (to, folded) + ('"Exfiltrator (unclosed comment?" ', + '"Exfiltrator (unclosed\n' + ' comment?" \n'), + ('"Escaped \\" chars \\\\ in quoted-string stay escaped" ', + '"Escaped \\" chars \\\\ in quoted-string\n' + ' stay escaped" \n'), + ('This long display name does not need quotes ', + 'This long display name does not need\n' + ' quotes \n'), + ('"Quotes are not required but are retained here" ', + '"Quotes are not required but are\n' + ' retained here" \n'), + ('"A quoted-string, it can be a valid local-part"@example.com', + '"A quoted-string, it can be a valid\n' + ' local-part"@example.com\n'), + ('"local-part-with-specials@but-no-fws.cannot-fold"@example.com', + '"local-part-with-specials@but-no-fws.cannot-fold"@example.com\n'), + ] + for (to, folded) in cases: + with self.subTest(to=to): + self._test(parser.get_address_list(to)[0], folded, policy=policy) + # XXX Need tests with comments on various sides of a unicode token, # and with unicode tokens in the comments. Spaces inside the quotes # currently don't do the right thing. diff --git a/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst new file mode 100644 index 00000000000000..78dfacf972f2af --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-08-06-11-43-08.gh-issue-80222.wfR4BU.rst @@ -0,0 +1,3 @@ +Fix a problem where email.policy.default header refolding could incorrectly +omit quotes from structured email headers, enabling sender or recipient +spoofing via a carefully crafted display-name.