From 09ec8153c6480fd608ce6f981a050f515f1875c9 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com>
Date: Thu, 5 Oct 2023 07:07:25 -0700
Subject: [PATCH] [3.12] gh-110259: Fix f-strings with multiline expressions and format specs (GH-110271) (#110396)

gh-110259: Fix f-strings with multiline expressions and format specs (GH-110271)

(cherry picked from commit cc389ef627b2a486ab89d9a11245bef48224efb1)

Signed-off-by: Pablo Galindo
Co-authored-by: Pablo Galindo Salgado
---
 Lib/ast.py                                    | 11 ++-
 Lib/test/test_tokenize.py                     | 97 +++++++++++++++++++
 Lib/test/test_unparse.py                      |  3 +-
 ...-10-03-11-43-48.gh-issue-110259.ka93x5.rst |  3 +
 Parser/tokenizer.c                            | 24 +++--
 5 files changed, 128 insertions(+), 10 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst

diff --git a/Lib/ast.py b/Lib/ast.py
index 07044706dc3608..de940d2e9c6549 100644
--- a/Lib/ast.py
+++ b/Lib/ast.py
@@ -1268,13 +1268,15 @@ def visit_JoinedStr(self, node):
         quote_type = quote_types[0]
         self.write(f"{quote_type}{value}{quote_type}")
 
-    def _write_fstring_inner(self, node):
+    def _write_fstring_inner(self, node, scape_newlines=False):
         if isinstance(node, JoinedStr):
             # for both the f-string itself, and format_spec
             for value in node.values:
-                self._write_fstring_inner(value)
+                self._write_fstring_inner(value, scape_newlines=scape_newlines)
         elif isinstance(node, Constant) and isinstance(node.value, str):
             value = node.value.replace("{", "{{").replace("}", "}}")
+            if scape_newlines:
+                value = value.replace("\n", "\\n")
             self.write(value)
         elif isinstance(node, FormattedValue):
             self.visit_FormattedValue(node)
@@ -1297,7 +1299,10 @@ def unparse_inner(inner):
                 self.write(f"!{chr(node.conversion)}")
             if node.format_spec:
                 self.write(":")
-                self._write_fstring_inner(node.format_spec)
+                self._write_fstring_inner(
+                    node.format_spec,
+                    scape_newlines=True
+                )
 
     def visit_Name(self, node):
         self.write(node.id)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 40680f004723c5..b8d069a87843e4 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -571,6 +571,55 @@ def test_string(self):
     OP '=' (3, 0) (3, 1)
     OP '}' (3, 1) (3, 2)
     FSTRING_END "'''" (3, 2) (3, 5)
+    """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''" (1, 0) (1, 4)
+    FSTRING_MIDDLE '__' (1, 4) (1, 6)
+    OP '{' (1, 6) (1, 7)
+    NL '\\n' (1, 7) (1, 8)
+    NAME 'x' (2, 4) (2, 5)
+    OP ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
+    OP '}' (3, 0) (3, 1)
+    FSTRING_MIDDLE '__' (3, 1) (3, 3)
+    FSTRING_END "'''" (3, 3) (3, 6)
+    """)
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''" (1, 0) (1, 4)
+    FSTRING_MIDDLE '__' (1, 4) (1, 6)
+    OP '{' (1, 6) (1, 7)
+    NL '\\n' (1, 7) (1, 8)
+    NAME 'x' (2, 4) (2, 5)
+    OP ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    OP '}' (6, 0) (6, 1)
+    FSTRING_MIDDLE '__' (6, 1) (6, 3)
+    FSTRING_END "'''" (6, 3) (6, 6)
+    """)
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'" (1, 0) (1, 2)
+    FSTRING_MIDDLE '__' (1, 2) (1, 4)
+    OP '{' (1, 4) (1, 5)
+    NL '\\n' (1, 5) (1, 6)
+    NAME 'x' (2, 4) (2, 5)
+    OP ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd' (2, 6) (2, 7)
+    NL '\\n' (2, 7) (2, 8)
+    OP '}' (3, 0) (3, 1)
+    FSTRING_MIDDLE '__' (3, 1) (3, 3)
+    FSTRING_END "'" (3, 3) (3, 4)
     """)
 
     def test_function(self):
@@ -2274,6 +2323,54 @@ def test_string(self):
     FSTRING_START \'f"\' (1, 0) (1, 2)
     FSTRING_MIDDLE 'hola\\\\\\\\\\\\r\\\\ndfgf' (1, 2) (1, 16)
     FSTRING_END \'"\' (1, 16) (1, 17)
+    """)
+
+        self.check_tokenize("""\
+f'''__{
+    x:a
+}__'''""", """\
+    FSTRING_START "f'''" (1, 0) (1, 4)
+    FSTRING_MIDDLE '__' (1, 4) (1, 6)
+    LBRACE '{' (1, 6) (1, 7)
+    NAME 'x' (2, 4) (2, 5)
+    COLON ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n' (2, 6) (3, 0)
+    RBRACE '}' (3, 0) (3, 1)
+    FSTRING_MIDDLE '__' (3, 1) (3, 3)
+    FSTRING_END "'''" (3, 3) (3, 6)
+    """)
+
+        self.check_tokenize("""\
+f'''__{
+    x:a
+    b
+     c
+      d
+}__'''""", """\
+    FSTRING_START "f'''" (1, 0) (1, 4)
+    FSTRING_MIDDLE '__' (1, 4) (1, 6)
+    LBRACE '{' (1, 6) (1, 7)
+    NAME 'x' (2, 4) (2, 5)
+    COLON ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'a\\n    b\\n     c\\n      d\\n' (2, 6) (6, 0)
+    RBRACE '}' (6, 0) (6, 1)
+    FSTRING_MIDDLE '__' (6, 1) (6, 3)
+    FSTRING_END "'''" (6, 3) (6, 6)
+    """)
+
+        self.check_tokenize("""\
+f'__{
+    x:d
+}__'""", """\
+    FSTRING_START "f'" (1, 0) (1, 2)
+    FSTRING_MIDDLE '__' (1, 2) (1, 4)
+    LBRACE '{' (1, 4) (1, 5)
+    NAME 'x' (2, 4) (2, 5)
+    COLON ':' (2, 5) (2, 6)
+    FSTRING_MIDDLE 'd' (2, 6) (2, 7)
+    RBRACE '}' (3, 0) (3, 1)
+    FSTRING_MIDDLE '__' (3, 1) (3, 3)
+    FSTRING_END "'" (3, 3) (3, 4)
     """)
 
     def test_function(self):
diff --git a/Lib/test/test_unparse.py b/Lib/test/test_unparse.py
index bdf7b0588bee67..6f698a8d891815 100644
--- a/Lib/test/test_unparse.py
+++ b/Lib/test/test_unparse.py
@@ -730,7 +730,8 @@ class DirectoryTestCase(ASTTestCase):
     test_directories = (lib_dir, lib_dir / "test")
     run_always_files = {"test_grammar.py", "test_syntax.py", "test_compile.py",
                         "test_ast.py", "test_asdl_parser.py", "test_fstring.py",
-                        "test_patma.py", "test_type_alias.py", "test_type_params.py"}
+                        "test_patma.py", "test_type_alias.py", "test_type_params.py",
+                        "test_tokenize.py"}
 
     _files_to_test = None
 
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
new file mode 100644
index 00000000000000..55c743d0e4917e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-03-11-43-48.gh-issue-110259.ka93x5.rst
@@ -0,0 +1,3 @@
+Correctly identify the format spec in f-strings (with single or triple
+quotes) that have multiple lines in the expression part and include a
+formatting spec. Patch by Pablo Galindo
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 9911fa55d47c9f..a7786d0c17e9e3 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -2768,11 +2768,28 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
         if (tok->done == E_ERROR) {
             return MAKE_TOKEN(ERRORTOKEN);
         }
-        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+        int in_format_spec = (
+            current_tok->last_expr_end != -1
+            &&
+            INSIDE_FSTRING_EXPR(current_tok)
+        );
+
+        if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
             if (tok->decoding_erred) {
                 return MAKE_TOKEN(ERRORTOKEN);
             }
 
+            // If we are in a format spec and we found a newline,
+            // it means that the format spec ends here and we should
+            // return to the regular mode.
+            if (in_format_spec && c == '\n') {
+                tok_backup(tok, c);
+                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
+                p_start = tok->start;
+                p_end = tok->cur;
+                return MAKE_TOKEN(FSTRING_MIDDLE);
+            }
+
             assert(tok->multi_line_start != NULL);
             // shift the tok_state's location into
             // the start of string, and report the error
@@ -2804,11 +2821,6 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
             end_quote_size = 0;
         }
 
-        int in_format_spec = (
-            current_tok->last_expr_end != -1
-            &&
-            INSIDE_FSTRING_EXPR(current_tok)
-        );
         if (c == '{') {
             int peek = tok_nextc(tok);
             if (peek != '{' || in_format_spec) {
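
For context, a minimal standalone sketch (not part of the patch itself) of the behaviour the change covers: an f-string whose expression part spans several lines and ends in a format spec. Tokenizing such a string exercises the Parser/tokenizer.c path, and round-tripping it through ast.unparse exercises the Lib/ast.py path, which now escapes newlines inside the format spec. The snippet and its variable names are illustrative only and assume a CPython 3.12 build that includes this fix.

    import ast
    import io
    import tokenize

    # f-string with a multiline expression part and a format spec ("a").
    src = "f'''__{\n    x:a\n}__'''\n"

    # Tokenizer path: the format spec must stay confined to the braces;
    # before this fix the tokenizer mis-identified where the spec ended.
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.exact_type], repr(tok.string), tok.start, tok.end)

    # Unparser path: newlines inside the format spec are escaped so the
    # unparsed source remains a valid f-string.
    print(ast.unparse(ast.parse(src)))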