From 98177754deb16f2b483c0da2cff728c7f507eb14 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 7 Mar 2023 18:33:47 -0500 Subject: [PATCH] Handle multi-line fixes for byte-string prefixing (#3391) --- .../test/fixtures/pyupgrade/UP012.py | 16 ++- .../rules/unnecessary_encode_utf8.rs | 39 ++++-- ...ff__rules__pyupgrade__tests__UP012.py.snap | 115 +++++++++++++----- 3 files changed, 123 insertions(+), 47 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py b/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py index 50edb51930d10..1795ec914748c 100644 --- a/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py +++ b/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py @@ -12,11 +12,17 @@ """.encode( "utf-8" ) -# b""" -# Lorem -# -# Ipsum -# """ +( + "Lorem " + "Ipsum".encode() +) +( + "Lorem " # Comment + "Ipsum".encode() # Comment +) +( + "Lorem " "Ipsum".encode() +) # `encode` on variables should not be processed. string = "hello there" diff --git a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs index e0a1c4ec4d277..dbceb51bfb400 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs @@ -1,4 +1,5 @@ use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword}; +use rustpython_parser::{lexer, Mode, Tok}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::source_code::Locator; @@ -59,7 +60,7 @@ fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool { (1, 0) => is_utf8_encoding_arg(&args[0]), // .encode(kwarg=kwarg) (0, 1) => { - kwargs[0].node.arg == Some("encoding".to_string()) + kwargs[0].node.arg.as_ref().unwrap() == "encoding" && is_utf8_encoding_arg(&kwargs[0].node.value) } // .encode(*args, **kwargs) @@ -67,8 +68,8 @@ fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool { } } -// Return a Fix for a default `encode` call removing the encoding argument, -// keyword, or positional. +/// Return a [`Fix`] for a default `encode` call removing the encoding argument, +/// keyword, or positional. fn delete_default_encode_arg_or_kwarg( expr: &Expr, args: &[Expr], @@ -92,7 +93,7 @@ fn delete_default_encode_arg_or_kwarg( } } -// Return a Fix replacing the call to encode by a `"b"` prefix on the string. +/// Return a [`Fix`] replacing the call to encode by a `"b"` prefix on the string. fn replace_with_bytes_literal( expr: &Expr, constant: &Expr, @@ -101,16 +102,34 @@ fn replace_with_bytes_literal( ) -> Diagnostic { let mut diagnostic = Diagnostic::new(UnnecessaryEncodeUTF8, Range::from_located(expr)); if patch { - let content = locator.slice(Range::new( + // Build up a replacement string by prefixing all string tokens with `b`. + let contents = locator.slice(Range::new( constant.location, constant.end_location.unwrap(), )); - let content = format!( - "b{}", - content.trim_start_matches('u').trim_start_matches('U') - ); + let mut replacement = String::with_capacity(contents.len() + 1); + let mut prev = None; + for (start, tok, end) in + lexer::lex_located(contents, Mode::Module, constant.location).flatten() + { + if matches!(tok, Tok::String { .. }) { + if let Some(prev) = prev { + replacement.push_str(locator.slice(Range::new(prev, start))); + } + let string = locator.slice(Range::new(start, end)); + replacement.push_str(&format!( + "b{}", + &string.trim_start_matches('u').trim_start_matches('U') + )); + } else { + if let Some(prev) = prev { + replacement.push_str(locator.slice(Range::new(prev, end))); + } + } + prev = Some(end); + } diagnostic.amend(Fix::replacement( - content, + replacement, expr.location, expr.end_location.unwrap(), )); diff --git a/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap b/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap index 5a40942f8bb8f..b90f8536b5d31 100644 --- a/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap +++ b/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap @@ -124,137 +124,188 @@ expression: diagnostics - kind: UnnecessaryEncodeUTF8: ~ location: - row: 26 + row: 16 + column: 4 + end_location: + row: 17 + column: 20 + fix: + content: "b\"Lorem \"\n b\"Ipsum\"" + location: + row: 16 + column: 4 + end_location: + row: 17 + column: 20 + parent: ~ +- kind: + UnnecessaryEncodeUTF8: ~ + location: + row: 20 + column: 4 + end_location: + row: 21 + column: 20 + fix: + content: "b\"Lorem \" # Comment\n b\"Ipsum\"" + location: + row: 20 + column: 4 + end_location: + row: 21 + column: 20 + parent: ~ +- kind: + UnnecessaryEncodeUTF8: ~ + location: + row: 24 + column: 4 + end_location: + row: 24 + column: 29 + fix: + content: "b\"Lorem \" b\"Ipsum\"" + location: + row: 24 + column: 4 + end_location: + row: 24 + column: 29 + parent: ~ +- kind: + UnnecessaryEncodeUTF8: ~ + location: + row: 32 column: 0 end_location: - row: 26 + row: 32 column: 27 fix: content: "" location: - row: 26 + row: 32 column: 19 end_location: - row: 26 + row: 32 column: 26 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 44 + row: 50 column: 0 end_location: - row: 44 + row: 50 column: 31 fix: content: "" location: - row: 44 + row: 50 column: 23 end_location: - row: 44 + row: 50 column: 30 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 46 + row: 52 column: 0 end_location: - row: 46 + row: 52 column: 39 fix: content: "" location: - row: 46 + row: 52 column: 23 end_location: - row: 46 + row: 52 column: 38 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 48 + row: 54 column: 0 end_location: - row: 48 + row: 54 column: 24 fix: content: "br\"foo\\o\"" location: - row: 48 + row: 54 column: 0 end_location: - row: 48 + row: 54 column: 24 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 49 + row: 55 column: 0 end_location: - row: 49 + row: 55 column: 22 fix: content: "b\"foo\"" location: - row: 49 + row: 55 column: 0 end_location: - row: 49 + row: 55 column: 22 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 50 + row: 56 column: 0 end_location: - row: 50 + row: 56 column: 24 fix: content: "bR\"foo\\o\"" location: - row: 50 + row: 56 column: 0 end_location: - row: 50 + row: 56 column: 24 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 51 + row: 57 column: 0 end_location: - row: 51 + row: 57 column: 22 fix: content: "b\"foo\"" location: - row: 51 + row: 57 column: 0 end_location: - row: 51 + row: 57 column: 22 parent: ~ - kind: UnnecessaryEncodeUTF8: ~ location: - row: 52 + row: 58 column: 6 end_location: - row: 52 + row: 58 column: 20 fix: content: "b\"foo\"" location: - row: 52 + row: 58 column: 6 end_location: - row: 52 + row: 58 column: 20 parent: ~