Handle multi-line fixes for byte-string prefixing (#3391)

astral-sh committed on Mar 7, 2023
commit 9817775
1 parent 8d53747
Show file tree

Hide file tree

Showing 3 changed files with 123 additions and 47 deletions.
diff --git a/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py b/crates/ruff/resources/test/fixtures/pyupgrade/UP012.py
@@ -12,11 +12,17 @@
 """.encode(
     "utf-8"
 )
-# b"""
-# Lorem
-#
-# Ipsum
-# """
+(
+    "Lorem "
+    "Ipsum".encode()
+)
+(
+    "Lorem "  # Comment
+    "Ipsum".encode()  # Comment
+)
+(
+    "Lorem " "Ipsum".encode()
+)
 
 # `encode` on variables should not be processed.
 string = "hello there"

diff --git a/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs b/crates/ruff/src/rules/pyupgrade/rules/unnecessary_encode_utf8.rs
@@ -1,4 +1,5 @@
 use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword};
+use rustpython_parser::{lexer, Mode, Tok};
 
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
@@ -59,16 +60,16 @@ fn is_default_encode(args: &[Expr], kwargs: &[Keyword]) -> bool {
         (1, 0) => is_utf8_encoding_arg(&args[0]),
         // .encode(kwarg=kwarg)
         (0, 1) => {
-            kwargs[0].node.arg == Some("encoding".to_string())
+            kwargs[0].node.arg.as_ref().unwrap() == "encoding"
                 && is_utf8_encoding_arg(&kwargs[0].node.value)
         }
         // .encode(*args, **kwargs)
         _ => false,
     }
 }
 
-// Return a Fix for a default `encode` call removing the encoding argument,
-// keyword, or positional.
+/// Return a [`Fix`] for a default `encode` call removing the encoding argument,
+/// keyword, or positional.
 fn delete_default_encode_arg_or_kwarg(
     expr: &Expr,
     args: &[Expr],
@@ -92,7 +93,7 @@ fn delete_default_encode_arg_or_kwarg(
     }
 }
 
-// Return a Fix replacing the call to encode by a `"b"` prefix on the string.
+/// Return a [`Diagnostic`] whose [`Fix`] replaces the `encode` call with a `b`-prefixed string.
 fn replace_with_bytes_literal(
     expr: &Expr,
     constant: &Expr,
@@ -101,16 +102,34 @@ fn replace_with_bytes_literal(
 ) -> Diagnostic {
     let mut diagnostic = Diagnostic::new(UnnecessaryEncodeUTF8, Range::from_located(expr));
     if patch {
-        let content = locator.slice(Range::new(
+        // Build up a replacement string by prefixing all string tokens with `b`.
+        let contents = locator.slice(Range::new(
             constant.location,
             constant.end_location.unwrap(),
         ));
-        let content = format!(
-            "b{}",
-            content.trim_start_matches('u').trim_start_matches('U')
-        );
+        let mut replacement = String::with_capacity(contents.len() + 1);
+        let mut prev = None;
+        for (start, tok, end) in
+            lexer::lex_located(contents, Mode::Module, constant.location).flatten()
+        {
+            if matches!(tok, Tok::String { .. }) {
+                if let Some(prev) = prev {
+                    replacement.push_str(locator.slice(Range::new(prev, start)));
+                }
+                let string = locator.slice(Range::new(start, end));
+                replacement.push_str(&format!(
+                    "b{}",
+                    &string.trim_start_matches('u').trim_start_matches('U')
+                ));
+            } else {
+                if let Some(prev) = prev {
+                    replacement.push_str(locator.slice(Range::new(prev, end)));
+                }
+            }
+            prev = Some(end);
+        }
         diagnostic.amend(Fix::replacement(
-            content,
+            replacement,
             expr.location,
             expr.end_location.unwrap(),
         ));

diff --git a/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap b/crates/ruff/src/rules/pyupgrade/snapshots/ruff__rules__pyupgrade__tests__UP012.py.snap
@@ -124,137 +124,188 @@ expression: diagnostics
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 26
+    row: 16
+    column: 4
+  end_location:
+    row: 17
+    column: 20
+  fix:
+    content: "b\"Lorem \"\n    b\"Ipsum\""
+    location:
+      row: 16
+      column: 4
+    end_location:
+      row: 17
+      column: 20
+  parent: ~
+- kind:
+    UnnecessaryEncodeUTF8: ~
+  location:
+    row: 20
+    column: 4
+  end_location:
+    row: 21
+    column: 20
+  fix:
+    content: "b\"Lorem \"  # Comment\n    b\"Ipsum\""
+    location:
+      row: 20
+      column: 4
+    end_location:
+      row: 21
+      column: 20
+  parent: ~
+- kind:
+    UnnecessaryEncodeUTF8: ~
+  location:
+    row: 24
+    column: 4
+  end_location:
+    row: 24
+    column: 29
+  fix:
+    content: "b\"Lorem \" b\"Ipsum\""
+    location:
+      row: 24
+      column: 4
+    end_location:
+      row: 24
+      column: 29
+  parent: ~
+- kind:
+    UnnecessaryEncodeUTF8: ~
+  location:
+    row: 32
     column: 0
   end_location:
-    row: 26
+    row: 32
     column: 27
   fix:
     content: ""
     location:
-      row: 26
+      row: 32
       column: 19
     end_location:
-      row: 26
+      row: 32
       column: 26
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 44
+    row: 50
     column: 0
   end_location:
-    row: 44
+    row: 50
     column: 31
   fix:
     content: ""
     location:
-      row: 44
+      row: 50
       column: 23
     end_location:
-      row: 44
+      row: 50
       column: 30
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 46
+    row: 52
     column: 0
   end_location:
-    row: 46
+    row: 52
     column: 39
   fix:
     content: ""
     location:
-      row: 46
+      row: 52
       column: 23
     end_location:
-      row: 46
+      row: 52
       column: 38
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 48
+    row: 54
     column: 0
   end_location:
-    row: 48
+    row: 54
     column: 24
   fix:
     content: "br\"foo\\o\""
     location:
-      row: 48
+      row: 54
       column: 0
     end_location:
-      row: 48
+      row: 54
       column: 24
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 49
+    row: 55
     column: 0
   end_location:
-    row: 49
+    row: 55
     column: 22
   fix:
     content: "b\"foo\""
     location:
-      row: 49
+      row: 55
       column: 0
     end_location:
-      row: 49
+      row: 55
       column: 22
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 50
+    row: 56
     column: 0
   end_location:
-    row: 50
+    row: 56
     column: 24
   fix:
     content: "bR\"foo\\o\""
     location:
-      row: 50
+      row: 56
       column: 0
     end_location:
-      row: 50
+      row: 56
       column: 24
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 51
+    row: 57
     column: 0
   end_location:
-    row: 51
+    row: 57
     column: 22
   fix:
     content: "b\"foo\""
     location:
-      row: 51
+      row: 57
       column: 0
     end_location:
-      row: 51
+      row: 57
       column: 22
   parent: ~
 - kind:
     UnnecessaryEncodeUTF8: ~
   location:
-    row: 52
+    row: 58
     column: 6
   end_location:
-    row: 52
+    row: 58
     column: 20
   fix:
     content: "b\"foo\""
     location:
-      row: 52
+      row: 58
       column: 6
     end_location:
-      row: 52
+      row: 58
       column: 20
   parent: ~