astral-sh · ThatsJustCheesy · Oct 23, 2024 · Oct 23, 2024 · Oct 23, 2024 · Oct 23, 2024
diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py b/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py
diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs
@@ -10,7 +10,6 @@ use ruff_diagnostics::Diagnostic;
 use ruff_python_index::Indexer;
 use ruff_python_parser::Tokens;
 use ruff_source_file::Locator;
-use ruff_text_size::Ranged;
 
 use crate::directives::TodoComment;
 use crate::registry::{AsRule, Rule};
@@ -93,11 +92,12 @@ pub(crate) fn check_tokens(
         Rule::InvalidCharacterNul,
         Rule::InvalidCharacterZeroWidthSpace,
     ]) {
+        let mut last_fstring_start = None;
         for token in tokens {
             pylint::rules::invalid_string_characters(
                 &mut diagnostics,
-                token.kind(),
-                token.range(),
+                token,
+                &mut last_fstring_start,
                 locator,
             );
         }

diff --git a/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs b/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs
@@ -1,3 +1,7 @@
+use ruff_python_ast::str::Quote;
+use ruff_python_ast::StringFlags;
+use ruff_python_parser::Token;
+use ruff_text_size::Ranged;
 use ruff_text_size::{TextLen, TextRange, TextSize};
 
 use ruff_diagnostics::AlwaysFixableViolation;
@@ -172,19 +176,33 @@ impl AlwaysFixableViolation for InvalidCharacterZeroWidthSpace {
 }
 
 /// PLE2510, PLE2512, PLE2513, PLE2514, PLE2515
-pub(crate) fn invalid_string_characters(
+pub(crate) fn invalid_string_characters<'a>(
     diagnostics: &mut Vec<Diagnostic>,
-    token: TokenKind,
-    range: TextRange,
+    token: &'a Token,
+    last_fstring_start: &mut Option<&'a Token>,
     locator: &Locator,
 ) {
-    let text = match token {
+    struct InvalidCharacterDiagnostic {
+        diagnostic: Diagnostic,
+        edit: Edit,
+    }
+
+    let kind = token.kind();
+    let range = token.range();
+
+    let text = match kind {
         // We can't use the `value` field since it's decoded and e.g. for f-strings removed a curly
         // brace that escaped another curly brace, which would gives us wrong column information.
         TokenKind::String | TokenKind::FStringMiddle => locator.slice(range),
+        TokenKind::FStringStart => {
+            *last_fstring_start = Some(token);
+            return;
+        }
         _ => return,
     };
 
+    // Accumulate diagnostics here to postpone generating shared fixes until we know we need them.
+    let mut new_diagnostics: Vec<InvalidCharacterDiagnostic> = Vec::new();
     for (column, match_) in text.match_indices(&['\x08', '\x1A', '\x1B', '\0', '\u{200b}']) {
         let c = match_.chars().next().unwrap();
         let (replacement, rule): (&str, DiagnosticKind) = match c {
@@ -201,8 +219,97 @@ pub(crate) fn invalid_string_characters(
         let location = range.start() + TextSize::try_from(column).unwrap();
         let range = TextRange::at(location, c.text_len());
 
-        diagnostics.push(Diagnostic::new(rule, range).with_fix(Fix::safe_edit(
-            Edit::range_replacement(replacement.to_string(), range),
-        )));
+        new_diagnostics.push(InvalidCharacterDiagnostic {
+            diagnostic: Diagnostic::new(rule, range),
+            // This is integrated with other fixes and attached to the diagnostic below.
+            edit: Edit::range_replacement(replacement.to_string(), range),
+        });
+    }
+    if new_diagnostics.is_empty() {
+        // No issues, nothing to fix.
+        return;
+    }
+
+    // Convert raw strings to non-raw strings when fixes are applied:
+    // https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180
+    let mut string_conversion_edits = Vec::new();
+    if token.is_raw_string() {
+        let string_flags = token.string_flags();
+        let prefix = string_flags.prefix().as_str();
+
+        // 1. Remove the raw string prefix.
+        for (column, match_) in prefix.match_indices(&['r', 'R']) {
+            let c = match_.chars().next().unwrap();
+
+            let entire_string_range = match kind {
+                TokenKind::String => range,
+                _ => last_fstring_start.unwrap().range(),
+            };
+            let location = entire_string_range.start() + TextSize::try_from(column).unwrap();
+            let range = TextRange::at(location, c.text_len());
+
+            string_conversion_edits.push(Edit::range_deletion(range));
+        }
+
+        // 2. Escape '\' and quote characters inside the string content.
+        let (content_start, content_end): (TextSize, TextSize) = match kind {
+            TokenKind::String => (
+                prefix.text_len() + string_flags.quote_len(),
+                TextSize::try_from(text.len()).unwrap() - string_flags.quote_len(),
+            ),
+            _ => (0.into(), text.len().try_into().unwrap()),
+        };
-                TextSize::try_from(text.len()).unwrap() - string_flags.quote_len(),
-            ),
-            _ => (0.into(), text.len().try_into().unwrap()),
-        };
+                text.text_len() - string_flags.quote_len(),
+            ),
+            _ => TextRange::new((0.into(), text.text_len()),
+        };
-                TextSize::try_from(text.len()).unwrap() - string_flags.quote_len(),
-            ),
-            _ => (0.into(), text.len().try_into().unwrap()),
-        };
+                text.text_len() - string_flags.quote_len(),
+            ),
+            _ => TextRange::new((0.into(), text.text_len()),
+        };
+        let string_content = &text[content_start.to_usize()..content_end.to_usize()];
+        for (column, match_) in string_content.match_indices(&['\\', '\'', '"']) {
+            let c = match_.chars().next().unwrap();
+            let replacement: &str = match c {
+                '\\' => "\\\\",
+                '\'' | '"' => {
+                    // Quotes only have to be escaped in triple-quoted strings at the beginning
+                    // of a triplet (like `\"""\"""` within the string, or `\""""` at the end).
+                    // For simplicity, escape all quotes followed by the same character
+                    // (e.g., `r""" \""" \""""` becomes `""" \\\"\"" \""""`).
+                    if string_flags.is_triple_quoted()
+                        && string_content
+                            .as_bytes()
+                            .get(column + 1)
+                            .is_some_and(|c2| char::from(*c2) != c)
+                    {
+                        continue;
+                    }
+                    match (c, string_flags.quote_style()) {
+                        ('\'', Quote::Single) => "\\'",
+                        ('"', Quote::Double) => "\\\"",
+                        _ => {
+                            continue;
+                        }
+                    }
+                }
+                _ => {
+                    continue;
+                }
+            };
+
+            let location = range.start() + content_start + TextSize::try_from(column).unwrap();
+            let range = TextRange::at(location, c.text_len());
+
+            string_conversion_edits.push(Edit::range_replacement(replacement.to_string(), range));
+        }
+
+        // 3. Add back '\' characters for line continuation in non-triple-quoted strings.
+        if !string_flags.is_triple_quoted() {
+            for (column, _match) in string_content.match_indices("\\\n") {
+                let location = range.start() + content_start + TextSize::try_from(column).unwrap();
+                string_conversion_edits.push(Edit::insertion(
+                    "\\n\\".to_string(),
+                    location + TextSize::from(1),
+                ));
+            }
+        }
+    }
+
+    for InvalidCharacterDiagnostic { diagnostic, edit } in new_diagnostics {
+        diagnostics
+            .push(diagnostic.with_fix(Fix::safe_edits(edit, string_conversion_edits.clone())));
     }
 }
diff --git a/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap b/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap
@@ -62,3 +62,86 @@ invalid_characters.py:55:21: PLE2510 [*] Invalid unescaped character backspace,
 56 56 | 
 57 57 | # https://github.com/astral-sh/ruff/issues/7455#issuecomment-1741998106
 58 58 | x = f"""}}ab"""
+
+invalid_characters.py:63:35: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead
+   |
+62 | # https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180
+63 | raw_single_singlequote = r'\ \' " ␈'
+   |                                   ^ PLE2510
+64 | raw_triple_singlequote = r'''\ ' " '''
+65 | raw_triple_singlequote_2 = r''' \''' \''''
+   |
+   = help: Replace with escape sequence
+
+ℹ Safe fix
+60 60 | x = f"""}}a␛b"""
+61 61 | 
+62 62 | # https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180
+63    |-raw_single_singlequote = r'\ \' " ␈'
+   63 |+raw_single_singlequote = '\\ \\\' " \b'
+64 64 | raw_triple_singlequote = r'''\ ' " '''
+65 65 | raw_triple_singlequote_2 = r''' \''' \''''
+66 66 | raw_single_doublequote = r"\ ' \" ␛"
+
+invalid_characters.py:77:1: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead
+   |
+75 | raw_single_doublequote_multiline = r"' \
+76 | \" \
+77 | ␈"
+   | ^ PLE2510
+78 | raw_triple_doublequote_multiline = r"""' \
+79 | " \
+   |
+   = help: Replace with escape sequence
+
+ℹ Safe fix
+72 72 | raw_triple_singlequote_multiline = r'''' \
+73 73 | " \
+74 74 | '''
+75    |-raw_single_doublequote_multiline = r"' \
+76    |-\" \
+77    |-␈"
+   75 |+raw_single_doublequote_multiline = "' \\\n\
+   76 |+\\\" \\\n\
+   77 |+\b"
+78 78 | raw_triple_doublequote_multiline = r"""' \
+79 79 | " \
+80 80 | ␈"""
+
+invalid_characters.py:80:1: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead
+   |
+78 | raw_triple_doublequote_multiline = r"""' \
+79 | " \
+80 | ␈"""
+   | ^ PLE2510
+81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}'
+   |
+   = help: Replace with escape sequence
+
+ℹ Safe fix
+75 75 | raw_single_doublequote_multiline = r"' \
+76 76 | \" \
+77 77 | ␈"
+78    |-raw_triple_doublequote_multiline = r"""' \
+79    |-" \
+80    |-␈"""
+   78 |+raw_triple_doublequote_multiline = """' \\
+   79 |+" \\
+   80 |+\b"""
+81 81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}'
+
+invalid_characters.py:81:26: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead
+   |
+79 | " \
+80 | ␈"""
+81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}'
+   |                          ^ PLE2510
+   |
+   = help: Replace with escape sequence
+
+ℹ Safe fix
+78 78 | raw_triple_doublequote_multiline = r"""' \
+79 79 | " \
+80 80 | ␈"""
+81    |-raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}'
+   81 |+raw_nested_fstrings = f'\b\\ {rf'\ {rf'␛\' '}'}'
diff --git a/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap b/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap
diff --git a/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap b/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap
diff --git a/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap b/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap
diff --git a/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap b/...es/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap
@@ -54,6 +54,16 @@ impl Token {
         self.flags.is_triple_quoted()
     }
 
+    /// Returns `true` if the current token is a raw string of any kind.
+    ///
+    /// # Panics
+    ///
+    /// If it isn't a string or any f-string tokens.
+    pub const fn is_raw_string(self) -> bool {
+        assert!(self.is_any_string());
+        self.flags.is_raw_string()
+    }
+
     /// Returns the [`Quote`] style for the current string token of any kind.
     ///
     /// # Panics
@@ -64,6 +74,16 @@ impl Token {
         self.flags.quote_style()
     }
 
+    /// Returns the string flags for the current string token of any kind.
+    ///
+    /// # Panics
+    ///
+    /// If it isn't a string or any f-string tokens.
+    pub fn string_flags(self) -> AnyStringFlags {
+        assert!(self.is_any_string());
+        self.flags.as_any_string_flags()
+    }
+
     /// Returns `true` if this is any kind of string token.
     const fn is_any_string(self) -> bool {
         matches!(