diff --git a/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py b/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py index 79c9307695a91..c8f1872cff185 100644 Binary files a/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py and b/crates/ruff_linter/resources/test/fixtures/pylint/invalid_characters.py differ diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index f272b910f8d36..15cbe95587840 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -10,7 +10,6 @@ use ruff_diagnostics::Diagnostic; use ruff_python_index::Indexer; use ruff_python_parser::Tokens; use ruff_source_file::Locator; -use ruff_text_size::Ranged; use crate::directives::TodoComment; use crate::registry::{AsRule, Rule}; @@ -93,11 +92,12 @@ pub(crate) fn check_tokens( Rule::InvalidCharacterNul, Rule::InvalidCharacterZeroWidthSpace, ]) { + let mut last_fstring_start = None; for token in tokens { pylint::rules::invalid_string_characters( &mut diagnostics, - token.kind(), - token.range(), + token, + &mut last_fstring_start, locator, ); } diff --git a/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs b/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs index 16a0b8ba794e5..ccfdade6356dc 100644 --- a/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs +++ b/crates/ruff_linter/src/rules/pylint/rules/invalid_string_characters.rs @@ -1,3 +1,7 @@ +use ruff_python_ast::str::Quote; +use ruff_python_ast::StringFlags; +use ruff_python_parser::Token; +use ruff_text_size::Ranged; use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_diagnostics::AlwaysFixableViolation; @@ -172,19 +176,33 @@ impl AlwaysFixableViolation for InvalidCharacterZeroWidthSpace { } /// PLE2510, PLE2512, PLE2513, PLE2514, PLE2515 -pub(crate) fn invalid_string_characters( +pub(crate) fn invalid_string_characters<'a>( diagnostics: &mut Vec, - token: TokenKind, - range: TextRange, + token: &'a Token, + last_fstring_start: &mut Option<&'a Token>, locator: &Locator, ) { - let text = match token { + struct InvalidCharacterDiagnostic { + diagnostic: Diagnostic, + edit: Edit, + } + + let kind = token.kind(); + let range = token.range(); + + let text = match kind { // We can't use the `value` field since it's decoded and e.g. for f-strings removed a curly // brace that escaped another curly brace, which would gives us wrong column information. TokenKind::String | TokenKind::FStringMiddle => locator.slice(range), + TokenKind::FStringStart => { + *last_fstring_start = Some(token); + return; + } _ => return, }; + // Accumulate diagnostics here to postpone generating shared fixes until we know we need them. + let mut new_diagnostics: Vec = Vec::new(); for (column, match_) in text.match_indices(&['\x08', '\x1A', '\x1B', '\0', '\u{200b}']) { let c = match_.chars().next().unwrap(); let (replacement, rule): (&str, DiagnosticKind) = match c { @@ -201,8 +219,97 @@ pub(crate) fn invalid_string_characters( let location = range.start() + TextSize::try_from(column).unwrap(); let range = TextRange::at(location, c.text_len()); - diagnostics.push(Diagnostic::new(rule, range).with_fix(Fix::safe_edit( - Edit::range_replacement(replacement.to_string(), range), - ))); + new_diagnostics.push(InvalidCharacterDiagnostic { + diagnostic: Diagnostic::new(rule, range), + // This is integrated with other fixes and attached to the diagnostic below. + edit: Edit::range_replacement(replacement.to_string(), range), + }); + } + if new_diagnostics.is_empty() { + // No issues, nothing to fix. + return; + } + + // Convert raw strings to non-raw strings when fixes are applied: + // https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180 + let mut string_conversion_edits = Vec::new(); + if token.is_raw_string() { + let string_flags = token.string_flags(); + let prefix = string_flags.prefix().as_str(); + + // 1. Remove the raw string prefix. + for (column, match_) in prefix.match_indices(&['r', 'R']) { + let c = match_.chars().next().unwrap(); + + let entire_string_range = match kind { + TokenKind::String => range, + _ => last_fstring_start.unwrap().range(), + }; + let location = entire_string_range.start() + TextSize::try_from(column).unwrap(); + let range = TextRange::at(location, c.text_len()); + + string_conversion_edits.push(Edit::range_deletion(range)); + } + + // 2. Escape '\' and quote characters inside the string content. + let (content_start, content_end): (TextSize, TextSize) = match kind { + TokenKind::String => ( + prefix.text_len() + string_flags.quote_len(), + TextSize::try_from(text.len()).unwrap() - string_flags.quote_len(), + ), + _ => (0.into(), text.len().try_into().unwrap()), + }; + let string_content = &text[content_start.to_usize()..content_end.to_usize()]; + for (column, match_) in string_content.match_indices(&['\\', '\'', '"']) { + let c = match_.chars().next().unwrap(); + let replacement: &str = match c { + '\\' => "\\\\", + '\'' | '"' => { + // Quotes only have to be escaped in triple-quoted strings at the beginning + // of a triplet (like `\"""\"""` within the string, or `\""""` at the end). + // For simplicity, escape all quotes followed by the same character + // (e.g., `r""" \""" \""""` becomes `""" \\\"\"" \""""`). + if string_flags.is_triple_quoted() + && string_content + .as_bytes() + .get(column + 1) + .is_some_and(|c2| char::from(*c2) != c) + { + continue; + } + match (c, string_flags.quote_style()) { + ('\'', Quote::Single) => "\\'", + ('"', Quote::Double) => "\\\"", + _ => { + continue; + } + } + } + _ => { + continue; + } + }; + + let location = range.start() + content_start + TextSize::try_from(column).unwrap(); + let range = TextRange::at(location, c.text_len()); + + string_conversion_edits.push(Edit::range_replacement(replacement.to_string(), range)); + } + + // 3. Add back '\' characters for line continuation in non-triple-quoted strings. + if !string_flags.is_triple_quoted() { + for (column, _match) in string_content.match_indices("\\\n") { + let location = range.start() + content_start + TextSize::try_from(column).unwrap(); + string_conversion_edits.push(Edit::insertion( + "\\n\\".to_string(), + location + TextSize::from(1), + )); + } + } + } + + for InvalidCharacterDiagnostic { diagnostic, edit } in new_diagnostics { + diagnostics + .push(diagnostic.with_fix(Fix::safe_edits(edit, string_conversion_edits.clone()))); } } diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap index 3f23d12764d77..86167ee4e1adb 100644 --- a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap +++ b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2510_invalid_characters.py.snap @@ -62,3 +62,86 @@ invalid_characters.py:55:21: PLE2510 [*] Invalid unescaped character backspace, 56 56 | 57 57 | # https://github.com/astral-sh/ruff/issues/7455#issuecomment-1741998106 58 58 | x = f"""}}ab""" + +invalid_characters.py:63:35: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead + | +62 | # https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180 +63 | raw_single_singlequote = r'\ \' " ␈' + | ^ PLE2510 +64 | raw_triple_singlequote = r'''\ ' " ''' +65 | raw_triple_singlequote_2 = r'''​ \''' \'''' + | + = help: Replace with escape sequence + +ℹ Safe fix +60 60 | x = f"""}}a␛b""" +61 61 | +62 62 | # https://github.com/astral-sh/ruff/issues/13294#issuecomment-2341955180 +63 |-raw_single_singlequote = r'\ \' " ␈' + 63 |+raw_single_singlequote = '\\ \\\' " \b' +64 64 | raw_triple_singlequote = r'''\ ' " ''' +65 65 | raw_triple_singlequote_2 = r'''​ \''' \'''' +66 66 | raw_single_doublequote = r"\ ' \" ␛" + +invalid_characters.py:77:1: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead + | +75 | raw_single_doublequote_multiline = r"' \ +76 | \" \ +77 | ␈" + | ^ PLE2510 +78 | raw_triple_doublequote_multiline = r"""' \ +79 | " \ + | + = help: Replace with escape sequence + +ℹ Safe fix +72 72 | raw_triple_singlequote_multiline = r'''' \ +73 73 | " \ +74 74 | ''' +75 |-raw_single_doublequote_multiline = r"' \ +76 |-\" \ +77 |-␈" + 75 |+raw_single_doublequote_multiline = "' \\\n\ + 76 |+\\\" \\\n\ + 77 |+\b" +78 78 | raw_triple_doublequote_multiline = r"""' \ +79 79 | " \ +80 80 | ␈""" + +invalid_characters.py:80:1: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead + | +78 | raw_triple_doublequote_multiline = r"""' \ +79 | " \ +80 | ␈""" + | ^ PLE2510 +81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}' + | + = help: Replace with escape sequence + +ℹ Safe fix +75 75 | raw_single_doublequote_multiline = r"' \ +76 76 | \" \ +77 77 | ␈" +78 |-raw_triple_doublequote_multiline = r"""' \ +79 |-" \ +80 |-␈""" + 78 |+raw_triple_doublequote_multiline = """' \\ + 79 |+" \\ + 80 |+\b""" +81 81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}' + +invalid_characters.py:81:26: PLE2510 [*] Invalid unescaped character backspace, use "\b" instead + | +79 | " \ +80 | ␈""" +81 | raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}' + | ^ PLE2510 + | + = help: Replace with escape sequence + +ℹ Safe fix +78 78 | raw_triple_doublequote_multiline = r"""' \ +79 79 | " \ +80 80 | ␈""" +81 |-raw_nested_fstrings = rf'␈\ {rf'\ {rf'␛\' '}'}' + 81 |+raw_nested_fstrings = f'\b\\ {rf'\ {rf'␛\' '}'}' diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap index 3ef59bd9291bd..ca47b78b75dc3 100644 Binary files a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap and b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2512_invalid_characters.py.snap differ diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap index 993c89041e59f..e4a324ae17d12 100644 Binary files a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap and b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2513_invalid_characters.py.snap differ diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap index 3ab1a52ec4958..426c787310024 100644 Binary files a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap and b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2514_invalid_characters.py.snap differ diff --git a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap index bf097d02f2cec..b8ad690a123a2 100644 Binary files a/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap and b/crates/ruff_linter/src/rules/pylint/snapshots/ruff_linter__rules__pylint__tests__PLE2515_invalid_characters.py.snap differ diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs index 9c240f4964e5b..8d5763fdab457 100644 --- a/crates/ruff_python_parser/src/token.rs +++ b/crates/ruff_python_parser/src/token.rs @@ -54,6 +54,16 @@ impl Token { self.flags.is_triple_quoted() } + /// Returns `true` if the current token is a raw string of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. + pub const fn is_raw_string(self) -> bool { + assert!(self.is_any_string()); + self.flags.is_raw_string() + } + /// Returns the [`Quote`] style for the current string token of any kind. /// /// # Panics @@ -64,6 +74,16 @@ impl Token { self.flags.quote_style() } + /// Returns the string flags for the current string token of any kind. + /// + /// # Panics + /// + /// If it isn't a string or any f-string tokens. + pub fn string_flags(self) -> AnyStringFlags { + assert!(self.is_any_string()); + self.flags.as_any_string_flags() + } + /// Returns `true` if this is any kind of string token. const fn is_any_string(self) -> bool { matches!(