diff --git a/Cargo.lock b/Cargo.lock index 21c7d6c3ef8514..53457f6061da3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2139,6 +2139,7 @@ dependencies = [ "rustc-hash", "rustpython-common", "rustpython-parser", + "serde", "smallvec", ] diff --git a/crates/ruff/src/autofix/helpers.rs b/crates/ruff/src/autofix/helpers.rs index 8ff322b821c71f..362362212acb4d 100644 --- a/crates/ruff/src/autofix/helpers.rs +++ b/crates/ruff/src/autofix/helpers.rs @@ -9,8 +9,8 @@ use rustpython_parser::{lexer, Mode, Tok}; use ruff_diagnostics::Fix; use ruff_python_ast::helpers; use ruff_python_ast::helpers::to_absolute; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::source_code::{Indexer, Locator, Stylist}; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::cst::helpers::compose_module_path; use crate::cst::matchers::match_module; @@ -100,7 +100,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result /// of a multi-statement line. fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { let contents = locator.skip(stmt.end_location.unwrap()); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); if trimmed.starts_with(';') { let column = line @@ -123,7 +123,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option { fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location { let start_location = Location::new(semicolon.row(), semicolon.column() + 1); let contents = locator.skip(start_location); - for (row, line) in LinesWithTrailingNewline::from(contents).enumerate() { + for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() { let trimmed = line.trim(); // Skip past any continuations. 
if trimmed.starts_with('\\') { diff --git a/crates/ruff/src/checkers/noqa.rs b/crates/ruff/src/checkers/noqa.rs index 3ec8a31b73c3ef..0d8790a230b76a 100644 --- a/crates/ruff/src/checkers/noqa.rs +++ b/crates/ruff/src/checkers/noqa.rs @@ -5,6 +5,7 @@ use nohash_hasher::IntMap; use rustpython_parser::ast::Location; use ruff_diagnostics::{Diagnostic, Fix}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::codes::NoqaCode; @@ -38,7 +39,7 @@ pub fn check_noqa( // Indices of diagnostics that were ignored by a `noqa` directive. let mut ignored_diagnostics = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = contents.universal_newlines().collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { diff --git a/crates/ruff/src/checkers/physical_lines.rs b/crates/ruff/src/checkers/physical_lines.rs index 442cac67f530b5..adb2963e404947 100644 --- a/crates/ruff/src/checkers/physical_lines.rs +++ b/crates/ruff/src/checkers/physical_lines.rs @@ -3,7 +3,8 @@ use std::path::Path; use ruff_diagnostics::Diagnostic; -use ruff_python_ast::source_code::Stylist; +use ruff_python_ast::newlines::StrExt; +use ruff_python_ast::source_code::{Locator, Stylist}; use crate::registry::Rule; use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective}; @@ -21,8 +22,8 @@ use crate::settings::{flags, Settings}; pub fn check_physical_lines( path: &Path, + locator: &Locator, stylist: &Stylist, - contents: &str, commented_lines: &[usize], doc_lines: &[usize], settings: &Settings, @@ -56,7 +57,7 @@ pub fn check_physical_lines( let mut commented_lines_iter = commented_lines.iter().peekable(); let mut doc_lines_iter = doc_lines.iter().peekable(); - for (index, line) in contents.lines().enumerate() { + for (index, line) in locator.contents().universal_newlines().enumerate() { while commented_lines_iter .next_if(|lineno| &(index + 1) == *lineno) .is_some() @@ 
-162,8 +163,8 @@ pub fn check_physical_lines( if enforce_no_newline_at_end_of_file { if let Some(diagnostic) = no_newline_at_end_of_file( + locator, stylist, - contents, autofix.into() && settings.rules.should_fix(&Rule::NoNewLineAtEndOfFile), ) { diagnostics.push(diagnostic); @@ -199,8 +200,8 @@ mod tests { let check_with_max_line_length = |line_length: usize| { check_physical_lines( Path::new("foo.py"), + &locator, &stylist, - line, &[], &[], &Settings { diff --git a/crates/ruff/src/linter.rs b/crates/ruff/src/linter.rs index 7c634b409012c8..1ad51a08936024 100644 --- a/crates/ruff/src/linter.rs +++ b/crates/ruff/src/linter.rs @@ -191,8 +191,8 @@ pub fn check_path( { diagnostics.extend(check_physical_lines( path, + locator, stylist, - contents, indexer.commented_lines(), &doc_lines, settings, diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index ad973602110e86..844a64fe5d40f8 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -12,6 +12,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use rustpython_parser::ast::Location; use ruff_diagnostics::Diagnostic; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::{LineEnding, Locator}; use ruff_python_ast::types::Range; @@ -181,7 +182,7 @@ fn add_noqa_inner( // Codes that are globally exempted (within the current file). 
let mut file_exemptions: Vec = vec![]; - let lines: Vec<&str> = contents.lines().collect(); + let lines: Vec<&str> = contents.universal_newlines().collect(); for lineno in commented_lines { match extract_file_exemption(lines[lineno - 1]) { Exemption::All => { @@ -263,7 +264,7 @@ fn add_noqa_inner( let mut count: usize = 0; let mut output = String::new(); - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in lines.into_iter().enumerate() { match matches_by_line.get(&lineno) { None => { output.push_str(line); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs index 38587687d7b5ca..3330d9bea777fb 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_if.rs @@ -9,6 +9,7 @@ use ruff_python_ast::helpers::{ contains_call_path, contains_effect, create_expr, create_stmt, first_colon_range, has_comments, has_comments_in, unparse_expr, unparse_stmt, }; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -283,7 +284,7 @@ pub fn nested_if_statements( Ok(fix) => { if fix .content - .lines() + .universal_newlines() .all(|line| line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs index 800a5830a37563..ba1c4fcec74787 100644 --- a/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs +++ b/crates/ruff/src/rules/flake8_simplify/rules/ast_with.rs @@ -5,6 +5,7 @@ use ruff_diagnostics::Diagnostic; use ruff_diagnostics::{AutofixKind, Availability, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{first_colon_range, has_comments_in}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -115,7 +116,7 @@ pub fn 
multiple_with_statements( Ok(fix) => { if fix .content - .lines() + .universal_newlines() .all(|line| line.len() <= checker.settings.line_length) { diagnostic.amend(fix); diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index f1ca5396f189a6..53ef62c9bcc2d4 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -2,6 +2,7 @@ use rustpython_parser::ast::{Location, Stmt}; use rustpython_parser::{lexer, Mode, Tok}; use ruff_python_ast::helpers::is_docstring_stmt; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use super::types::TrailingComma; @@ -62,7 +63,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool { // # Direct comment. // def f(): pass let mut seen_blank = false; - for line in locator.take(stmt.location).lines().rev() { + for line in locator.take(stmt.location).universal_newlines().rev() { let line = line.trim(); if seen_blank { if line.starts_with('#') { diff --git a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index dd79f339d349da..6eeb9b155096eb 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -4,6 +4,7 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; @@ -76,7 +77,7 @@ pub fn invalid_escape_sequence( let body = &text[(quote_pos + quote.len())..(text.len() - quote.len())]; if !prefix.contains('r') { - for (row_offset, line) in body.lines().enumerate() { + for (row_offset, line) in body.universal_newlines().enumerate() { let chars: Vec<char> = line.chars().collect(); for col_offset in 0..chars.len() { 
if chars[col_offset] != '\\' { diff --git a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs index 335a9dba13f4d2..021b80467edd06 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/lambda_assignment.rs @@ -3,6 +3,7 @@ use rustpython_parser::ast::{Arguments, Expr, ExprKind, Location, Stmt, StmtKind use ruff_diagnostics::{AutofixKind, Availability, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::{match_leading_content, match_trailing_content, unparse_stmt}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Stylist; use ruff_python_ast::types::{Range, ScopeKind}; use ruff_python_ast::whitespace::leading_space; @@ -86,7 +87,7 @@ pub fn lambda_assignment(checker: &mut Checker, target: &Expr, value: &Expr, stm let indentation = &leading_space(first_line); let mut indented = String::new(); for (idx, line) in function(id, args, body, checker.stylist) - .lines() + .universal_newlines() .enumerate() { if idx == 0 { diff --git a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs index e5411cc53c772c..9609c37895112f 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/no_newline_at_end_of_file.rs @@ -2,7 +2,8 @@ use rustpython_parser::ast::Location; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::source_code::Stylist; +use ruff_python_ast::newlines::StrExt; +use ruff_python_ast::source_code::{Locator, Stylist}; use ruff_python_ast::types::Range; /// ## What it does @@ -37,16 +38,16 @@ impl AlwaysAutofixableViolation for NoNewLineAtEndOfFile { /// W292 pub fn no_newline_at_end_of_file( + 
locator: &Locator, stylist: &Stylist, - contents: &str, autofix: bool, ) -> Option<Diagnostic> { - if !contents.ends_with('\n') { + if !locator.contents().ends_with(['\n', '\r']) { // Note: if `lines.last()` is `None`, then `contents` is empty (and so we don't // want to raise W292 anyway). - if let Some(line) = contents.lines().last() { + if let Some(line) = locator.contents().universal_newlines().last() { // Both locations are at the end of the file (and thus the same). - let location = Location::new(contents.lines().count(), line.len()); + let location = Location::new(locator.count_lines(), line.len()); let mut diagnostic = Diagnostic::new(NoNewLineAtEndOfFile, Range::new(location, location)); if autofix { diff --git a/crates/ruff/src/rules/pydocstyle/helpers.rs b/crates/ruff/src/rules/pydocstyle/helpers.rs index b6261b1361fbff..5c7e5b4c9a9a40 100644 --- a/crates/ruff/src/rules/pydocstyle/helpers.rs +++ b/crates/ruff/src/rules/pydocstyle/helpers.rs @@ -2,6 +2,7 @@ use std::collections::BTreeSet; use ruff_python_ast::cast; use ruff_python_ast::helpers::{map_callable, to_call_path}; +use ruff_python_ast::newlines::StrExt; use crate::checkers::ast::Checker; use crate::docstrings::definition::{Definition, DefinitionKind}; @@ -10,7 +11,7 @@ use crate::docstrings::definition::{Definition, DefinitionKind}; pub fn logical_line(content: &str) -> Option<usize> { // Find the first logical line. let mut logical_line = None; - for (i, line) in content.lines().enumerate() { + for (i, line) in content.universal_newlines().enumerate() { if line.trim().is_empty() { // Empty line. If this is the line _after_ the first logical line, stop. 
if logical_line.is_some() { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs index 2e39195bd03ba9..3b47420e2edb7a 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_after_summary.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{AutofixKind, Availability, Diagnostic, Fix, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -45,7 +46,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { let mut lines_count = 1; let mut blanks_count = 0; - for line in body.trim().lines().skip(1) { + for line in body.trim().universal_newlines().skip(1) { lines_count += 1; if line.trim().is_empty() { blanks_count += 1; @@ -64,7 +65,7 @@ pub fn blank_after_summary(checker: &mut Checker, docstring: &Docstring) { if blanks_count > 1 { // Find the "summary" line (defined as the first non-blank line). 
let mut summary_line = 0; - for line in body.lines() { + for line in body.universal_newlines() { if line.trim().is_empty() { summary_line += 1; } else { diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs index 9659fca6c1e870..df7ff18fdbed1f 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_class.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -75,7 +76,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { .slice(Range::new(parent.location, docstring.expr.location)); let blank_lines_before = before - .lines() + .universal_newlines() .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -138,7 +139,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { )); let all_blank_after = after - .lines() + .universal_newlines() .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { @@ -146,7 +147,7 @@ pub fn blank_before_after_class(checker: &mut Checker, docstring: &Docstring) { } let blank_lines_after = after - .lines() + .universal_newlines() .skip(1) .take_while(|line| line.trim().is_empty()) .count(); diff --git a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs index 701808c70aa6b8..2d58910fbe9ba6 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/blank_before_after_function.rs @@ -3,6 +3,7 @@ use regex::Regex; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use 
ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -67,7 +68,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) .slice(Range::new(parent.location, docstring.expr.location)); let blank_lines_before = before - .lines() + .universal_newlines() .rev() .skip(1) .take_while(|line| line.trim().is_empty()) @@ -102,7 +103,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // If the docstring is only followed by blank and commented lines, abort. let all_blank_after = after - .lines() + .universal_newlines() .skip(1) .all(|line| line.trim().is_empty() || line.trim_start().starts_with('#')); if all_blank_after { @@ -111,7 +112,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // Count the number of blank lines after the docstring. let blank_lines_after = after - .lines() + .universal_newlines() .skip(1) .take_while(|line| line.trim().is_empty()) .count(); @@ -119,7 +120,7 @@ pub fn blank_before_after_function(checker: &mut Checker, docstring: &Docstring) // Avoid violations for blank lines followed by inner functions or classes. 
if blank_lines_after == 1 && after - .lines() + .universal_newlines() .skip(1 + blank_lines_after) .find(|line| !line.trim_start().starts_with('#')) .map_or(false, |line| INNER_FUNCTION_OR_CLASS_REGEX.is_match(line)) diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs index e4d963feff6baf..c97af428166b79 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_period.rs @@ -2,6 +2,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; @@ -31,7 +32,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = body.trim().universal_newlines().next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. 
@@ -55,7 +56,7 @@ pub fn ends_with_period(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = body.universal_newlines().nth(index).unwrap(); let trimmed = line.trim_end(); if !trimmed.ends_with('.') { diff --git a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs index f163afc6ad18f1..4562cc4c7be8dc 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/ends_with_punctuation.rs @@ -2,6 +2,7 @@ use strum::IntoEnumIterator; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; @@ -31,7 +32,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if let Some(first_line) = body.trim().lines().next() { + if let Some(first_line) = body.trim().universal_newlines().next() { let trimmed = first_line.trim(); // Avoid false-positives: `:param`, etc. 
@@ -55,7 +56,7 @@ pub fn ends_with_punctuation(checker: &mut Checker, docstring: &Docstring) { } if let Some(index) = logical_line(body) { - let line = body.lines().nth(index).unwrap(); + let line = body.universal_newlines().nth(index).unwrap(); let trimmed = line.trim_end(); if !(trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?')) { let mut diagnostic = Diagnostic::new(EndsInPunctuation, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/indent.rs b/crates/ruff/src/rules/pydocstyle/rules/indent.rs index 475720d40ed9d1..dc1a74ec18097c 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/indent.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/indent.rs @@ -1,9 +1,9 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -53,7 +53,7 @@ pub fn indent(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; // Split the docstring into lines. 
- let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() <= 1 { return; } diff --git a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs index c2d2ec7ea818fe..30fc1d9f397a9c 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/multi_line_summary_start.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::{NewlineWithTrailingNewline, StrExt}; use ruff_python_ast::str::{is_triple_quote, leading_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::{DefinitionKind, Docstring}; @@ -42,10 +42,10 @@ pub fn multi_line_summary_start(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - if LinesWithTrailingNewline::from(body).nth(1).is_none() { + if NewlineWithTrailingNewline::from(body).nth(1).is_none() { return; }; - let mut content_lines = contents.lines(); + let mut content_lines = contents.universal_newlines(); let Some(first_line) = content_lines .next() else diff --git a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs index b3f49732e2cbe7..19e74ba2bfdee3 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/newline_after_last_paragraph.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::{NewlineWithTrailingNewline, StrExt}; use 
ruff_python_ast::types::Range; use ruff_python_ast::whitespace; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -29,12 +29,12 @@ pub fn newline_after_last_paragraph(checker: &mut Checker, docstring: &Docstring let body = docstring.body; let mut line_count = 0; - for line in LinesWithTrailingNewline::from(body) { + for line in NewlineWithTrailingNewline::from(body) { if !line.trim().is_empty() { line_count += 1; } if line_count > 1 { - if let Some(last_line) = contents.lines().last().map(str::trim) { + if let Some(last_line) = contents.universal_newlines().last().map(str::trim) { if last_line != "\"\"\"" && last_line != "'''" { let mut diagnostic = Diagnostic::new(NewLineAfterLastParagraph, Range::from(docstring.expr)); diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs index 6e404dc223b1e6..5546a24cf7b47a 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/no_signature.rs @@ -2,6 +2,7 @@ use rustpython_parser::ast::StmtKind; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -32,7 +33,7 @@ pub fn no_signature(checker: &mut Checker, docstring: &Docstring) { let body = docstring.body; - let Some(first_line) = body.trim().lines().next() else { + let Some(first_line) = body.trim().universal_newlines().next() else { return; }; if !first_line.contains(&format!("{name}(")) { diff --git a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs index 609bbe1e8ba733..c4743f77e34955 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs +++ 
b/crates/ruff/src/rules/pydocstyle/rules/no_surrounding_whitespace.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::str::leading_quote; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -28,7 +28,7 @@ pub fn no_surrounding_whitespace(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let mut lines = LinesWithTrailingNewline::from(body); + let mut lines = NewlineWithTrailingNewline::from(body); let Some(line) = lines.next() else { return; }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs index 9beae70e9f6c92..df9a4c95634ae6 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/non_imperative_mood.rs @@ -7,6 +7,7 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::cast; use ruff_python_ast::helpers::to_call_path; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::{CallPath, Range}; use ruff_python_ast::visibility::{is_property, is_test}; @@ -48,7 +49,7 @@ pub fn non_imperative_mood( let body = docstring.body; // Find first line, disregarding whitespace. 
- let line = match body.trim().lines().next() { + let line = match body.trim().universal_newlines().next() { Some(line) => line.trim(), None => return, }; diff --git a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs index 7ecb70f70df24d..b0dcf37ff64f09 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/one_liner.rs @@ -1,8 +1,8 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::types::Range; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use crate::checkers::ast::Checker; use crate::docstrings::definition::Docstring; @@ -26,7 +26,7 @@ impl AlwaysAutofixableViolation for FitsOnOneLine { pub fn one_liner(checker: &mut Checker, docstring: &Docstring) { let mut line_count = 0; let mut non_empty_line_count = 0; - for line in LinesWithTrailingNewline::from(docstring.body) { + for line in NewlineWithTrailingNewline::from(docstring.body) { line_count += 1; if !line.trim().is_empty() { non_empty_line_count += 1; diff --git a/crates/ruff/src/rules/pydocstyle/rules/sections.rs b/crates/ruff/src/rules/pydocstyle/rules/sections.rs index 3680dc30267a2d..b5d51378d13ea3 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/sections.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/sections.rs @@ -8,9 +8,9 @@ use ruff_diagnostics::{AlwaysAutofixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::helpers::identifier_range; +use ruff_python_ast::newlines::NewlineWithTrailingNewline; use ruff_python_ast::types::Range; use ruff_python_ast::visibility::is_staticmethod; -use ruff_python_ast::whitespace::LinesWithTrailingNewline; use ruff_python_ast::{cast, 
whitespace}; use crate::checkers::ast::Checker; @@ -273,7 +273,7 @@ impl AlwaysAutofixableViolation for NoBlankLinesBetweenHeaderAndContent { pub fn sections(checker: &mut Checker, docstring: &Docstring, convention: Option<&Convention>) { let body = docstring.body; - let lines: Vec<&str> = LinesWithTrailingNewline::from(body).collect(); + let lines: Vec<&str> = NewlineWithTrailingNewline::from(body).collect(); if lines.len() < 2 { return; } @@ -923,30 +923,32 @@ fn parameters_section(checker: &mut Checker, docstring: &Docstring, context: &Se // Join line continuations, then resplit by line. let adjusted_following_lines = context.following_lines.join("\n").replace("\\\n", ""); - let lines: Vec<&str> = LinesWithTrailingNewline::from(&adjusted_following_lines).collect(); - - for i in 1..lines.len() { - let current_line = lines[i - 1]; - let current_leading_space = whitespace::leading_space(current_line); - let next_line = lines[i]; - if current_leading_space == section_level_indent - && (whitespace::leading_space(next_line).len() > current_leading_space.len()) - && !next_line.trim().is_empty() - { - let parameters = if let Some(semi_index) = current_line.find(':') { - // If the parameter has a type annotation, exclude it. - &current_line[..semi_index] - } else { - // Otherwise, it's just a list of parameters on the current line. - current_line.trim() - }; - // Notably, NumPy lets you put multiple parameters of the same type on the same - // line. 
- for parameter in parameters.split(',') { - docstring_args.insert(parameter.trim()); + let mut lines = NewlineWithTrailingNewline::from(&adjusted_following_lines); + if let Some(mut current_line) = lines.next() { + for next_line in lines { + let current_leading_space = whitespace::leading_space(current_line); + if current_leading_space == section_level_indent + && (whitespace::leading_space(next_line).len() > current_leading_space.len()) + && !next_line.trim().is_empty() + { + let parameters = if let Some(semi_index) = current_line.find(':') { + // If the parameter has a type annotation, exclude it. + &current_line[..semi_index] + } else { + // Otherwise, it's just a list of parameters on the current line. + current_line.trim() + }; + // Notably, NumPy lets you put multiple parameters of the same type on the same + // line. + for parameter in parameters.split(',') { + docstring_args.insert(parameter.trim()); + } } + + current_line = next_line; } } + // Validate that all arguments were documented. 
missing_args(checker, docstring, &docstring_args); } diff --git a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs index 720b70fe810e6a..6e6a2d95713eb4 100644 --- a/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs +++ b/crates/ruff/src/rules/pydocstyle/rules/triple_quotes.rs @@ -1,5 +1,6 @@ use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::types::Range; use crate::checkers::ast::Checker; @@ -20,8 +21,7 @@ pub fn triple_quotes(checker: &mut Checker, docstring: &Docstring) { let contents = docstring.contents; let body = docstring.body; - let Some(first_line) = contents - .lines() + let Some(first_line) = contents.universal_newlines() .next() .map(str::to_lowercase) else { diff --git a/crates/ruff_python_ast/Cargo.toml b/crates/ruff_python_ast/Cargo.toml index 087bcaabb909bb..44df6c5fb9f267 100644 --- a/crates/ruff_python_ast/Cargo.toml +++ b/crates/ruff_python_ast/Cargo.toml @@ -24,4 +24,6 @@ regex = { workspace = true } rustc-hash = { workspace = true } rustpython-common = { workspace = true } rustpython-parser = { workspace = true } +# TODO(charlie): See https://github.com/RustPython/RustPython/pull/4684. 
+serde = { workspace = true } smallvec = { version = "1.10.0" } diff --git a/crates/ruff_python_ast/src/helpers.rs b/crates/ruff_python_ast/src/helpers.rs index 047e03c59b0308..039d31cd8f0ac7 100644 --- a/crates/ruff_python_ast/src/helpers.rs +++ b/crates/ruff_python_ast/src/helpers.rs @@ -14,6 +14,7 @@ use rustpython_parser::{lexer, Mode, StringKind, Tok}; use smallvec::{smallvec, SmallVec}; use crate::context::Context; +use crate::newlines::StrExt; use crate::source_code::{Generator, Indexer, Locator, Stylist}; use crate::types::{Binding, BindingKind, CallPath, Range}; use crate::visitor; @@ -1125,7 +1126,7 @@ pub fn end_of_statement(stmt: &Stmt, locator: &Locator) -> Location { } // Otherwise, find the end of the last line that's "part of" the statement. - for (lineno, line) in contents.lines().enumerate() { + for (lineno, line) in contents.universal_newlines().enumerate() { if line.ends_with('\\') { continue; } diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 529034a98e4b67..2856716fbb3616 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -6,6 +6,7 @@ pub mod function_type; pub mod hashable; pub mod helpers; pub mod logging; +pub mod newlines; pub mod operations; pub mod relocate; pub mod source_code; diff --git a/crates/ruff_python_ast/src/newlines.rs b/crates/ruff_python_ast/src/newlines.rs new file mode 100644 index 00000000000000..371f80a3527bf0 --- /dev/null +++ b/crates/ruff_python_ast/src/newlines.rs @@ -0,0 +1,192 @@ +use std::iter::FusedIterator; + +/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`]. 
+pub trait StrExt { + fn universal_newlines(&self) -> UniversalNewlineIterator<'_>; +} + +impl StrExt for str { + fn universal_newlines(&self) -> UniversalNewlineIterator<'_> { + UniversalNewlineIterator::from(self) + } +} + +/// Like [`str#lines`], but accommodates LF, CRLF, and CR line endings, +/// the latter of which are not supported by [`str#lines`]. +/// +/// ## Examples +/// +/// ```rust +/// use ruff_python_ast::newlines::UniversalNewlineIterator; +/// +/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop"); +/// +/// assert_eq!(lines.next_back(), Some("bop")); +/// assert_eq!(lines.next(), Some("foo")); +/// assert_eq!(lines.next_back(), Some("baz")); +/// assert_eq!(lines.next(), Some("bar")); +/// assert_eq!(lines.next_back(), Some("")); +/// assert_eq!(lines.next(), None); +/// ``` +pub struct UniversalNewlineIterator<'a> { + text: &'a str, +} + +impl<'a> UniversalNewlineIterator<'a> { + pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> { + UniversalNewlineIterator { text } + } +} + +impl<'a> Iterator for UniversalNewlineIterator<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + if self.text.is_empty() { + return None; + } + + let line = match self.text.find(['\n', '\r']) { + // Non-last line + Some(line_end) => { + let (line, remainder) = self.text.split_at(line_end); + + self.text = match remainder.as_bytes()[0] { + // Explicit branch for `\n` as this is the most likely path + b'\n' => &remainder[1..], + // '\r\n' + b'\r' if remainder.as_bytes().get(1) == Some(&b'\n') => &remainder[2..], + // '\r' + _ => &remainder[1..], + }; + + line + } + // Last line + None => std::mem::take(&mut self.text), + }; + + Some(line) + } + + fn last(mut self) -> Option<Self::Item> { + self.next_back() + } +} + +impl DoubleEndedIterator for UniversalNewlineIterator<'_> { + #[inline] + fn next_back(&mut self) -> Option<Self::Item> { + if self.text.is_empty() { + return None; + } + + let len = self.text.len(); + + // Trim any 
trailing newlines. + self.text = match self.text.as_bytes()[len - 1] { + b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2], + b'\n' | b'\r' => &self.text[..len - 1], + _ => self.text, + }; + + // Find the end of the previous line. The previous line is the text up to, but not including + // the newline character. + let line = match self.text.rfind(['\n', '\r']) { + // '\n' or '\r' or '\r\n' + Some(line_end) => { + let (remainder, line) = self.text.split_at(line_end + 1); + self.text = remainder; + + line + } + // Last line + None => std::mem::take(&mut self.text), + }; + + Some(line) + } +} + +impl FusedIterator for UniversalNewlineIterator<'_> {} + +/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line. +pub struct NewlineWithTrailingNewline<'a> { + trailing: Option<&'a str>, + underlying: UniversalNewlineIterator<'a>, +} + +impl<'a> NewlineWithTrailingNewline<'a> { + pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> { + NewlineWithTrailingNewline { + underlying: UniversalNewlineIterator::from(input), + trailing: if input.ends_with(['\r', '\n']) { + Some("") + } else { + None + }, + } + } +} + +impl<'a> Iterator for NewlineWithTrailingNewline<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.underlying.next().or_else(|| self.trailing.take()) + } +} + +#[cfg(test)] +mod tests { + use super::UniversalNewlineIterator; + + #[test] + fn universal_newlines_empty_str() { + let lines: Vec<_> = UniversalNewlineIterator::from("").collect(); + assert_eq!(lines, Vec::<&str>::default()); + + let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect(); + assert_eq!(lines, Vec::<&str>::default()); + } + + #[test] + fn universal_newlines_forward() { + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]); + + let lines: Vec<_> = 
UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop"]); + + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect(); + assert_eq!(lines, vec!["foo", "bar", "", "baz", "bop", ""]); + } + + #[test] + fn universal_newlines_backwards() { + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop") + .rev() + .collect(); + assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]); + + let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n") + .rev() + .collect(); + + assert_eq!(lines, vec!["bop", "baz", "", "bar", "foo"]); + } + + #[test] + fn universal_newlines_mixed() { + let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop"); + + assert_eq!(lines.next_back(), Some("bop")); + assert_eq!(lines.next(), Some("foo")); + assert_eq!(lines.next_back(), Some("baz")); + assert_eq!(lines.next(), Some("bar")); + assert_eq!(lines.next_back(), Some("")); + assert_eq!(lines.next(), None); + } +} diff --git a/crates/ruff_python_ast/src/source_code/locator.rs b/crates/ruff_python_ast/src/source_code/locator.rs index 035110f78ed671..9acf3a95db894c 100644 --- a/crates/ruff_python_ast/src/source_code/locator.rs +++ b/crates/ruff_python_ast/src/source_code/locator.rs @@ -56,10 +56,18 @@ impl<'a> Locator<'a> { self.contents } + /// Return the number of lines in the source code. + pub fn count_lines(&self) -> usize { + let index = self.get_or_init_index(); + index.count_lines() + } + + /// Return the number of bytes in the source code. pub const fn len(&self) -> usize { self.contents.len() } + /// Return `true` if the source code is empty. pub const fn is_empty(&self) -> bool { self.contents.is_empty() } @@ -83,6 +91,14 @@ impl Index { Index::Utf8(utf8) => utf8.byte_offset(location, contents), } } + + /// Return the number of lines in the source code. 
+ fn count_lines(&self) -> usize { + match self { + Index::Ascii(ascii) => ascii.line_start_byte_offsets.len(), + Index::Utf8(utf8) => utf8.line_start_byte_offsets.len(), + } + } } impl From<&str> for Index { diff --git a/crates/ruff_python_ast/src/str.rs b/crates/ruff_python_ast/src/str.rs index 1a332b474cfe6a..34651f00afeb54 100644 --- a/crates/ruff_python_ast/src/str.rs +++ b/crates/ruff_python_ast/src/str.rs @@ -40,19 +40,18 @@ pub fn raw_contents(contents: &str) -> &str { /// Return the leading quote for a string or byte literal (e.g., `"""`). pub fn leading_quote(content: &str) -> Option<&str> { - if let Some(first_line) = content.lines().next() { - for pattern in TRIPLE_QUOTE_STR_PREFIXES - .iter() - .chain(TRIPLE_QUOTE_BYTE_PREFIXES) - .chain(SINGLE_QUOTE_STR_PREFIXES) - .chain(SINGLE_QUOTE_BYTE_PREFIXES) - { - if first_line.starts_with(pattern) { - return Some(pattern); + TRIPLE_QUOTE_STR_PREFIXES + .iter() + .chain(TRIPLE_QUOTE_BYTE_PREFIXES) + .chain(SINGLE_QUOTE_STR_PREFIXES) + .chain(SINGLE_QUOTE_BYTE_PREFIXES) + .find_map(|pattern| { + if content.starts_with(pattern) { + Some(*pattern) + } else { + None } - } - } - None + }) } /// Return the trailing quote string for a string or byte literal (e.g., `"""`). diff --git a/crates/ruff_python_ast/src/whitespace.rs b/crates/ruff_python_ast/src/whitespace.rs index 8a779421c14b47..64bdc35c8c85e0 100644 --- a/crates/ruff_python_ast/src/whitespace.rs +++ b/crates/ruff_python_ast/src/whitespace.rs @@ -1,5 +1,3 @@ -use std::str::Lines; - use rustpython_parser::ast::{Located, Location}; use crate::source_code::Locator; @@ -39,38 +37,3 @@ pub fn clean(indentation: &str) -> String { .map(|char| if char.is_whitespace() { char } else { ' ' }) .collect() } - -/// Like `str#lines`, but includes a trailing newline as an empty line. 
-pub struct LinesWithTrailingNewline<'a> { - trailing: Option<&'a str>, - underlying: Lines<'a>, -} - -impl<'a> LinesWithTrailingNewline<'a> { - pub fn from(input: &'a str) -> LinesWithTrailingNewline<'a> { - LinesWithTrailingNewline { - underlying: input.lines(), - trailing: if input.ends_with('\n') { - Some("") - } else { - None - }, - } - } -} - -impl<'a> Iterator for LinesWithTrailingNewline<'a> { - type Item = &'a str; - - #[inline] - fn next(&mut self) -> Option<&'a str> { - let mut next = self.underlying.next(); - if next.is_none() { - if self.trailing.is_some() { - next = self.trailing; - self.trailing = None; - } - } - next - } -} diff --git a/crates/ruff_python_formatter/src/cst/helpers.rs b/crates/ruff_python_formatter/src/cst/helpers.rs index f25a9dd903e8a6..19b8cdb7fa05fb 100644 --- a/crates/ruff_python_formatter/src/cst/helpers.rs +++ b/crates/ruff_python_formatter/src/cst/helpers.rs @@ -1,5 +1,6 @@ use rustpython_parser::ast::Location; +use ruff_python_ast::newlines::StrExt; use ruff_python_ast::source_code::Locator; use ruff_python_ast::types::Range; @@ -96,7 +97,11 @@ pub fn expand_indented_block( // Compound statement: from the colon to the end of the block. let mut offset = 0; - for (index, line) in contents[end_index..].lines().skip(1).enumerate() { + for (index, line) in contents[end_index..] + .universal_newlines() + .skip(1) + .enumerate() + { if line.is_empty() { continue; }