Skip to content

Commit

Permalink
[ruff] Mark fixes for unsorted-dunder-all and `unsorted-dunder-sl…
Browse files Browse the repository at this point in the history
…ots` as unsafe when there are complex comments in the sequence (`RUF022`, `RUF023`) (#14560)
  • Loading branch information
AlexWaygood authored Nov 24, 2024
1 parent e5c7d87 commit ac23c99
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 87 deletions.
70 changes: 70 additions & 0 deletions crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use std::borrow::Cow;
use std::cmp::Ordering;

use itertools::Itertools;

use ruff_python_ast as ast;
use ruff_python_codegen::Stylist;
use ruff_python_parser::{TokenKind, Tokens};
Expand Down Expand Up @@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
}
}

/// The complexity of the comments in a multiline sequence.
///
/// A sequence like this has "simple" comments: it's unambiguous
/// which item each comment refers to, so there's no "risk" in sorting it:
///
/// ```py
/// __all__ = [
/// "foo", # comment1
/// "bar", # comment2
/// ]
/// ```
///
/// This sequence has complex comments: we can't safely autofix the sort here,
/// as the own-line comments might be used to create sections in `__all__`:
///
/// ```py
/// __all__ = [
/// # fooey things
/// "foo1",
/// "foo2",
/// # barey things
/// "bar1",
/// "foobar",
/// ]
/// ```
///
/// This sequence also has complex comments -- it's ambiguous which item
/// each comment should belong to:
///
/// ```py
/// __all__ = [
/// "foo1", "foo", "barfoo", # fooey things
/// "baz", bazz2", "fbaz", # barrey things
/// ]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(super) enum CommentComplexity {
Simple,
Complex,
}

impl CommentComplexity {
pub(super) const fn is_complex(self) -> bool {
matches!(self, CommentComplexity::Complex)
}
}

// An instance of this struct encapsulates an analysis
/// of a multiline Python tuple/list that represents an
/// `__all__`/`__slots__`/etc. definition or augmentation.
Expand All @@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
self.items.len()
}

/// Determine the [`CommentComplexity`] of this multiline string sequence.
pub(super) fn comment_complexity(&self) -> CommentComplexity {
if self.items.iter().tuple_windows().any(|(first, second)| {
first.has_own_line_comments()
|| first
.end_of_line_comments
.is_some_and(|end_line_comment| second.start() < end_line_comment.end())
}) || self
.items
.last()
.is_some_and(StringSequenceItem::has_own_line_comments)
{
CommentComplexity::Complex
} else {
CommentComplexity::Simple
}
}

/// Analyse the source range for a multiline Python tuple/list that
/// represents an `__all__`/`__slots__`/etc. definition or augmentation.
/// Return `None` if the analysis fails for whatever reason.
Expand Down Expand Up @@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
Self::new(value, vec![], element_range, None)
}

fn has_own_line_comments(&self) -> bool {
!self.preceding_comment_ranges.is_empty()
}
}

impl Ranged for StringSequenceItem<'_> {
Expand Down
104 changes: 65 additions & 39 deletions crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast as ast;
use ruff_source_file::LineRanges;
Expand Down Expand Up @@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as always being safe, in that
/// This rule's fix is marked as unsafe if there are any comments that take up
/// a whole line by themselves inside the `__all__` definition, for example:
/// ```py
/// __all__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a module's API,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when alphabetically sorting the items of `__all__`.
///
/// The fix is also marked as unsafe if there are more than two `__all__` items
/// on a single line and that line also has a trailing comment, since here it
/// is impossible to accurately gauge which item the comment should be moved
/// with when sorting `__all__`:
/// ```py
/// __all__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Other than this, the rule's fix is marked as always being safe, in that
/// it should very rarely alter the semantics of any Python code.
/// However, note that (although it's rare) the value of `__all__`
/// could be read by code elsewhere that depends on the exact
/// iteration order of the items in `__all__`, in which case this
/// rule's fix could theoretically cause breakage.
///
/// Note also that for multiline `__all__` definitions
/// that include comments on their own line, it can be hard
/// to tell where the comments should be moved to when sorting
/// the contents of `__all__`. While this rule's fix will
/// never delete a comment, it might *sometimes* move a
/// comment to an unexpected location.
#[violation]
pub struct UnsortedDunderAll;

Expand Down Expand Up @@ -208,37 +229,42 @@ fn create_fix(
let locator = checker.locator();
let is_multiline = locator.contains_line_break(range);

let sorted_source_code = {
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
// The machinery in the `MultilineDunderAllValue` is actually
// sophisticated enough that it would work just as well for
// single-line `__all__` definitions, and we could reduce
// the number of lines of code in this file by doing that.
// Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
// must process every token in an `__all__` definition as
// part of its analysis, and this is quite slow. For
// single-line `__all__` definitions, it's also unnecessary,
// as it's impossible to have comments in between the
// `__all__` elements if the `__all__` definition is all on
// a single line. Therefore, as an optimisation, we do the
// bare minimum of token-processing for single-line `__all__`
// definitions:
let (sorted_source_code, applicability) = if is_multiline {
let value = MultilineStringSequenceValue::from_source_range(
range,
kind,
locator,
checker.tokens(),
string_items,
)?;
assert_eq!(value.len(), elts.len());
let applicability = if value.comment_complexity().is_complex() {
Applicability::Unsafe
} else {
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
}
Applicability::Safe
};
let sorted_source =
value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
(sorted_source, applicability)
} else {
let sorted_source =
sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
(sorted_source, Applicability::Safe)
};

Some(Fix::safe_edit(Edit::range_replacement(
sorted_source_code,
range,
)))
let edit = Edit::range_replacement(sorted_source_code, range);
Some(Fix::applicable_edit(edit, applicability))
}
92 changes: 65 additions & 27 deletions crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};

use crate::checkers::ast::Checker;
use crate::rules::ruff::rules::sequence_sorting::{
sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
SortClassification, SortingStyle,
sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
SequenceKind, SortClassification, SortingStyle,
};
use crate::Locator;

Expand All @@ -37,24 +37,51 @@ use crate::Locator;
/// ```
///
/// ## Fix safety
/// This rule's fix is marked as unsafe whenever Ruff can detect that code
/// elsewhere in the same file reads the `__slots__` variable in some way.
/// This is because the order of the items in `__slots__` may have semantic
/// significance if the `__slots__` of a class is being iterated over, or
/// being assigned to another value.
/// This rule's fix is marked as unsafe in three situations.
///
/// Firstly, the fix is unsafe if there are any comments that take up
/// a whole line by themselves inside the `__slots__` definition, for example:
/// ```py
/// class Foo:
/// __slots__ = [
/// # eggy things
/// "duck_eggs",
/// "chicken_eggs",
/// # hammy things
/// "country_ham",
/// "parma_ham",
/// ]
/// ```
///
/// This is a common pattern used to delimit categories within a class's slots,
/// but it would be out of the scope of this rule to attempt to maintain these
/// categories when applying a natural sort to the items of `__slots__`.
///
/// Secondly, the fix is also marked as unsafe if there are more than two
/// `__slots__` items on a single line and that line also has a trailing
/// comment, since here it is impossible to accurately gauge which item the
/// comment should be moved with when sorting `__slots__`:
/// ```py
/// class Bar:
/// __slots__ = [
/// "a", "c", "e", # a comment
/// "b", "d", "f", # a second comment
/// ]
/// ```
///
/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
/// code elsewhere in the same file reads the `__slots__` variable in some way
/// and the `__slots__` variable is not assigned to a set. This is because the
/// order of the items in `__slots__` may have semantic significance if the
/// `__slots__` of a class is being iterated over, or being assigned to another
/// value.
///
/// In the vast majority of other cases, this rule's fix is unlikely to
/// cause breakage; as such, Ruff will otherwise mark this rule's fix as
/// safe. However, note that (although it's rare) the value of `__slots__`
/// could still be read by code outside of the module in which the
/// `__slots__` definition occurs, in which case this rule's fix could
/// theoretically cause breakage.
///
/// Additionally, note that for multiline `__slots__` definitions that
/// include comments on their own line, it can be hard to tell where the
/// comments should be moved to when sorting the contents of `__slots__`.
/// While this rule's fix will never delete a comment, it might *sometimes*
/// move a comment to an unexpected location.
#[violation]
pub struct UnsortedDunderSlots {
class_name: ast::name::Name,
Expand Down Expand Up @@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
);

if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
if let Some((sorted_source_code, comment_complexity)) =
display.generate_sorted_source_code(&items, checker)
{
let edit = Edit::range_replacement(sorted_source_code, display.range());

let applicability = if display.kind.is_set_literal() || binding.is_unused() {
Applicability::Safe
} else {
let applicability = if comment_complexity.is_complex()
|| (binding.is_used() && !display.kind.is_set_literal())
{
Applicability::Unsafe
} else {
Applicability::Safe
};

diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
}
}
Expand Down Expand Up @@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
Some(result)
}

fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
fn generate_sorted_source_code(
&self,
elements: &[&str],
checker: &Checker,
) -> Option<(String, CommentComplexity)> {
let locator = checker.locator();

let multiline_classification = if locator.contains_line_break(self.range()) {
Expand All @@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
elements,
)?;
assert_eq!(analyzed_sequence.len(), self.elts.len());
Some(analyzed_sequence.into_sorted_source_code(
let comment_complexity = analyzed_sequence.comment_complexity();
let sorted_code = analyzed_sequence.into_sorted_source_code(
SORTING_STYLE,
locator,
checker.stylist(),
))
);
Some((sorted_code, comment_complexity))
}
// Sorting multiline dicts is unsupported
(DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
(DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
Some(sort_single_line_elements_sequence(
let sorted_code = sort_single_line_elements_sequence(
*sequence_kind,
&self.elts,
elements,
locator,
SORTING_STYLE,
))
);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
let sorted_code =
sort_single_line_elements_dict(&self.elts, elements, items, locator);
Some((sorted_code, CommentComplexity::Simple))
}
(DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
sort_single_line_elements_dict(&self.elts, elements, items, locator),
),
}
}
}
Expand Down
Loading

0 comments on commit ac23c99

Please sign in to comment.