[ruff] Mark fixes for unsorted-dunder-all and `unsorted-dunder-sl…

…ots` as unsafe when there are complex comments in the sequence (`RUF022`, `RUF023`) (#14560)
astral-sh · Nov 24, 2024 · ac23c99 · ac23c99
1 parent e5c7d87
commit ac23c99
Show file tree

Hide file tree

Showing 5 changed files with 219 additions and 87 deletions.
diff --git a/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs b/crates/ruff_linter/src/rules/ruff/rules/sequence_sorting.rs
@@ -6,6 +6,8 @@
 use std::borrow::Cow;
 use std::cmp::Ordering;
 
+use itertools::Itertools;
+
 use ruff_python_ast as ast;
 use ruff_python_codegen::Stylist;
 use ruff_python_parser::{TokenKind, Tokens};
@@ -314,6 +316,52 @@ impl<'a> SortClassification<'a> {
     }
 }
 
+/// The complexity of the comments in a multiline sequence.
+///
+/// A sequence like this has "simple" comments: it's unambiguous
+/// which item each comment refers to, so there's no "risk" in sorting it:
+///
+/// ```py
+/// __all__ = [
+///     "foo",  # comment1
+///     "bar",  # comment2
+/// ]
+/// ```
+///
+/// This sequence has complex comments: we can't safely autofix the sort here,
+/// as the own-line comments might be used to create sections in `__all__`:
+///
+/// ```py
+/// __all__ = [
+///     # fooey things
+///     "foo1",
+///     "foo2",
+///     # barey things
+///     "bar1",
+///     "foobar",
+/// ]
+/// ```
+///
+/// This sequence also has complex comments -- it's ambiguous which item
+/// each comment should belong to:
+///
+/// ```py
+/// __all__ = [
+///     "foo1", "foo", "barfoo",  # fooey things
+///     "baz", bazz2", "fbaz",  # barrey things
+/// ]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(super) enum CommentComplexity {
+    Simple,
+    Complex,
+}
+
+impl CommentComplexity {
+    pub(super) const fn is_complex(self) -> bool {
+        matches!(self, CommentComplexity::Complex)
+    }
+}
+
 // An instance of this struct encapsulates an analysis
 /// of a multiline Python tuple/list that represents an
 /// `__all__`/`__slots__`/etc. definition or augmentation.
@@ -328,6 +376,24 @@ impl<'a> MultilineStringSequenceValue<'a> {
         self.items.len()
     }
 
+    /// Determine the [`CommentComplexity`] of this multiline string sequence.
+    pub(super) fn comment_complexity(&self) -> CommentComplexity {
+        if self.items.iter().tuple_windows().any(|(first, second)| {
+            first.has_own_line_comments()
+                || first
+                    .end_of_line_comments
+                    .is_some_and(|end_line_comment| second.start() < end_line_comment.end())
+        }) || self
+            .items
+            .last()
+            .is_some_and(StringSequenceItem::has_own_line_comments)
+        {
+            CommentComplexity::Complex
+        } else {
+            CommentComplexity::Simple
+        }
+    }
+
     /// Analyse the source range for a multiline Python tuple/list that
     /// represents an `__all__`/`__slots__`/etc. definition or augmentation.
     /// Return `None` if the analysis fails for whatever reason.
@@ -792,6 +858,10 @@ impl<'a> StringSequenceItem<'a> {
     fn with_no_comments(value: &'a str, element_range: TextRange) -> Self {
         Self::new(value, vec![], element_range, None)
     }
+
+    fn has_own_line_comments(&self) -> bool {
+        !self.preceding_comment_ranges.is_empty()
+    }
 }
 
 impl Ranged for StringSequenceItem<'_> {

diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_all.rs
@@ -1,4 +1,4 @@
-use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
+use ruff_diagnostics::{Applicability, Diagnostic, Edit, Fix, FixAvailability, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast as ast;
 use ruff_source_file::LineRanges;
@@ -54,19 +54,40 @@ use crate::rules::ruff::rules::sequence_sorting::{
 /// ```
 ///
 /// ## Fix safety
-/// This rule's fix is marked as always being safe, in that
+/// This rule's fix is marked as unsafe if there are any comments that take up
+/// a whole line by themselves inside the `__all__` definition, for example:
+/// ```py
+/// __all__ = [
+///     # eggy things
+///     "duck_eggs",
+///     "chicken_eggs",
+///     # hammy things
+///     "country_ham",
+///     "parma_ham",
+/// ]
+/// ```
+///
+/// This is a common pattern used to delimit categories within a module's API,
+/// but it would be out of the scope of this rule to attempt to maintain these
+/// categories when alphabetically sorting the items of `__all__`.
+///
+/// The fix is also marked as unsafe if there are more than two `__all__` items
+/// on a single line and that line also has a trailing comment, since here it
+/// is impossible to accurately gauge which item the comment should be moved
+/// with when sorting `__all__`:
+/// ```py
+/// __all__ = [
+///     "a", "c", "e",  # a comment
+///     "b", "d", "f",  # a second  comment
+/// ]
+/// ```
+///
+/// Other than this, the rule's fix is marked as always being safe, in that
 /// it should very rarely alter the semantics of any Python code.
 /// However, note that (although it's rare) the value of `__all__`
 /// could be read by code elsewhere that depends on the exact
 /// iteration order of the items in `__all__`, in which case this
 /// rule's fix could theoretically cause breakage.
-///
-/// Note also that for multiline `__all__` definitions
-/// that include comments on their own line, it can be hard
-/// to tell where the comments should be moved to when sorting
-/// the contents of `__all__`. While this rule's fix will
-/// never delete a comment, it might *sometimes* move a
-/// comment to an unexpected location.
 #[violation]
 pub struct UnsortedDunderAll;
 
@@ -208,37 +229,42 @@ fn create_fix(
     let locator = checker.locator();
     let is_multiline = locator.contains_line_break(range);
 
-    let sorted_source_code = {
-        // The machinery in the `MultilineDunderAllValue` is actually
-        // sophisticated enough that it would work just as well for
-        // single-line `__all__` definitions, and we could reduce
-        // the number of lines of code in this file by doing that.
-        // Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
-        // must process every token in an `__all__` definition as
-        // part of its analysis, and this is quite slow. For
-        // single-line `__all__` definitions, it's also unnecessary,
-        // as it's impossible to have comments in between the
-        // `__all__` elements if the `__all__` definition is all on
-        // a single line. Therefore, as an optimisation, we do the
-        // bare minimum of token-processing for single-line `__all__`
-        // definitions:
-        if is_multiline {
-            let value = MultilineStringSequenceValue::from_source_range(
-                range,
-                kind,
-                locator,
-                checker.tokens(),
-                string_items,
-            )?;
-            assert_eq!(value.len(), elts.len());
-            value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist())
+    // The machinery in the `MultilineDunderAllValue` is actually
+    // sophisticated enough that it would work just as well for
+    // single-line `__all__` definitions, and we could reduce
+    // the number of lines of code in this file by doing that.
+    // Unfortunately, however, `MultilineDunderAllValue::from_source_range()`
+    // must process every token in an `__all__` definition as
+    // part of its analysis, and this is quite slow. For
+    // single-line `__all__` definitions, it's also unnecessary,
+    // as it's impossible to have comments in between the
+    // `__all__` elements if the `__all__` definition is all on
+    // a single line. Therefore, as an optimisation, we do the
+    // bare minimum of token-processing for single-line `__all__`
+    // definitions:
+    let (sorted_source_code, applicability) = if is_multiline {
+        let value = MultilineStringSequenceValue::from_source_range(
+            range,
+            kind,
+            locator,
+            checker.tokens(),
+            string_items,
+        )?;
+        assert_eq!(value.len(), elts.len());
+        let applicability = if value.comment_complexity().is_complex() {
+            Applicability::Unsafe
         } else {
-            sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE)
-        }
+            Applicability::Safe
+        };
+        let sorted_source =
+            value.into_sorted_source_code(SORTING_STYLE, locator, checker.stylist());
+        (sorted_source, applicability)
+    } else {
+        let sorted_source =
+            sort_single_line_elements_sequence(kind, elts, string_items, locator, SORTING_STYLE);
+        (sorted_source, Applicability::Safe)
     };
 
-    Some(Fix::safe_edit(Edit::range_replacement(
-        sorted_source_code,
-        range,
-    )))
+    let edit = Edit::range_replacement(sorted_source_code, range);
+    Some(Fix::applicable_edit(edit, applicability))
 }
diff --git a/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs b/crates/ruff_linter/src/rules/ruff/rules/sort_dunder_slots.rs
@@ -11,8 +11,8 @@ use ruff_text_size::{Ranged, TextRange};
 
 use crate::checkers::ast::Checker;
 use crate::rules::ruff::rules::sequence_sorting::{
-    sort_single_line_elements_sequence, MultilineStringSequenceValue, SequenceKind,
-    SortClassification, SortingStyle,
+    sort_single_line_elements_sequence, CommentComplexity, MultilineStringSequenceValue,
+    SequenceKind, SortClassification, SortingStyle,
 };
 use crate::Locator;
 
@@ -37,24 +37,51 @@ use crate::Locator;
 /// ```
 ///
 /// ## Fix safety
-/// This rule's fix is marked as unsafe whenever Ruff can detect that code
-/// elsewhere in the same file reads the `__slots__` variable in some way.
-/// This is because the order of the items in `__slots__` may have semantic
-/// significance if the `__slots__` of a class is being iterated over, or
-/// being assigned to another value.
+/// This rule's fix is marked as unsafe in three situations.
+///
+/// Firstly, the fix is unsafe if there are any comments that take up
+/// a whole line by themselves inside the `__slots__` definition, for example:
+/// ```py
+/// class Foo:
+///     __slots__ = [
+///         # eggy things
+///         "duck_eggs",
+///         "chicken_eggs",
+///         # hammy things
+///         "country_ham",
+///         "parma_ham",
+///     ]
+/// ```
+///
+/// This is a common pattern used to delimit categories within a class's slots,
+/// but it would be out of the scope of this rule to attempt to maintain these
+/// categories when applying a natural sort to the items of `__slots__`.
+///
+/// Secondly, the fix is also marked as unsafe if there are more than two
+/// `__slots__` items on a single line and that line also has a trailing
+/// comment, since here it is impossible to accurately gauge which item the
+/// comment should be moved with when sorting `__slots__`:
+/// ```py
+/// class Bar:
+///     __slots__ = [
+///         "a", "c", "e",  # a comment
+///         "b", "d", "f",  # a second  comment
+///     ]
+/// ```
+///
+/// Lastly, this rule's fix is marked as unsafe whenever Ruff can detect that
+/// code elsewhere in the same file reads the `__slots__` variable in some way
+/// and the `__slots__` variable is not assigned to a set. This is because the
+/// order of the items in `__slots__` may have semantic significance if the
+/// `__slots__` of a class is being iterated over, or being assigned to another
+/// value.
 ///
 /// In the vast majority of other cases, this rule's fix is unlikely to
 /// cause breakage; as such, Ruff will otherwise mark this rule's fix as
 /// safe. However, note that (although it's rare) the value of `__slots__`
 /// could still be read by code outside of the module in which the
 /// `__slots__` definition occurs, in which case this rule's fix could
 /// theoretically cause breakage.
-///
-/// Additionally, note that for multiline `__slots__` definitions that
-/// include comments on their own line, it can be hard to tell where the
-/// comments should be moved to when sorting the contents of `__slots__`.
-/// While this rule's fix will never delete a comment, it might *sometimes*
-/// move a comment to an unexpected location.
 #[violation]
 pub struct UnsortedDunderSlots {
     class_name: ast::name::Name,
@@ -122,15 +149,17 @@ pub(crate) fn sort_dunder_slots(checker: &Checker, binding: &Binding) -> Option<
     );
 
     if let SortClassification::UnsortedAndMaybeFixable { items } = sort_classification {
-        if let Some(sorted_source_code) = display.generate_sorted_source_code(&items, checker) {
+        if let Some((sorted_source_code, comment_complexity)) =
+            display.generate_sorted_source_code(&items, checker)
+        {
             let edit = Edit::range_replacement(sorted_source_code, display.range());
-
-            let applicability = if display.kind.is_set_literal() || binding.is_unused() {
-                Applicability::Safe
-            } else {
+            let applicability = if comment_complexity.is_complex()
+                || (binding.is_used() && !display.kind.is_set_literal())
+            {
                 Applicability::Unsafe
+            } else {
+                Applicability::Safe
             };
-
             diagnostic.set_fix(Fix::applicable_edit(edit, applicability));
         }
     }
@@ -219,7 +248,11 @@ impl<'a> StringLiteralDisplay<'a> {
         Some(result)
     }
 
-    fn generate_sorted_source_code(&self, elements: &[&str], checker: &Checker) -> Option<String> {
+    fn generate_sorted_source_code(
+        &self,
+        elements: &[&str],
+        checker: &Checker,
+    ) -> Option<(String, CommentComplexity)> {
         let locator = checker.locator();
 
         let multiline_classification = if locator.contains_line_break(self.range()) {
@@ -238,26 +271,31 @@ impl<'a> StringLiteralDisplay<'a> {
                     elements,
                 )?;
                 assert_eq!(analyzed_sequence.len(), self.elts.len());
-                Some(analyzed_sequence.into_sorted_source_code(
+                let comment_complexity = analyzed_sequence.comment_complexity();
+                let sorted_code = analyzed_sequence.into_sorted_source_code(
                     SORTING_STYLE,
                     locator,
                     checker.stylist(),
-                ))
+                );
+                Some((sorted_code, comment_complexity))
             }
             // Sorting multiline dicts is unsupported
             (DisplayKind::Dict { .. }, MultilineClassification::Multiline) => None,
             (DisplayKind::Sequence(sequence_kind), MultilineClassification::SingleLine) => {
-                Some(sort_single_line_elements_sequence(
+                let sorted_code = sort_single_line_elements_sequence(
                     *sequence_kind,
                     &self.elts,
                     elements,
                     locator,
                     SORTING_STYLE,
-                ))
+                );
+                Some((sorted_code, CommentComplexity::Simple))
+            }
+            (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => {
+                let sorted_code =
+                    sort_single_line_elements_dict(&self.elts, elements, items, locator);
+                Some((sorted_code, CommentComplexity::Simple))
             }
-            (DisplayKind::Dict { items }, MultilineClassification::SingleLine) => Some(
-                sort_single_line_elements_dict(&self.elts, elements, items, locator),
-            ),
         }
     }
 }