Bug: `CPY001` sneaking through `ruff check --statistics` #7805

jamesbraza · 2023-10-04T07:05:41Z

With ruff==0.0.292, consider the Python script below, which is designed to generate a `select` that requires no work:

"""Hack script to output ruff codes for select, using Python 3.8+."""

from __future__ import annotations

import collections
import json
import shutil
import string
import subprocess
from collections.abc import Collection
from typing import Any

# Location of the `ruff` executable as resolved by `shutil.which`; this is None
# when no `ruff` is found on PATH.
RUFF_PATH = shutil.which("ruff")


def decompose_code(code: str) -> tuple[str, int]:
    """Split a rule code into its text prefix and its number.

    The trailing run of digits is parsed as an integer, so leading zeros are
    dropped: "ABC123" becomes ("ABC", 123) and "E0001" becomes ("E", 1).

    Raises:
        ValueError: If `code` does not end in at least one digit.
    """
    prefix = code.rstrip(string.digits)
    digits = code[len(prefix) :]
    return prefix, int(digits)


def group_code_text(
    codes: Collection[str],
    text_using_first_digit: bool = False,
) -> dict[str, list[int]]:
    """Bucket rule codes by prefix, mapping each prefix to the numbers seen under it.

    Codes are visited in sorted order, so each list of numbers follows the
    lexicographic order of the codes it came from.

    Args:
        codes: Rule codes such as "ABC123".
        text_using_first_digit: When true, the first digit is treated as part
            of the prefix, so "ABC123" is filed under "ABC1" with number 23.

    Returns:
        A `collections.defaultdict` keyed by prefix whose values are the
        parsed numbers.
    """
    buckets = collections.defaultdict(list)
    for code in sorted(codes):
        prefix, value = decompose_code(code)
        if text_using_first_digit:
            # Move the split point one character right so the first digit
            # becomes part of the key.
            split_at = len(prefix) + 1
            prefix, value = code[:split_at], int(code[split_at:])
        buckets[prefix].append(value)
    return buckets


def _collapse_complete_groups(
    selection: set[str],
    candidates: set[str],
    all_codes: set[str],
    leading_digits: int,
) -> None:
    """Replace each fully selected group of codes in `selection` with the group's prefix.

    A code's group key is its text prefix (the code minus its trailing digits)
    extended by `leading_digits` characters: with 0, "E501" belongs to "E";
    with 1, it belongs to "E5". A group is collapsed only when the candidates
    contain every code that `all_codes` has under that key.

    Args:
        selection: Set mutated in place; collapsed codes are removed and the
            group key is added.
        candidates: Codes from `selection` that may be collapsed.
        all_codes: Every known code, used to decide whether a group is complete.
        leading_digits: Number of digits to include in the group key.
    """

    def group(codes: set[str]) -> dict[str, set[str]]:
        groups = collections.defaultdict(set)
        for code in codes:
            text = code.rstrip(string.digits)
            groups[code[: len(text) + leading_digits]].add(code)
        return groups

    all_groups = group(all_codes)
    # `group(candidates)` is a snapshot, so mutating `selection` below is safe
    # even when `candidates` is `selection` itself.
    for key, members in group(candidates).items():
        if members == all_groups[key]:
            selection.difference_update(members)
            selection.add(key)


def stringify(
    all_rules: list[dict[str, Any]],
    statistics_rules: list[dict[str, Any]],
    autofix_only: bool = True,
) -> str:
    """Convert `ruff rule` and `ruff check --statistics` outputs to a string.

    The result is a `select = [...]` snippet listing the eligible rules that
    reported no violations. Whenever every rule sharing a prefix (first the
    text prefix, then text prefix plus first digit) is listed, the individual
    codes are replaced by that prefix.

    Args:
        all_rules: Rule blobs; each is read for its "code" and "preview" keys,
            and for "fix" when `autofix_only` is true.
        statistics_rules: Statistics blobs; each is read for its "code" and
            "count" keys.
        autofix_only: When true, only rules whose "fix" text contains "always"
            are eligible.

    Returns:
        The formatted `select` list, or "select = []" when no rule qualifies.
    """
    # 1. Compute raw delta
    all_codes = {blob["code"] for blob in all_rules}
    nonpreview_codes = {
        blob["code"]
        for blob in all_rules
        if not blob["preview"] and (not autofix_only or "always" in blob["fix"])
    }
    erroring_nonpreview_codes = {
        blob["code"]
        for blob in statistics_rules
        if blob["code"] in nonpreview_codes and blob["count"] > 0
    }
    difference = nonpreview_codes - erroring_nonpreview_codes

    # Since difference <= nonpreview_codes <= all_codes, a group of `difference`
    # that equals the same group of `all_codes` is necessarily complete among
    # the eligible codes too. Comparing the code strings themselves (rather
    # than rebuilding them from zero-padded integers) keeps codes with leading
    # zeros or mixed widths intact.

    # 2. Group on text
    _collapse_complete_groups(difference, difference, all_codes, leading_digits=0)

    # 3. Group on text + first digit, skipping the prefixes added in step 2
    _collapse_complete_groups(
        difference,
        {code for code in difference if not code.isalpha()},
        all_codes,
        leading_digits=1,
    )

    # 4. Stringify
    if not difference:
        # Avoid emitting an empty-string selector when nothing qualifies.
        return "select = []"
    stringified = '",\n    "'.join(sorted(difference))
    return f"""select = [\n    "{stringified}",\n]"""


def main() -> None:
    """Autofix the current directory with ruff, then print a `select` list.

    Runs, in order: `ruff rule --format=json --all` to list the rules,
    `ruff check --select=ALL --fix-only .` to apply fixes, and
    `ruff check --select=ALL --statistics --output-format=json .` to collect
    the remaining violation counts. The two JSON outputs are handed to
    `stringify` and the result is printed.

    Raises:
        FileNotFoundError: If no `ruff` executable was found on PATH.
        subprocess.CalledProcessError: If listing the rules or the fix-only
            run exits with a non-zero status.
        NotImplementedError: If the statistics run writes to stderr or exits
            with any status other than 1.
    """
    if RUFF_PATH is None:
        # Fail with a clear message instead of the TypeError that subprocess
        # raises for a None argument.
        raise FileNotFoundError("Could not find a `ruff` executable on PATH.")
    all_rules = json.loads(
        subprocess.check_output((RUFF_PATH, "rule", "--format=json", "--all")).decode(),
    )
    subprocess.check_call((RUFF_PATH, "check", "--select=ALL", "--fix-only", "."))
    out = subprocess.run(
        (
            RUFF_PATH,
            "check",
            "--select=ALL",
            "--statistics",
            "--output-format=json",
            ".",
        ),
        capture_output=True,
        # The exit status is inspected manually below.
        check=False,
    )
    # Only "exit status 1 with empty stderr" is handled; anything else is
    # reported rather than guessed at.
    if out.stderr or out.returncode != 1:
        raise NotImplementedError(
            f"Unhandled return code {out.returncode} or stderr {out.stderr.decode()!r}."
        )
    print(stringify(all_rules, statistics_rules=json.loads(out.stdout.decode())))


# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()

However, CPY001 (and multiple others) are somehow sneaking through `ruff check --statistics`: they're not showing up as having errors (i.e. they're missing from the output). Any ideas why?

The text was updated successfully, but these errors were encountered:

charliermarsh · 2023-10-04T13:25:21Z

I think you need to add --preview to the ruff check call?

jamesbraza · 2023-10-04T17:04:00Z

This was somehow fixed by #7812, so going to close this out

charliermarsh added question Asking for support or clarification needs-info More information is needed from the issue author labels Oct 4, 2023

jamesbraza closed this as completed Oct 4, 2023

jamesbraza mentioned this issue Oct 4, 2023

More complete ruff rule set run-llama/llama_index#7970

Merged

4 tasks

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Bug: `CPY001` sneaking through `ruff check --statistics` #7805

Bug: `CPY001` sneaking through `ruff check --statistics` #7805

jamesbraza commented Oct 4, 2023 •

edited

Loading

charliermarsh commented Oct 4, 2023

jamesbraza commented Oct 4, 2023

Bug: CPY001 sneaking through ruff check --statistics #7805

Bug: CPY001 sneaking through ruff check --statistics #7805

Comments

jamesbraza commented Oct 4, 2023 • edited Loading

charliermarsh commented Oct 4, 2023

jamesbraza commented Oct 4, 2023

Bug: `CPY001` sneaking through `ruff check --statistics` #7805

Bug: `CPY001` sneaking through `ruff check --statistics` #7805

jamesbraza commented Oct 4, 2023 •

edited

Loading