diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bdd6f66..a1d018c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, macOS-10.15, windows-2019] - python-version: [3.6, 3.7, 3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2] + python-version: [3.8, 3.9, "3.10", 3.11, 3.12.0-rc.2] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e9a097..ff40c44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,12 @@ +# (Fork) 0.0.8 (2024-06-23) + +- Added support for parsing attributes from Sphinx-style docstrings +- Dropped support for Python 3.6 because it doesn't support data classes + # (Fork) 0.0.7 (2024-06-22) - Made "Attributes" a separate section from "Parameters" (for Google, Numpy, and Sphinx - styles) + styles) # (Fork) 0.0.6 (2024-06-22) @@ -24,6 +29,8 @@ # (Fork) 0.0.3 (2023-08-28) - Google, Numpy, Sphinx: Make "Yields" an official parsed section (`DocstringYields`) + - This corresponds to a PR in the upstream repo that was open + since June 2023 (https://github.com/rr-/docstring_parser/pull/79) # (Fork) 0.0.2 (2023-08-26) @@ -33,7 +40,7 @@ # (Fork) 0.0.1 (2023-08-18) -- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly +- Google: Fixed a bug where union style return types (such as `int | str`) are not parsed correctly (https://github.com/rr-/docstring_parser/issues/81) # 0.15 (2022-09-05) diff --git a/README.md b/README.md index 443c100..f7f7c87 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,12 @@ docstring_parser_fork ================ -This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser). The reason I'm forking that is to quickly get some bug fixes out for users of [pydoclint](https://github.com/jsh9/pydoclint). +This is a fork of [docstring_parser](https://github.com/rr-/docstring_parser). 
+ +This fork fixes bugs that the upstream library has not fixed, and it also +offers additional functionality. To inspect the difference between this +fork and the upstream, go to [CHANGELOG.md](./CHANGELOG.md) and read the +entries that start with "(Fork)". ------ diff --git a/docstring_parser/rest.py b/docstring_parser/rest.py index 86cd931..02f99a7 100644 --- a/docstring_parser/rest.py +++ b/docstring_parser/rest.py @@ -6,7 +6,6 @@ from .common import ( DEPRECATION_KEYWORDS, - ATTR_KEYWORDS, PARAM_KEYWORDS, RAISES_KEYWORDS, RETURNS_KEYWORDS, @@ -24,11 +23,13 @@ RenderingStyle, ) +from docstring_parser.rest_attr_parser import Attribute, parse_attributes + def _build_meta(args: T.List[str], desc: str) -> DocstringMeta: key = args[0] - if key in PARAM_KEYWORDS | ATTR_KEYWORDS: + if key in PARAM_KEYWORDS: if len(args) == 3: key, type_name, arg_name = args if type_name.endswith("?"): @@ -48,11 +49,7 @@ def _build_meta(args: T.List[str], desc: str) -> DocstringMeta: match = re.match(r".*defaults to (.+)", desc, flags=re.DOTALL) default = match.group(1).rstrip(".") if match else None - DocstringSectionType = ( - DocstringParam if key in PARAM_KEYWORDS else DocstringAttr - ) - - return DocstringSectionType( + return DocstringParam( args=args, description=desc, arg_name=arg_name, @@ -133,6 +130,21 @@ def parse(text: str) -> Docstring: return ret text = inspect.cleandoc(text) + + parsed_attrs: T.List[Attribute] + line_nums_with_attrs: T.List[int] + parsed_attrs, line_nums_with_attrs = parse_attributes(text) + + # Exclude lines with attributes, because they can interfere with + # other contents + text_lines: T.List[str] = text.split('\n') + lines_without_attr = [] + for i, line in enumerate(text_lines): + if i not in line_nums_with_attrs: + lines_without_attr.append(line) + + text = '\n'.join(lines_without_attr) + match = re.search("^:", text, flags=re.M) if match: desc_chunk = text[: match.start()] @@ -201,6 +213,20 @@ def parse(text: str) -> Docstring: ) ) + + + 
ret.meta.extend([ + DocstringAttr( + args=['attr', _.name], + description=_.description, + arg_name=_.name, + type_name=_.type, + is_optional=None, + default=None, + ) + for _ in parsed_attrs + ]) + return ret diff --git a/docstring_parser/rest_attr_parser.py b/docstring_parser/rest_attr_parser.py new file mode 100644 index 0000000..93ecb46 --- /dev/null +++ b/docstring_parser/rest_attr_parser.py @@ -0,0 +1,116 @@ +"""Parser for attributes in ReST-style docstrings""" +from typing import List, Optional, Tuple +from dataclasses import dataclass + +@dataclass +class Attribute: + name: str + type: Optional[str] = None + description: Optional[str] = None + + +def parse_attributes(docstring: str) -> Tuple[List[Attribute], List[int]]: + attributes = [] + lines = docstring.split('\n') + + current_attr_lines = [] + current_attr_line_nums = [] + inside_attribute_block = False + + all_line_nums_with_attr: List[int] = [] + + for i, line in enumerate(lines): + stripped_line = line.strip() + + if stripped_line.startswith(".. 
attribute ::"): + if current_attr_lines: + attrs, line_nums_with_actual_attr = parse_attribute_block( + current_attr_lines, current_attr_line_nums + ) + attributes.append(attrs) + all_line_nums_with_attr.extend(line_nums_with_actual_attr) + current_attr_lines = [] + current_attr_line_nums = [] + + inside_attribute_block = True + current_attr_lines.append(line) + current_attr_line_nums.append(i) + elif inside_attribute_block: + if not stripped_line and current_attr_lines: + # Check if the next line is also blank indicating end of block + if current_attr_lines[-1].strip() == '': + inside_attribute_block = False + attrs, line_nums_with_actual_attr = parse_attribute_block( + current_attr_lines, current_attr_line_nums + ) + attributes.append(attrs) + all_line_nums_with_attr.extend(line_nums_with_actual_attr) + current_attr_lines = [] + current_attr_line_nums = [] + + current_attr_lines.append(line) + current_attr_line_nums.append(i) + elif stripped_line.startswith(":") and current_attr_lines: + # End the current attribute block if a new param or similar + # is detected + inside_attribute_block = False + attrs, line_nums_with_actual_attr = parse_attribute_block( + current_attr_lines, current_attr_line_nums + ) + all_line_nums_with_attr.extend(line_nums_with_actual_attr) + attributes.append(attrs) + current_attr_lines = [] + current_attr_line_nums = [] + + if current_attr_lines: + attrs, line_nums_with_actual_attr = parse_attribute_block( + current_attr_lines, current_attr_line_nums + ) + attributes.append(attrs) + all_line_nums_with_attr.extend(line_nums_with_actual_attr) + + return attributes, all_line_nums_with_attr + + +def parse_attribute_block( + lines: List[str], + global_line_nums: List[int], +) -> Tuple[Attribute, List[int]]: + name = None + type_ = None + description = [] + description_started = False + + line_nums_with_actual_attr: List[int] = [] + lines_with_actual_attr: List[str] = [] + + # Get the base indentation level from the first line + 
base_indent_level = len(lines[0]) - len(lines[0].lstrip()) + + for j, line in zip(global_line_nums, lines): + stripped_line = line.strip() + current_indent_level = len(line) - len(line.lstrip()) + + if stripped_line.startswith(".. attribute ::"): + name = stripped_line[len(".. attribute ::"):].strip() + lines_with_actual_attr.append(line) + line_nums_with_actual_attr.append(j) + elif stripped_line.startswith(":type:"): + type_ = stripped_line[len(":type:"):].strip() + lines_with_actual_attr.append(line) + line_nums_with_actual_attr.append(j) + elif current_indent_level > base_indent_level: + # Include in the description if it has greater indentation or + # description has already started + if stripped_line or description_started: + description_started = True + description.append(stripped_line) + lines_with_actual_attr.append(line) + line_nums_with_actual_attr.append(j) + + # Clean up the description, removing leading/trailing empty lines + description_text = '\n'.join(description).strip() if description else None + + attr = Attribute(name=name, type=type_, description=description_text) + + return attr, line_nums_with_actual_attr diff --git a/docstring_parser/tests/test_parser.py b/docstring_parser/tests/test_parser.py index 932ae81..b3744a4 100644 --- a/docstring_parser/tests/test_parser.py +++ b/docstring_parser/tests/test_parser.py @@ -20,8 +20,12 @@ def test_rest() -> None: :param spam: spam desc :param int bla: bla desc :param str yay: - :attr hello: hello world - :type hello: bool + + .. 
attribute :: hello + :type: bool + + hello world + :raises ValueError: exc desc :returns tuple: ret desc """ diff --git a/docstring_parser/tests/test_rest_attr_parser.py b/docstring_parser/tests/test_rest_attr_parser.py new file mode 100644 index 0000000..65c9a10 --- /dev/null +++ b/docstring_parser/tests/test_rest_attr_parser.py @@ -0,0 +1,114 @@ +from typing import List + +import pytest + +from docstring_parser.rest_attr_parser import parse_attributes, Attribute + + +@pytest.mark.parametrize( + 'docstring, expected_attributes, expected_lines_with_attributes', + [ + ( + '', + [], + [], + ), + ( + """ + My Class + + :param name: My name + :type name: str + """, + [], + [], + ), + ( + """ + My Class + + .. attribute :: attr_1 + :type: str + + .. attribute :: attr_2 + :type: bool + + Attr 2 + + .. attribute :: attr_3 + + Attr 3 + + .. attribute :: attr_4 + .. attribute :: attr_5 + .. attribute :: attr_6 + :type: dict | list + + ! + + :param name: My name + :type name: str + """, + [ + Attribute(name='attr_1', type='str', description=None), + Attribute(name='attr_2', type='bool', description='Attr 2'), + Attribute(name='attr_3', type=None, description='Attr 3'), + Attribute(name='attr_4', type=None, description=None), + Attribute(name='attr_5', type=None, description=None), + Attribute(name='attr_6', type='dict | list', description='!'), + ], + [3, 4, 6, 7, 9, 11, 13, 15, 16, 17, 18, 20], + ), + ( + """ + My Class + + .. attribute :: attr_1 + :type: str + + .. attribute :: attr_2 + :type: bool + + Attr 2 + + :param bar: A param called "bar" + :type name: float + + .. attribute :: attr_3 + + Attr 3 + :param goo: A param called "goo" + :type name: bool + + .. attribute :: attr_4 + .. attribute :: attr_5 + :param foo: A param called "foo" + :type name: float + .. attribute :: attr_6 + :type: dict | list + + ! 
+ + :param name: My name + :type name: str + """, + [ + Attribute(name='attr_1', type='str', description=None), + Attribute(name='attr_2', type='bool', description='Attr 2'), + Attribute(name='attr_3', type=None, description='Attr 3'), + Attribute(name='attr_4', type=None, description=None), + Attribute(name='attr_5', type=None, description=None), + Attribute(name='attr_6', type='dict | list', description='!'), + ], + [3, 4, 6, 7, 9, 14, 16, 20, 21, 24, 25, 27], + ), + ], +) +def test_parser_attributes( + docstring: str, + expected_attributes: List[Attribute], + expected_lines_with_attributes: List[str], +) -> None: + attributes, lines_with_attributes = parse_attributes(docstring) + assert attributes == expected_attributes + assert lines_with_attributes == expected_lines_with_attributes diff --git a/pyproject.toml b/pyproject.toml index fd86fcf..d15103a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "docstring_parser_fork" -version = "0.0.7" +version = "0.0.8" description = "Parse Python docstrings in reST, Google and Numpydoc format" authors = ["Marcin Kurczewski "] license = "MIT" @@ -17,7 +17,6 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -34,7 +33,7 @@ packages = [ include = ["docstring_parser/py.typed"] [tool.poetry.dependencies] -python = ">=3.6,<4.0" +python = ">=3.7,<4.0" [tool.poetry.dev-dependencies] pre-commit = {version = ">=2.16.0", python = ">=3.9"}