From 85c5a7f9df490fc79a613b9b0488c5810c037824 Mon Sep 17 00:00:00 2001 From: Jeremy Howard Date: Mon, 11 Apr 2022 10:56:26 +1000 Subject: [PATCH] fixes #405 --- fastcore/_nbdev.py | 2 + fastcore/docments.py | 42 +++++- fastcore/docscrape.py | 37 ++--- nbs/06_docments.ipynb | 318 +++++++++++++++++++++++++++++++++++++----- 4 files changed, 336 insertions(+), 63 deletions(-) diff --git a/fastcore/_nbdev.py b/fastcore/_nbdev.py index 011afcc5..a97c2494 100644 --- a/fastcore/_nbdev.py +++ b/fastcore/_nbdev.py @@ -244,6 +244,8 @@ "gather_attrs": "05_transform.ipynb", "gather_attr_names": "05_transform.ipynb", "Pipeline": "05_transform.ipynb", + "docstring": "06_docments.ipynb", + "parse_docstring": "06_docments.ipynb", "empty": "06_docments.ipynb", "docments": "06_docments.ipynb", "test_sig": "07_meta.ipynb", diff --git a/fastcore/docments.py b/fastcore/docments.py index 59aede92..39a8fcbb 100644 --- a/fastcore/docments.py +++ b/fastcore/docments.py @@ -4,19 +4,36 @@ from __future__ import annotations -__all__ = ['empty', 'docments'] +__all__ = ['docstring', 'parse_docstring', 'empty', 'docments'] # Cell #nbdev_comment from __future__ import annotations + +import re from tokenize import tokenize,COMMENT from ast import parse,FunctionDef from io import BytesIO from textwrap import dedent from types import SimpleNamespace from inspect import getsource,isfunction,isclass,signature,Parameter -from .basics import * +from .utils import * -import re +from fastcore import docscrape +from inspect import isclass + +# Cell +def docstring(sym): + "Get docstring for `sym` for functions ad classes" + if isinstance(sym, str): return sym + res = getattr(sym, "__doc__", None) + if not res and isclass(sym): res = nested_attr(sym, "__init__.__doc__") + return res or "" + +# Cell +def parse_docstring(sym): + "Parse a numpy-style docstring in `sym`" + docs = docstring(sym) + return AttrDict(**docscrape.NumpyDocString(docstring(sym))) # Cell def _parses(s): @@ -36,7 +53,7 @@ def 
_clean_comment(s): def _param_locs(s, returns=True): "`dict` of parameter line numbers to names" body = _parses(s).body - if len(body)!=1or not isinstance(body[0], FunctionDef): return None + if len(body)!=1 or not isinstance(body[0], FunctionDef): return None defn = body[0] res = {arg.lineno:arg.arg for arg in defn.args.args} if returns and defn.returns: res[defn.returns.lineno] = 'return' @@ -59,21 +76,36 @@ def _get_full(anno, name, default, docs): if anno==empty and default!=empty: anno = type(default) return AttrDict(docment=docs.get(name), anno=anno, default=default) +# Cell +def _merge_doc(dm, npdoc): + if not npdoc: return dm + if not dm.anno or dm.anno==empty: dm.anno = npdoc.type + if not dm.docment: dm.docment = '\n'.join(npdoc.desc) + return dm + +def _merge_docs(dms, npdocs): + npparams = npdocs['Parameters'] + params = {nm:_merge_doc(dm,npparams.get(nm,None)) for nm,dm in dms.items()} + if 'return' in dms: params['return'] = _merge_doc(dms['return'], npdocs['Returns']) + return params + # Cell def docments(s, full=False, returns=True, eval_str=False): "`dict` of parameter names to 'docment-style' comments in function or string `s`" + nps = parse_docstring(s) if isclass(s): s = s.__init__ # Constructor for a class comments = {o.start[0]:_clean_comment(o.string) for o in _tokens(s) if o.type==COMMENT} parms = _param_locs(s, returns=returns) docs = {arg:_get_comment(line, arg, comments, parms) for line,arg in parms.items()} - if not full: return AttrDict(docs) if isinstance(s,str): s = eval(s) sig = signature(s) res = {arg:_get_full(p.annotation, p.name, p.default, docs) for arg,p in sig.parameters.items()} if returns: res['return'] = _get_full(sig.return_annotation, 'return', empty, docs) + res = _merge_docs(res, nps) if eval_str: hints = type_hints(s) for k,v in res.items(): if k in hints: v['anno'] = hints.get(k) + if not full: res = {k:v['docment'] for k,v in res.items()} return AttrDict(res) \ No newline at end of file diff --git 
a/fastcore/docscrape.py b/fastcore/docscrape.py index 3dce9481..182a293b 100644 --- a/fastcore/docscrape.py +++ b/fastcore/docscrape.py @@ -21,6 +21,11 @@ contributors may be used to endorse or promote products derived from this software without specific prior written permission. """ +import textwrap, re, copy +from warnings import warn +from collections import namedtuple +from collections.abc import Mapping + __all__ = ['Parameter', 'NumpyDocString', 'dedent_lines'] Parameter = namedtuple('Parameter', ['name', 'type', 'desc']) @@ -89,14 +94,17 @@ def __str__(self): class NumpyDocString(Mapping): """Parses a numpydoc string to an abstract representation """ - sections = { 'Signature': '', 'Summary': [''], 'Extended': [], 'Parameters': [], 'Returns': [], 'Yields': [], 'Raises': [] } + sections = { 'Summary': [''], 'Extended': [], 'Parameters': [], 'Returns': [] } def __init__(self, docstring, config=None): docstring = textwrap.dedent(docstring).split('\n') self._doc = Reader(docstring) self._parsed_data = copy.deepcopy(self.sections) self._parse() - if 'Parameters' in self: self['Parameters'] = {o.name:o for o in self['Parameters']} + self['Parameters'] = {o.name:o for o in self['Parameters']} + if self['Returns']: self['Returns'] = self['Returns'][0] + self['Summary'] = dedent_lines(self['Summary'], split=False) + self['Extended'] = dedent_lines(self['Extended'], split=False) def __iter__(self): return iter(self._parsed_data) def __len__(self): return len(self._parsed_data) @@ -171,7 +179,6 @@ def _parse_summary(self): summary_str = " ".join([s.strip() for s in summary]).strip() compiled = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$') if compiled.match(summary_str): - self['Signature'] = summary_str if not self._is_at_section(): continue break @@ -216,16 +223,12 @@ def _obj(self): def _error_location(self, msg, error=True): if self._obj is not None: - # we know where the docs came from: - try: filename = inspect.getsourcefile(self._obj) - except TypeError: filename 
= None # Make UserWarning more descriptive via object introspection. # Skip if introspection fails name = getattr(self._obj, '__name__', None) if name is None: name = getattr(getattr(self._obj, '__class__', None), '__name__', None) if name is not None: msg += f" in the docstring of {name}" - msg += f" in {filename}." if filename else "" if error: raise ValueError(msg) else: warn(msg) @@ -234,10 +237,6 @@ def _error_location(self, msg, error=True): def _str_header(self, name, symbol='-'): return [name, len(name)*symbol] def _str_indent(self, doc, indent=4): return [' '*indent + line for line in doc] - def _str_signature(self): - if self['Signature']: return [self['Signature'].replace('*', r'\*')] + [''] - return [''] - def _str_summary(self): if self['Summary']: return self['Summary'] + [''] return [] @@ -259,18 +258,10 @@ def _str_param_list(self, name): out += [''] return out - def __str__(self, func_role=''): - out = [] - out += self._str_signature() - out += self._str_summary() - out += self._str_extended_summary() - for param_list in ('Parameters', 'Returns', 'Yields', 'Receives', 'Other Parameters', 'Raises', 'Warns'): - out += self._str_param_list(param_list) - for param_list in ('Attributes', 'Methods'): out += self._str_param_list(param_list) - return '\n'.join(out) - -def dedent_lines(lines): +def dedent_lines(lines, split=True): """Deindent a list of lines maximally""" - return textwrap.dedent("\n".join(lines)).split("\n") + res = textwrap.dedent("\n".join(lines)) + if split: res = res.split("\n") + return res diff --git a/nbs/06_docments.ipynb b/nbs/06_docments.ipynb index d88a2bab..e6c4bb32 100644 --- a/nbs/06_docments.ipynb +++ b/nbs/06_docments.ipynb @@ -26,15 +26,18 @@ "source": [ "#export\n", "from __future__ import annotations\n", + "\n", + "import re\n", "from tokenize import tokenize,COMMENT\n", "from ast import parse,FunctionDef\n", "from io import BytesIO\n", "from textwrap import dedent\n", "from types import SimpleNamespace\n", "from inspect 
import getsource,isfunction,isclass,signature,Parameter\n", - "from fastcore.basics import *\n", + "from fastcore.utils import *\n", "\n", - "import re" + "from fastcore import docscrape\n", + "from inspect import isclass" ] }, { @@ -76,14 +79,16 @@ "metadata": {}, "outputs": [], "source": [ - "def add(a:int, b:int=0)->int:\n", + "def add_np(a:int, b:int=0)->int:\n", " \"\"\"The sum of two numbers.\n", + " \n", + " Used to demonstrate numpy-style docstrings.\n", "\n", "Parameters\n", "----------\n", - "x : int\n", + "a : int\n", " the 1st number to add\n", - "y : int\n", + "b : int\n", " the 2nd number to add (default: 0)\n", "\n", "Returns\n", @@ -114,22 +119,88 @@ " return a+b" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numpy docstring helper functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`docments` also supports numpy-style docstrings, or a mix of numpy-style and docments parameter documentation. The functions in this section help get and parse this information." 
+ ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def add(a, b:int=0)->int:\n", - " \"\"\"The sum of two numbers.\n", - "\n", - "Parameters\n", - "----------\n", - "x :\n", - " the 1st number to add\n", - "y : int\n", - " the 2nd number to add (default: 0)\"\"\"\n", - " return a+b" + "#export\n", + "def docstring(sym):\n", + " \"Get docstring for `sym` for functions ad classes\"\n", + " if isinstance(sym, str): return sym\n", + " res = getattr(sym, \"__doc__\", None)\n", + " if not res and isclass(sym): res = nested_attr(sym, \"__init__.__doc__\")\n", + " return res or \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_eq(docstring(add), \"The sum of two numbers.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def parse_docstring(sym):\n", + " \"Parse a numpy-style docstring in `sym`\"\n", + " docs = docstring(sym)\n", + " return AttrDict(**docscrape.NumpyDocString(docstring(sym)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "```json\n", + "{ 'Extended': 'Used to demonstrate numpy-style docstrings.',\n", + " 'Parameters': { 'a': Parameter(name='a', type='int', desc=['the 1st number to add']),\n", + " 'b': Parameter(name='b', type='int', desc=['the 2nd number to add (default: 0)'])},\n", + " 'Returns': Parameter(name='', type='int', desc=['the result of adding `a` to `b`']),\n", + " 'Summary': 'The sum of two numbers.'}\n", + "```" + ], + "text/plain": [ + "{'Summary': 'The sum of two numbers.',\n", + " 'Extended': 'Used to demonstrate numpy-style docstrings.',\n", + " 'Parameters': {'a': Parameter(name='a', type='int', desc=['the 1st number to add']),\n", + " 'b': Parameter(name='b', type='int', desc=['the 2nd number to add (default: 0)'])},\n", + " 'Returns': 
Parameter(name='', type='int', desc=['the result of adding `a` to `b`'])}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parse_docstring(add_np)" ] }, { @@ -163,7 +234,7 @@ "def _param_locs(s, returns=True):\n", " \"`dict` of parameter line numbers to names\"\n", " body = _parses(s).body\n", - " if len(body)!=1or not isinstance(body[0], FunctionDef): return None\n", + " if len(body)!=1 or not isinstance(body[0], FunctionDef): return None\n", " defn = body[0]\n", " res = {arg.lineno:arg.arg for arg in defn.args.args}\n", " if returns and defn.returns: res[defn.returns.lineno] = 'return'\n", @@ -201,6 +272,26 @@ " return AttrDict(docment=docs.get(name), anno=anno, default=default)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def _merge_doc(dm, npdoc):\n", + " if not npdoc: return dm\n", + " if not dm.anno or dm.anno==empty: dm.anno = npdoc.type\n", + " if not dm.docment: dm.docment = '\\n'.join(npdoc.desc)\n", + " return dm\n", + "\n", + "def _merge_docs(dms, npdocs):\n", + " npparams = npdocs['Parameters']\n", + " params = {nm:_merge_doc(dm,npparams.get(nm,None)) for nm,dm in dms.items()}\n", + " if 'return' in dms: params['return'] = _merge_doc(dms['return'], npdocs['Returns'])\n", + " return params" + ] + }, { "cell_type": "code", "execution_count": null, @@ -210,20 +301,22 @@ "#export\n", "def docments(s, full=False, returns=True, eval_str=False):\n", " \"`dict` of parameter names to 'docment-style' comments in function or string `s`\"\n", + " nps = parse_docstring(s)\n", " if isclass(s): s = s.__init__ # Constructor for a class\n", " comments = {o.start[0]:_clean_comment(o.string) for o in _tokens(s) if o.type==COMMENT}\n", " parms = _param_locs(s, returns=returns)\n", " docs = {arg:_get_comment(line, arg, comments, parms) for line,arg in parms.items()}\n", - " if not full: return AttrDict(docs)\n", "\n", " if 
isinstance(s,str): s = eval(s)\n", " sig = signature(s)\n", " res = {arg:_get_full(p.annotation, p.name, p.default, docs) for arg,p in sig.parameters.items()}\n", " if returns: res['return'] = _get_full(sig.return_annotation, 'return', empty, docs)\n", + " res = _merge_docs(res, nps)\n", " if eval_str:\n", " hints = type_hints(s)\n", " for k,v in res.items():\n", " if k in hints: v['anno'] = hints.get(k)\n", + " if not full: res = {k:v['docment'] for k,v in res.items()}\n", " return AttrDict(res)" ] }, @@ -231,7 +324,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The returned `dict` has parameter names as keys, and comments as values. The return value comment appears in the `return`, unless `returns=False`. Using the `add` definition above, we get:" + "The returned `dict` has parameter names as keys, and docments as values. The return value comment appears in the `return`, unless `returns=False`. Using the `add` definition above, we get:" ] }, { @@ -267,7 +360,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is the same format that `__annotations__` uses in Python." + "If you pass `full=True`, the values are `dict`s of defaults, types, and docments. Note that the type annotation is inferred from the default value, if the annotation is empty and a default is supplied." 
] }, { @@ -277,8 +370,27 @@ "outputs": [ { "data": { + "text/markdown": [ + "```json\n", + "{ 'a': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the 1st number to add'},\n", + " 'b': { 'anno': ,\n", + " 'default': 0,\n", + " 'docment': 'the 2nd number to add'},\n", + " 'return': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the result of adding `a` to `b`'}}\n", + "```" + ], "text/plain": [ - "{'a': 'int', 'return': 'int'}" + "{'a': {'docment': 'the 1st number to add',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty},\n", + " 'b': {'docment': 'the 2nd number to add', 'anno': int, 'default': 0},\n", + " 'return': {'docment': 'the result of adding `a` to `b`',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty}}" ] }, "execution_count": null, @@ -287,14 +399,7 @@ } ], "source": [ - "add.__annotations__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pass `full=True` to get the annotation and default value for each parameter, along with its docment. Note that the type annotation is inferred from the default value, if the annotation is empty and a default is supplied." 
+ "docments(add, full=True)" ] }, { @@ -306,13 +411,25 @@ "data": { "text/markdown": [ "```json\n", - "{ 'anno': 'int',\n", - " 'default': ,\n", - " 'docment': 'the 1st number to add'}\n", + "{ 'a': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the 1st number to add'},\n", + " 'b': { 'anno': ,\n", + " 'default': 0,\n", + " 'docment': 'the 2nd number to add'},\n", + " 'return': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the result of adding `a` to `b`'}}\n", "```" ], "text/plain": [ - "{'docment': 'the 1st number to add', 'anno': 'int', 'default': inspect._empty}" + "{'a': {'docment': 'the 1st number to add',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty},\n", + " 'b': {'docment': 'the 2nd number to add', 'anno': int, 'default': 0},\n", + " 'return': {'docment': 'the result of adding `a` to `b`',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty}}" ] }, "execution_count": null, @@ -321,7 +438,7 @@ } ], "source": [ - "docments(add, full=True)['a']" + "docments(add, full=True)" ] }, { @@ -448,11 +565,11 @@ "data": { "text/markdown": [ "```json\n", - "{'a': 'First operand', 'b': '2nd operand', 'self': None}\n", + "{'a': 'First operand', 'b': '2nd operand', 'return': None, 'self': None}\n", "```" ], "text/plain": [ - "{'self': None, 'a': 'First operand', 'b': '2nd operand'}" + "{'self': None, 'a': 'First operand', 'b': '2nd operand', 'return': None}" ] }, "execution_count": null, @@ -489,6 +606,137 @@ "docments(Adder.calculate)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "docments can also be extracted from numpy-style docstrings:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The sum of two numbers.\n", + " \n", + " Used to demonstrate numpy-style docstrings.\n", + "\n", + "Parameters\n", + "----------\n", + "a : int\n", + " the 1st number to add\n", + "b : int\n", + " the 2nd number 
to add (default: 0)\n", + "\n", + "Returns\n", + "-------\n", + "int\n", + " the result of adding `a` to `b`\n" + ] + } + ], + "source": [ + "print(add_np.__doc__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "```json\n", + "{ 'a': 'the 1st number to add',\n", + " 'b': 'the 2nd number to add (default: 0)',\n", + " 'return': 'the result of adding `a` to `b`'}\n", + "```" + ], + "text/plain": [ + "{'a': 'the 1st number to add',\n", + " 'b': 'the 2nd number to add (default: 0)',\n", + " 'return': 'the result of adding `a` to `b`'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docments(add_np)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can even mix and match docments and numpy parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def add_mixed(a:int, # the first number to add\n", + " b\n", + " )->int: # the result\n", + " \"\"\"The sum of two numbers.\n", + "\n", + "Parameters\n", + "----------\n", + "b : int\n", + " the 2nd number to add (default: 0)\"\"\"\n", + " return a+b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "```json\n", + "{ 'a': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the first number to add'},\n", + " 'b': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the 2nd number to add (default: 0)'},\n", + " 'return': { 'anno': 'int',\n", + " 'default': ,\n", + " 'docment': 'the result'}}\n", + "```" + ], + "text/plain": [ + "{'a': {'docment': 'the first number to add',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty},\n", + " 'b': {'docment': 'the 2nd number to add (default: 0)',\n", + " 'anno': 'int',\n", + " 'default': inspect._empty},\n", + " 'return': {'docment': 'the 
result', 'anno': 'int', 'default': inspect._empty}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docments(add_mixed, full=True)" + ] + }, { "cell_type": "markdown", "metadata": {},