-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DOC: Adding script to validate docstrings, and generate list of all functions/methods with state #19898
DOC: Adding script to validate docstrings, and generate list of all functions/methods with state #19898
Changes from 6 commits
40f2f0a
356304e
03749d5
070841e
b386271
78419b7
b32df4b
da24d08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,296 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Analyze docstrings to detect errors. | ||
|
||
If no argument is provided, it does a quick check of docstrings and returns | ||
a csv with all API functions and results of basic checks. | ||
|
||
If a function or method is provided in the form "pandas.function", | ||
"pandas.module.class.method", etc. a list of all errors in the docstring for | ||
the specified function or method. | ||
|
||
Usage:: | ||
$ ./validate_docstrings.py | ||
$ ./validate_docstrings.py pandas.DataFrame.head | ||
""" | ||
import os | ||
import sys | ||
import csv | ||
import re | ||
import functools | ||
import argparse | ||
import inspect | ||
import importlib | ||
import doctest | ||
import numpy | ||
|
||
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) | ||
|
||
sys.path.insert(0, os.path.join(BASE_PATH)) | ||
import pandas | ||
|
||
sys.path.insert(1, os.path.join(BASE_PATH, 'doc', 'sphinxext')) | ||
from numpydoc.docscrape import NumpyDocString | ||
|
||
|
||
def _to_original_callable(obj): | ||
while True: | ||
if inspect.isfunction(obj) or inspect.isclass(obj): | ||
f = inspect.getfile(obj) | ||
if f.startswith('<') and f.endswith('>'): | ||
return None | ||
return obj | ||
if inspect.ismethod(obj): | ||
obj = obj.__func__ | ||
elif isinstance(obj, functools.partial): | ||
obj = obj.func | ||
elif isinstance(obj, property): | ||
obj = obj.fget | ||
else: | ||
return None | ||
|
||
|
||
class Docstring: | ||
def __init__(self, method_name, method_obj): | ||
self.method_name = method_name | ||
self.method_obj = method_obj | ||
self.raw_doc = method_obj.__doc__ or '' | ||
self.doc = NumpyDocString(self.raw_doc) | ||
|
||
def __len__(self): | ||
return len(self.raw_doc) | ||
|
||
@property | ||
def source_file_name(self): | ||
fname = inspect.getsourcefile(self.method_obj) | ||
if fname: | ||
fname = os.path.relpath(fname, BASE_PATH) | ||
return fname | ||
|
||
@property | ||
def source_file_def_line(self): | ||
try: | ||
return inspect.getsourcelines(self.method_obj)[-1] | ||
except OSError: | ||
pass | ||
|
||
@property | ||
def github_url(self): | ||
url = 'https://github.com/pandas-dev/pandas/blob/master/' | ||
url += '{}#L{}'.format(self.source_file_name, | ||
self.source_file_def_line) | ||
return url | ||
|
||
@property | ||
def first_line_blank(self): | ||
if self.raw_doc: | ||
return not bool(self.raw_doc.split('\n')[0].strip()) | ||
|
||
@property | ||
def summary(self): | ||
return ' '.join(self.doc['Summary']) | ||
|
||
@property | ||
def extended_summary(self): | ||
return ' '.join(self.doc['Extended Summary']) | ||
|
||
@property | ||
def needs_summary(self): | ||
return not (bool(self.summary) and bool(self.extended_summary)) | ||
|
||
@property | ||
def doc_parameters(self): | ||
return self.doc['Parameters'] | ||
|
||
@property | ||
def signature_parameters(self): | ||
if not inspect.isfunction(self.method_obj): | ||
return tuple() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason you do this? As the code below works fine for a class as well, eg pandas.DataFrame (while now the script will say the keywords and signature did not match, while in fact they do) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't research much, but if I introspect parameters from a non-function I get this error:
|
||
params = tuple(inspect.signature(self.method_obj).parameters.keys()) | ||
if params and params[0] in ('self', 'cls'): | ||
return params[1:] | ||
return params | ||
|
||
@property | ||
def correct_parameters(self): | ||
if self.doc_parameters: | ||
doc_param_names = list(zip(*self.doc_parameters))[0] | ||
return doc_param_names == self.signature_parameters | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We could return here the non-fitting ones to make the message more informative.
(or do a difference in both directions to be even more explicit where it is missing, but that makes it a bit more complex) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another thing: do we care about the order? Because now for
So this will be very confusing I think. So either we need to say that in the error message, or for now not care about the order (I would maybe go for this last one, as in some cases it can be done on purpose, like putting a deprecated one at the end of the docstring, but for compatibility you need to keep it in its current place) |
||
|
||
return not bool(self.signature_parameters) | ||
|
||
@property | ||
def see_also(self): | ||
return self.doc['See Also'] | ||
|
||
@property | ||
def examples(self): | ||
return self.doc['Examples'] | ||
|
||
@property | ||
def first_line_ends_in_dot(self): | ||
if self.doc: | ||
return self.doc.split('\n')[0][-1] == '.' | ||
|
||
@property | ||
def deprecated(self): | ||
pattern = re.compile('.. deprecated:: ') | ||
return (self.method_name.startswith('pandas.Panel') or | ||
bool(pattern.search(self.summary)) or | ||
bool(pattern.search(self.extended_summary))) | ||
|
||
@property | ||
def examples_pass_tests(self): | ||
flags = doctest.NORMALIZE_WHITESPACE | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you also add the |
||
finder = doctest.DocTestFinder() | ||
runner = doctest.DocTestRunner(optionflags=flags) | ||
context = {'np': numpy, 'pd': pandas} | ||
total_failed = 0 | ||
for test in finder.find(self.raw_doc, self.method_name, globs=context): | ||
total_failed += runner.run(test).failed | ||
return total_failed == 0 | ||
|
||
|
||
def get_api_items(): | ||
api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') | ||
|
||
position = None | ||
with open(api_fname) as f: | ||
for line in f: | ||
if line.startswith('.. currentmodule::'): | ||
current_module = line.replace('.. currentmodule::', '').strip() | ||
continue | ||
|
||
if line == '.. autosummary::\n': | ||
position = 'autosummary' | ||
continue | ||
|
||
if position == 'autosummary': | ||
if line == '\n': | ||
position = 'items' | ||
continue | ||
|
||
if position == 'items': | ||
if line == '\n': | ||
position = None | ||
continue | ||
item = line.strip() | ||
func = importlib.import_module(current_module) | ||
for part in item.split('.'): | ||
func = getattr(func, part) | ||
|
||
yield '.'.join([current_module, item]), func | ||
|
||
|
||
def validate_all(): | ||
writer = csv.writer(sys.stdout) | ||
writer.writerow(['Function or method', | ||
'Type', | ||
'File', | ||
'Code line', | ||
'GitHub link', | ||
'Is deprecated', | ||
'Has summary', | ||
'Has extended summary', | ||
'Parameters ok', | ||
'Has examples', | ||
'Shared code with']) | ||
seen = {} | ||
for func_name, func in get_api_items(): | ||
obj_type = type(func).__name__ | ||
original_callable = _to_original_callable(func) | ||
if original_callable is None: | ||
writer.writerow([func_name, obj_type] + [''] * 9) | ||
else: | ||
doc = Docstring(func_name, original_callable) | ||
key = doc.source_file_name, doc.source_file_def_line | ||
shared_code = seen.get(key, '') | ||
seen[key] = func_name | ||
writer.writerow([func_name, | ||
obj_type, | ||
doc.source_file_name, | ||
doc.source_file_def_line, | ||
doc.github_url, | ||
int(doc.deprecated), | ||
int(bool(doc.summary)), | ||
int(bool(doc.extended_summary)), | ||
int(doc.correct_parameters), | ||
int(bool(doc.examples)), | ||
shared_code]) | ||
|
||
return 0 | ||
|
||
|
||
def validate_one(func_name): | ||
for maxsplit in range(1, func_name.count('.') + 1): | ||
# TODO when py3 only replace by: module, *func_parts = ... | ||
func_name_split = func_name.rsplit('.', maxsplit=maxsplit) | ||
module = func_name_split[0] | ||
func_parts = func_name_split[1:] | ||
try: | ||
func_obj = importlib.import_module(module) | ||
except ImportError: | ||
pass | ||
else: | ||
continue | ||
|
||
if 'module' not in locals(): | ||
raise ImportError('No module can be imported ' | ||
'from "{}"'.format(func_name)) | ||
|
||
for part in func_parts: | ||
func_obj = getattr(func_obj, part) | ||
|
||
doc = Docstring(func_name, func_obj) | ||
|
||
errs = [] | ||
if not doc.summary: | ||
errs.append('No summary found') | ||
else: | ||
if not doc.summary[0].isupper(): | ||
errs.append('Summary does not start with capital') | ||
if doc.summary[-1] != '.': | ||
errs.append('Summary does not end with dot') | ||
if doc.summary.split(' ')[0][-1] == 's': | ||
errs.append('Summary must start with infinitive verb, ' | ||
'not third person (e.g. use "Generate" instead of ' | ||
'"Generates")') | ||
if not doc.extended_summary: | ||
errs.append('No extended summary found') | ||
if not doc.correct_parameters: | ||
errs.append('Documented parameters do not match the signature') | ||
if not doc.examples: | ||
errs.append('No examples') | ||
else: | ||
if not doc.examples_pass_tests: | ||
errs.append('Examples do not pass test') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe add between brackets "(see output above)" ? |
||
|
||
if errs: | ||
sys.stderr.write('Errors for "{}" docstring:\n'.format(func_name)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe start the string with a |
||
for err in errs: | ||
sys.stderr.write('\t* {}\n'.format(err)) | ||
else: | ||
sys.stderr.write('Docstring for "{}" correct. :)\n'.format(func_name)) | ||
|
||
return len(errs) | ||
|
||
|
||
def main(function): | ||
if function is None: | ||
return validate_all() | ||
else: | ||
return validate_one(function) | ||
|
||
|
||
if __name__ == '__main__': | ||
argparser = argparse.ArgumentParser( | ||
description='validate pandas docstrings') | ||
argparser.add_argument('function', | ||
nargs='?', | ||
default=None, | ||
help=('function or method to validate ' | ||
'(e.g. pandas.DataFrame.head) ' | ||
'if not provided, all docstrings ' | ||
'are validated')) | ||
args = argparser.parse_args() | ||
sys.exit(main(args.function)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could return the summary, depending on how long the summary is. Eg is the summary is longer than 2 lines, regard it as an extended summary, and that there is no summary line.
Becuase now eg for the DataFrame.resample docstring, I get the message "No extended summary found", while actually it has one, but the summary line is missing, so that would be a more helpful message.