CLN: Replace Appender and Substitution with simpler doc decorator (#3…

…1060)
pandas-dev · Feb 12, 2020 · 143b011 · 143b011
1 parent 012a6a3
commit 143b011
Show file tree

Hide file tree

Showing 9 changed files with 266 additions and 145 deletions.
diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst
@@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear
 for the user reading. It comes at the cost of some complexity when writing.
 
 Each shared docstring will have a base template with variables, like
-``%(klass)s``. The variables filled in later on using the ``Substitution``
-decorator. Finally, docstrings can be appended to with the ``Appender``
-decorator.
+``{klass}``. The variables filled in later on using the ``doc`` decorator.
+Finally, docstrings can also be appended to with the ``doc`` decorator.
 
 In this example, we'll create a parent docstring normally (this is like
 ``pandas.core.generic.NDFrame``. Then we'll have two children (like
 ``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
-substitute the children's class names in this docstring.
+substitute the class names in this docstring.
 
 .. code-block:: python
 
    class Parent:
+       @doc(klass="Parent")
        def my_function(self):
-           """Apply my function to %(klass)s."""
+           """Apply my function to {klass}."""
            ...
 
 
    class ChildA(Parent):
-       @Substitution(klass="ChildA")
-       @Appender(Parent.my_function.__doc__)
+       @doc(Parent.my_function, klass="ChildA")
        def my_function(self):
            ...
 
 
    class ChildB(Parent):
-       @Substitution(klass="ChildB")
-       @Appender(Parent.my_function.__doc__)
+       @doc(Parent.my_function, klass="ChildB")
        def my_function(self):
            ...
 
@@ -972,18 +970,16 @@ The resulting docstrings are
 .. code-block:: python
 
    >>> print(Parent.my_function.__doc__)
-   Apply my function to %(klass)s.
+   Apply my function to Parent.
    >>> print(ChildA.my_function.__doc__)
    Apply my function to ChildA.
    >>> print(ChildB.my_function.__doc__)
    Apply my function to ChildB.
 
-Notice two things:
+Notice:
 
 1. We "append" the parent docstring to the children docstrings, which are
    initially empty.
-2. Python decorators are applied inside out. So the order is Append then
-   Substitution, even though Substitution comes first in the file.
 
 Our files will often contain a module-level ``_shared_doc_kwargs`` with some
 common substitution values (things like ``klass``, ``axes``, etc).
@@ -992,14 +988,13 @@ You can substitute and append in one shot with something like
 
 .. code-block:: python
 
-   @Appender(template % _shared_doc_kwargs)
+   @doc(template, **_shared_doc_kwargs)
    def my_function(self):
        ...
 
 where ``template`` may come from a module-level ``_shared_docs`` dictionary
 mapping function names to docstrings. Wherever possible, we prefer using
-``Appender`` and ``Substitution``, since the docstring-writing processes is
-slightly closer to normal.
+``doc``, since the docstring-writing processes is slightly closer to normal.
 
 See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
 ``pandas.core.series.Series.fillna`` and ``pandas.core.generic.frame.fillna``

diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
@@ -7,7 +7,7 @@
 from typing import FrozenSet, Set
 import warnings
 
-from pandas.util._decorators import Appender
+from pandas.util._decorators import doc
 
 
 class DirNamesMixin:
@@ -193,122 +193,119 @@ def __get__(self, obj, cls):
         return accessor_obj
 
 
+@doc(klass="", others="")
 def _register_accessor(name, cls):
-    def decorator(accessor):
-        if hasattr(cls, name):
-            warnings.warn(
-                f"registration of accessor {repr(accessor)} under name "
-                f"{repr(name)} for type {repr(cls)} is overriding a preexisting"
-                f"attribute with the same name.",
-                UserWarning,
-                stacklevel=2,
-            )
-        setattr(cls, name, CachedAccessor(name, accessor))
-        cls._accessors.add(name)
-        return accessor
-
-    return decorator
+    """
+    Register a custom accessor on {klass} objects.
 
+    Parameters
+    ----------
+    name : str
+        Name under which the accessor should be registered. A warning is issued
+        if this name conflicts with a preexisting attribute.
 
-_doc = """
-Register a custom accessor on %(klass)s objects.
+    Returns
+    -------
+    callable
+        A class decorator.
 
-Parameters
-----------
-name : str
-    Name under which the accessor should be registered. A warning is issued
-    if this name conflicts with a preexisting attribute.
+    See Also
+    --------
+    {others}
 
-Returns
--------
-callable
-    A class decorator.
+    Notes
+    -----
+    When accessed, your accessor will be initialized with the pandas object
+    the user is interacting with. So the signature must be
 
-See Also
---------
-%(others)s
+    .. code-block:: python
 
-Notes
------
-When accessed, your accessor will be initialized with the pandas object
-the user is interacting with. So the signature must be
+        def __init__(self, pandas_object):  # noqa: E999
+            ...
 
-.. code-block:: python
+    For consistency with pandas methods, you should raise an ``AttributeError``
+    if the data passed to your accessor has an incorrect dtype.
 
-    def __init__(self, pandas_object):  # noqa: E999
-        ...
+    >>> pd.Series(['a', 'b']).dt
+    Traceback (most recent call last):
+    ...
+    AttributeError: Can only use .dt accessor with datetimelike values
 
-For consistency with pandas methods, you should raise an ``AttributeError``
-if the data passed to your accessor has an incorrect dtype.
+    Examples
+    --------
 
->>> pd.Series(['a', 'b']).dt
-Traceback (most recent call last):
-...
-AttributeError: Can only use .dt accessor with datetimelike values
+    In your library code::
 
-Examples
---------
+        import pandas as pd
 
-In your library code::
+        @pd.api.extensions.register_dataframe_accessor("geo")
+        class GeoAccessor:
+            def __init__(self, pandas_obj):
+                self._obj = pandas_obj
 
-    import pandas as pd
+            @property
+            def center(self):
+                # return the geographic center point of this DataFrame
+                lat = self._obj.latitude
+                lon = self._obj.longitude
+                return (float(lon.mean()), float(lat.mean()))
 
-    @pd.api.extensions.register_dataframe_accessor("geo")
-    class GeoAccessor:
-        def __init__(self, pandas_obj):
-            self._obj = pandas_obj
+            def plot(self):
+                # plot this array's data on a map, e.g., using Cartopy
+                pass
 
-        @property
-        def center(self):
-            # return the geographic center point of this DataFrame
-            lat = self._obj.latitude
-            lon = self._obj.longitude
-            return (float(lon.mean()), float(lat.mean()))
+    Back in an interactive IPython session:
 
-        def plot(self):
-            # plot this array's data on a map, e.g., using Cartopy
-            pass
+        >>> ds = pd.DataFrame({{'longitude': np.linspace(0, 10),
+        ...                    'latitude': np.linspace(0, 20)}})
+        >>> ds.geo.center
+        (5.0, 10.0)
+        >>> ds.geo.plot()
+        # plots data on a map
+    """
 
-Back in an interactive IPython session:
+    def decorator(accessor):
+        if hasattr(cls, name):
+            warnings.warn(
+                f"registration of accessor {repr(accessor)} under name "
+                f"{repr(name)} for type {repr(cls)} is overriding a preexisting"
+                f"attribute with the same name.",
+                UserWarning,
+                stacklevel=2,
+            )
+        setattr(cls, name, CachedAccessor(name, accessor))
+        cls._accessors.add(name)
+        return accessor
 
-    >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10),
-    ...                    'latitude': np.linspace(0, 20)})
-    >>> ds.geo.center
-    (5.0, 10.0)
-    >>> ds.geo.plot()
-    # plots data on a map
-"""
+    return decorator
 
 
-@Appender(
-    _doc
-    % dict(
-        klass="DataFrame", others=("register_series_accessor, register_index_accessor")
-    )
+@doc(
+    _register_accessor,
+    klass="DataFrame",
+    others="register_series_accessor, register_index_accessor",
 )
 def register_dataframe_accessor(name):
     from pandas import DataFrame
 
     return _register_accessor(name, DataFrame)
 
 
-@Appender(
-    _doc
-    % dict(
-        klass="Series", others=("register_dataframe_accessor, register_index_accessor")
-    )
+@doc(
+    _register_accessor,
+    klass="Series",
+    others="register_dataframe_accessor, register_index_accessor",
 )
 def register_series_accessor(name):
     from pandas import Series
 
     return _register_accessor(name, Series)
 
 
-@Appender(
-    _doc
-    % dict(
-        klass="Index", others=("register_dataframe_accessor, register_series_accessor")
-    )
+@doc(
+    _register_accessor,
+    klass="Index",
+    others="register_dataframe_accessor, register_series_accessor",
 )
 def register_index_accessor(name):
     from pandas import Index

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -11,7 +11,7 @@
 
 from pandas._libs import Timestamp, algos, hashtable as htable, lib
 from pandas._libs.tslib import iNaT
-from pandas.util._decorators import Appender, Substitution
+from pandas.util._decorators import doc
 
 from pandas.core.dtypes.cast import (
     construct_1d_object_array_from_listlike,
@@ -487,9 +487,32 @@ def _factorize_array(
     return codes, uniques
 
 
-_shared_docs[
-    "factorize"
-] = """
+@doc(
+    values=dedent(
+        """\
+    values : sequence
+        A 1-D sequence. Sequences that aren't pandas objects are
+        coerced to ndarrays before factorization.
+    """
+    ),
+    sort=dedent(
+        """\
+    sort : bool, default False
+        Sort `uniques` and shuffle `codes` to maintain the
+        relationship.
+    """
+    ),
+    size_hint=dedent(
+        """\
+    size_hint : int, optional
+        Hint to the hashtable sizer.
+    """
+    ),
+)
+def factorize(
+    values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None
+) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
+    """
     Encode the object as an enumerated type or categorical variable.
 
     This method is useful for obtaining a numeric representation of an
@@ -499,10 +522,10 @@ def _factorize_array(
 
     Parameters
     ----------
-    %(values)s%(sort)s
+    {values}{sort}
     na_sentinel : int, default -1
         Value to mark "not found".
-    %(size_hint)s\
+    {size_hint}\
 
     Returns
     -------
@@ -580,34 +603,6 @@ def _factorize_array(
     >>> uniques
     Index(['a', 'c'], dtype='object')
     """
-
-
-@Substitution(
-    values=dedent(
-        """\
-    values : sequence
-        A 1-D sequence. Sequences that aren't pandas objects are
-        coerced to ndarrays before factorization.
-    """
-    ),
-    sort=dedent(
-        """\
-    sort : bool, default False
-        Sort `uniques` and shuffle `codes` to maintain the
-        relationship.
-    """
-    ),
-    size_hint=dedent(
-        """\
-    size_hint : int, optional
-        Hint to the hashtable sizer.
-    """
-    ),
-)
-@Appender(_shared_docs["factorize"])
-def factorize(
-    values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None
-) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)
     # 2.) factorizing codes and uniques