diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 865f1ccae2c04..61f43146aba85 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1200,14 +1200,14 @@ Regroup columns of a DataFrame according to their sum, and sum the aggregated on df df.groupby(df.sum(), axis=1).sum() -.. _groupby.multicolumn_factorization +.. _groupby.multicolumn_factorization: Multi-column factorization ~~~~~~~~~~~~~~~~~~~~~~~~~~ By using ``.ngroup()``, we can extract information about the groups in a way similar to :func:`factorize` (as described further in the -:ref:`reshaping API <reshaping.factorization>`) but which applies +:ref:`reshaping API <reshaping.factorize>`) but which applies naturally to multiple columns of mixed type and different sources. This can be useful as an intermediate categorical-like step in processing, when the relationships between the group rows are more diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fdf5d01484b98..7b56c30fcc9f6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1292,7 +1292,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): As of v0.20.2 these additional compressors for Blosc are supported (default if no compressor specified: 'blosc:blosclz'): {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', - 'blosc:zlib', 'blosc:zstd'}. + 'blosc:zlib', 'blosc:zstd'}. Specifying a compression library which is not available issues a ValueError. 
fletcher32 : bool, default False diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9d6d2297f6ea0..c4b3e25acae7e 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1212,8 +1212,6 @@ def ohlc(self): lambda x: x._cython_agg_general('ohlc')) @Appender(DataFrame.describe.__doc__) - @Substitution(name='groupby') - @Appender(_doc_template) def describe(self, **kwargs): self._set_group_selection() result = self.apply(lambda x: x.describe(**kwargs)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 129f291e5f843..74d4f3f955f26 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1847,7 +1847,8 @@ def argsort(self, axis=0, kind='quicksort', order=None): dtype='int64').__finalize__(self) def nlargest(self, n=5, keep='first'): - """Return the largest `n` elements. + """ + Return the largest `n` elements. Parameters ---------- @@ -1893,7 +1894,8 @@ def nlargest(self, n=5, keep='first'): return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() def nsmallest(self, n=5, keep='first'): - """Return the smallest `n` elements. + """ + Return the smallest `n` elements. Parameters ---------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9ec3f79e1ae70..0350849037391 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -152,7 +152,7 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" + fill("', '".join(sorted(_NA_VALUES)), - 70, subsequent_indent=" ") + """'`. + 70, subsequent_indent=" ") + """'. keep_default_na : bool, default True If na_values are specified and keep_default_na is False the default NaN values are overridden, otherwise they're appended to. @@ -181,22 +181,23 @@ Note: A fast-path exists for iso8601-formatted dates. 
infer_datetime_format : boolean, default False - If True and parse_dates is enabled, pandas will attempt to infer the format - of the datetime strings in the columns, and if it can be inferred, switch - to a faster method of parsing them. In some cases this can increase the - parsing speed by 5-10x. + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. keep_date_col : boolean, default False - If True and parse_dates specifies combining multiple columns then + If True and `parse_dates` specifies combining multiple columns then keep the original columns. date_parser : function, default None Function to use for converting a sequence of string columns to an array of datetime instances. The default uses ``dateutil.parser.parser`` to do the - conversion. Pandas will try to call date_parser in three different ways, + conversion. Pandas will try to call `date_parser` in three different ways, advancing to the next if an exception occurs: 1) Pass one or more arrays - (as defined by parse_dates) as arguments; 2) concatenate (row-wise) the - string values from the columns defined by parse_dates into a single array - and pass that; and 3) call date_parser once for each row using one or more - strings (corresponding to the columns defined by parse_dates) as arguments. + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. dayfirst : boolean, default False DD/MM format dates, international and European format iterator : boolean, default False