diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4e8b4e3a6bec..b19391afc7c69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7390,20 +7390,26 @@ def f(s): def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear'): """ - Return values at the given quantile over requested axis. + Return value(s) at the given quantile over requested axis. + + This function returns the `q` quantile value(s) + from the DataFrame, dividing data points into groups + along the given `axis`. + If the desired quantile falls between two data points, + the `interpolation` scheme determines the value returned. Parameters ---------- - q : float or array-like, default 0.5 (50% quantile) - 0 <= q <= 1, the quantile(s) to compute - axis : {0, 1, 'index', 'columns'} (default 0) - 0 or 'index' for row-wise, 1 or 'columns' for column-wise - numeric_only : boolean, default True + q : float or array-like, default 0.5 + The quantile(s) to compute. + Each value should be a float between 0 and 1 (inclusive); + 0.5 is equivalent to calculating the 50% quantile, i.e. the median. + axis : {0 or 'index', 1 or 'columns'}, default 0 + For row-wise: 0 or 'index'; for column-wise: 1 or 'columns'. + numeric_only : bool, default True If False, the quantile of datetime and timedelta data will be - computed as well + computed as well. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - .. versionadded:: 0.18.0 - This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: @@ -7416,46 +7422,58 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - quantiles : Series or DataFrame - - - If ``q`` is an array, a DataFrame will be returned where the - index is ``q``, the columns are the columns of self, and the - values are the quantiles. - - If ``q`` is a float, a Series will be returned where the - index is the columns of self and the values are the quantiles.
- - Examples - -------- - - >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), - columns=['a', 'b']) - >>> df.quantile(.1) - a 1.3 - b 3.7 - dtype: float64 - >>> df.quantile([.1, .5]) - a b - 0.1 1.3 3.7 - 0.5 2.5 55.0 - - Specifying `numeric_only=False` will also compute the quantile of - datetime and timedelta data. - - >>> df = pd.DataFrame({'A': [1, 2], - 'B': [pd.Timestamp('2010'), - pd.Timestamp('2011')], - 'C': [pd.Timedelta('1 days'), - pd.Timedelta('2 days')]}) - >>> df.quantile(0.5, numeric_only=False) - A 1.5 - B 2010-07-02 12:00:00 - C 1 days 12:00:00 - Name: 0.5, dtype: object + Series or DataFrame See Also -------- pandas.core.window.Rolling.quantile + Returns the rolling quantile for the DataFrame. numpy.percentile + Returns the 'nth' percentile for numpy arrays. + + Examples + -------- + >>> import numpy as np + >>> d = {'animal':['Cheetah','Falcon','Eagle','Goose','Pigeon'], + ... 'class':['mammal','bird','bird','bird','bird'], + ... 'max_speed':[120,np.nan,320,142,150]} + >>> df = pd.DataFrame(data=d) + >>> df + animal class max_speed + 0 Cheetah mammal 120.0 + 1 Falcon bird NaN + 2 Eagle bird 320.0 + 3 Goose bird 142.0 + 4 Pigeon bird 150.0 + + The `max_speed` values in descending order: + + >>> df['max_speed'].sort_values(ascending=False) + 2 320.0 + 4 150.0 + 3 142.0 + 0 120.0 + 1 NaN + Name: max_speed, dtype: float64 + + >>> df.quantile() + max_speed 146.0 + Name: 0.5, dtype: float64 + + The above was calculated by interpolating between available values. + + >>> df.quantile(q=[0.05,0.95]) + max_speed + 0.05 123.3 + 0.95 294.5 + >>> df.quantile(q=[0.05,0.95],interpolation="higher") + max_speed + 0.05 142.0 + 0.95 320.0 + >>> df.quantile(q=[0.05,0.95],interpolation="lower") + max_speed + 0.05 120.0 + 0.95 150.0 """ self._check_percentile(q)