From 72d80821153e3922a64cf6dff5ea3a3da4bc5812 Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Sun, 30 Sep 2018 13:13:14 +0530 Subject: [PATCH 1/6] In this commit - Added extended summary for the function corrected output of example 1 Added summary for **See Also** functions Some typo fixes . --- pandas/core/frame.py | 45 ++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4e8b4e3a6bec..698dc82d6ed12 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7390,20 +7390,23 @@ def f(s): def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation='linear'): """ - Return values at the given quantile over requested axis. + Return value(s) at the given quantile over requested axis. + + This function calculates the 'q' quantile values on the dataframe, + dividing data points into groups along `axis` axis. + In case of insufficient number of data points for clean division + into groups, specify `interpolation` scheme to implement. Parameters ---------- - q : float or array-like, default 0.5 (50% quantile) - 0 <= q <= 1, the quantile(s) to compute - axis : {0, 1, 'index', 'columns'} (default 0) - 0 or 'index' for row-wise, 1 or 'columns' for column-wise + q : float or array-like, default 0.5 (50% quantile) [0 <= q <= 1] + The quantile(s) to compute. + axis : boolean{0, 1, 'index', 'columns'} (default 0) + For row-wise : 0 or'index', for column-wise : 1 or 'columns'. numeric_only : boolean, default True If False, the quantile of datetime and timedelta data will be - computed as well + computed as well. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} - .. 
versionadded:: 0.18.0 - This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: @@ -7418,21 +7421,20 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, ------- quantiles : Series or DataFrame - - If ``q`` is an array, a DataFrame will be returned where the - index is ``q``, the columns are the columns of self, and the + - If `q` is an array, a DataFrame will be returned where the + index is `q`, the columns are the columns of self, and the values are the quantiles. - - If ``q`` is a float, a Series will be returned where the + - If `q` is a float, a Series will be returned where the index is the columns of self and the values are the quantiles. Examples -------- - - >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), - columns=['a', 'b']) + >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), \ + columns=['a', 'b']) >>> df.quantile(.1) a 1.3 b 3.7 - dtype: float64 + Name: 0.1, dtype: float64 >>> df.quantile([.1, .5]) a b 0.1 1.3 3.7 @@ -7441,11 +7443,12 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Specifying `numeric_only=False` will also compute the quantile of datetime and timedelta data. - >>> df = pd.DataFrame({'A': [1, 2], - 'B': [pd.Timestamp('2010'), - pd.Timestamp('2011')], - 'C': [pd.Timedelta('1 days'), - pd.Timedelta('2 days')]}) + >>> df = pd.DataFrame({ 'A': [1, 2], \ + 'B': [pd.Timestamp('2010'), \ + pd.Timestamp('2011')], \ + 'C': [pd.Timedelta('1 days'), \ + pd.Timedelta('2 days')]}) + >>> df.quantile(0.5, numeric_only=False) A 1.5 B 2010-07-02 12:00:00 @@ -7455,7 +7458,9 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, See Also -------- pandas.core.window.Rolling.quantile + Returns the rolling quantile. numpy.percentile + Returns percentile. 
""" self._check_percentile(q) From 6ad3c1d8cc13ece3b3eb4ced74e022985c9e24a9 Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Sun, 30 Sep 2018 13:20:07 +0530 Subject: [PATCH 2/6] corrected PEP8 issue --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 698dc82d6ed12..7c0eb409a877f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7429,7 +7429,8 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Examples -------- - >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), \ + >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], \ + [3, 100], [4, 100]]), \ columns=['a', 'b']) >>> df.quantile(.1) a 1.3 From dce5a3e77a87c1f8b3f1fd4c3e7b6e35b4a7dbbd Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Sun, 30 Sep 2018 14:02:15 +0530 Subject: [PATCH 3/6] fixed **Returns** section --- pandas/core/frame.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7c0eb409a877f..91d9ed284c763 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7419,8 +7419,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - quantiles : Series or DataFrame - + scalar, Series or DataFrame - If `q` is an array, a DataFrame will be returned where the index is `q`, the columns are the columns of self, and the values are the quantiles. @@ -7459,9 +7458,9 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, See Also -------- pandas.core.window.Rolling.quantile - Returns the rolling quantile. + Returns the rolling quantile for the DataFrame. numpy.percentile - Returns percentile. + Returns 'nth' percentile for the DataFrame. 
""" self._check_percentile(q) From 4910bbe9c081604059f91ea21ecbb54fdb609c1b Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Mon, 1 Oct 2018 13:07:12 +0530 Subject: [PATCH 4/6] More fixes and better examples --- pandas/core/frame.py | 96 +++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 91d9ed284c763..89bdc26f634dd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7392,18 +7392,21 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, """ Return value(s) at the given quantile over requested axis. - This function calculates the 'q' quantile values on the dataframe, - dividing data points into groups along `axis` axis. + This function returns the Series of 'q' quantile value(s) + from the DataFrame, dividing data points into groups + along `axis` axis. In case of insufficient number of data points for clean division into groups, specify `interpolation` scheme to implement. Parameters ---------- - q : float or array-like, default 0.5 (50% quantile) [0 <= q <= 1] - The quantile(s) to compute. - axis : boolean{0, 1, 'index', 'columns'} (default 0) + q : float or array-like, default 0.5 + The quantile(s) to compute (0 <= q <= 1) (0.5 == 50% quantile) + If float is passed as `q`, scalar quantile is returned + If `array-like` is passed as `q`, Series is returned. + axis : {0 or 'index', 1 or 'columns'}, default 0 For row-wise : 0 or'index', for column-wise : 1 or 'columns'. - numeric_only : boolean, default True + numeric_only : bool, default True If False, the quantile of datetime and timedelta data will be computed as well. 
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} @@ -7419,48 +7422,69 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - scalar, Series or DataFrame + Series or DataFrame - If `q` is an array, a DataFrame will be returned where the index is `q`, the columns are the columns of self, and the values are the quantiles. - If `q` is a float, a Series will be returned where the index is the columns of self and the values are the quantiles. - Examples - -------- - >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], \ - [3, 100], [4, 100]]), \ - columns=['a', 'b']) - >>> df.quantile(.1) - a 1.3 - b 3.7 - Name: 0.1, dtype: float64 - >>> df.quantile([.1, .5]) - a b - 0.1 1.3 3.7 - 0.5 2.5 55.0 - - Specifying `numeric_only=False` will also compute the quantile of - datetime and timedelta data. - - >>> df = pd.DataFrame({ 'A': [1, 2], \ - 'B': [pd.Timestamp('2010'), \ - pd.Timestamp('2011')], \ - 'C': [pd.Timedelta('1 days'), \ - pd.Timedelta('2 days')]}) - - >>> df.quantile(0.5, numeric_only=False) - A 1.5 - B 2010-07-02 12:00:00 - C 1 days 12:00:00 - Name: 0.5, dtype: object - See Also -------- pandas.core.window.Rolling.quantile Returns the rolling quantile for the DataFrame. numpy.percentile Returns 'nth' percentile for the DataFrame. 
+ + Examples + -------- + >>> import pandas as pd + >>> d = {'Data': [416, 493, 423, 859, 32, 548,\ + 33, 951, 450, 1001, 998]} + >>> df = pd.DataFrame(data=d) + >>> df + Data + 0 416 + 1 493 + 2 423 + 3 859 + 4 32 + 5 548 + 6 33 + 7 951 + 8 450 + 9 1001 + 10 998 + >>> for i in sorted(df['Data'],reverse=True): print(i) + 1001 + 998 + 951 + 859 + 548 + 493 + 450 + 423 + 416 + 33 + 32 + >>> df.quantile() + Data 493.0 + Name: 0.5, dtype: float64 + >>> type(df.quantile()) + + >>> df.quantile(q=0.7) + Data 859.0 + Name: 0.7, dtype: float64 + >>> df.quantile(q=[0.5,0.7]) + Data + 0.5 493.0 + 0.7 859.0 + >>> df.quantile(q=[0.55],interpolation='higher') + Data + 0.55 548 + >>> df.quantile(q=[0.55],interpolation='lower') + Data + 0.55 493 """ self._check_percentile(q) From ef09ccc03cacdb1eef2212aacbc2d70611f0fd2c Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Mon, 1 Oct 2018 13:08:48 +0530 Subject: [PATCH 5/6] . --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 89bdc26f634dd..a9dec3e089714 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7475,7 +7475,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, >>> df.quantile(q=0.7) Data 859.0 Name: 0.7, dtype: float64 - >>> df.quantile(q=[0.5,0.7]) + >>> df.quantile(q=[0.5, 0.7]) Data 0.5 493.0 0.7 859.0 From 39081e145b8b8aef3385ddecf5a33bb34cbbebfb Mon Sep 17 00:00:00 2001 From: "brute4s99 [LAPTOP]" Date: Mon, 8 Oct 2018 15:06:43 +0530 Subject: [PATCH 6/6] fixed --- pandas/core/frame.py | 97 ++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a9dec3e089714..b19391afc7c69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7394,16 +7394,16 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, This function returns the Series of 'q' quantile value(s) from the DataFrame, dividing data points 
into groups - along `axis` axis. + along the given `axis`. In case of insufficient number of data points for clean division into groups, specify `interpolation` scheme to implement. Parameters ---------- q : float or array-like, default 0.5 - The quantile(s) to compute (0 <= q <= 1) (0.5 == 50% quantile) - If float is passed as `q`, scalar quantile is returned - If `array-like` is passed as `q`, Series is returned. + The quantile(s) to compute. + Each value should be a float between 0 and 1 (inclusive); + 0.5 corresponds to the 50% quantile, i.e. the median. axis : {0 or 'index', 1 or 'columns'}, default 0 For row-wise : 0 or'index', for column-wise : 1 or 'columns'. numeric_only : bool, default True @@ -7422,69 +7422,58 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, Returns ------- - Series or DataFrame - - If `q` is an array, a DataFrame will be returned where the - index is `q`, the columns are the columns of self, and the - values are the quantiles. - - If `q` is a float, a Series will be returned where the - index is the columns of self and the values are the quantiles. + Series or DataFrame See Also -------- pandas.core.window.Rolling.quantile Returns the rolling quantile for the DataFrame. numpy.percentile - Returns 'nth' percentile for the DataFrame. + Returns 'nth' percentile for numpy arrays. Examples -------- - >>> import pandas as pd - >>> d = {'Data': [416, 493, 423, 859, 32, 548,\ - 33, 951, 450, 1001, 998]} + >>> import numpy as np + >>> d = {'animal':['Cheetah','Falcon','Eagle','Goose','Pigeon'], + ... 'class':['mammal','bird','bird','bird','bird'], + ...
'max_speed':[120,np.nan,320,142,150]} >>> df = pd.DataFrame(data=d) >>> df - Data - 0 416 - 1 493 - 2 423 - 3 859 - 4 32 - 5 548 - 6 33 - 7 951 - 8 450 - 9 1001 - 10 998 - >>> for i in sorted(df['Data'],reverse=True): print(i) - 1001 - 998 - 951 - 859 - 548 - 493 - 450 - 423 - 416 - 33 - 32 + animal class max_speed + 0 Cheetah mammal 120.0 + 1 Falcon bird NaN + 2 Eagle bird 320.0 + 3 Goose bird 142.0 + 4 Pigeon bird 150.0 + + The `max_speed` values in descending order: + + >>> df['max_speed'].sort_values(ascending=False) + 2 320.0 + 4 150.0 + 3 142.0 + 0 120.0 + 1 NaN + Name: max_speed, dtype: float64 + >>> df.quantile() - Data 493.0 + max_speed 146.0 Name: 0.5, dtype: float64 - >>> type(df.quantile()) - - >>> df.quantile(q=0.7) - Data 859.0 - Name: 0.7, dtype: float64 - >>> df.quantile(q=[0.5, 0.7]) - Data - 0.5 493.0 - 0.7 859.0 - >>> df.quantile(q=[0.55],interpolation='higher') - Data - 0.55 548 - >>> df.quantile(q=[0.55],interpolation='lower') - Data - 0.55 493 + + The median above was interpolated between the two middle values. + + >>> df.quantile(q=[0.05,0.95]) + max_speed + 0.05 123.3 + 0.95 294.5 + >>> df.quantile(q=[0.05,0.95],interpolation="higher") + max_speed + 0.05 142.0 + 0.95 320.0 + >>> df.quantile(q=[0.05,0.95],interpolation="lower") + max_speed + 0.05 120.0 + 0.95 150.0 """ self._check_percentile(q)