From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/16] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 1d0ac65ac20427872e53e1beb01c5dbe6e55da30 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 22 Aug 2019 22:36:15 +0200 Subject: [PATCH 02/16] Fix issue 16748 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/plotting/_matplotlib/boxplot.py | 16 +++++++- pandas/tests/plotting/test_boxplot_method.py | 43 ++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4decc99087a9e..70fa1a13fdb3a 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -169,6 +169,7 @@ Plotting - Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) - Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) - Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) +- Bug in :meth:`DataFrameGroupby.boxplot` when ``subplots=False``, a KeyError would raise (:issue:`16748`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 8ff7441df5354..0620226b20709 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -9,6 +9,7 @@ import pandas as pd +from pandas import IndexSlice, Index, MultiIndex from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib import converter from pandas.plotting._matplotlib.core import LinePlot, MPLPlot @@ -314,10 +315,21 @@ def plot_group(keys, values, ax): with plt.rc_context(rc): ax = plt.gca() data = data._get_numeric_data() + + # if columns is None, use all numeric columns of data; if data columns + # is multiIndex, which means a groupby has been applied before, select + # data using new grouped column names; if data columns is Index, select + # data simply using columns if columns is None: columns = data.columns - else: - data = data[columns] + elif isinstance(data.columns, MultiIndex): + + # reselect columns with after-groupby multi-index columns + data = data.loc[:, IndexSlice[:, columns]] + columns = data.columns + elif isinstance(data.columns, Index): + data = data.loc[:, columns] + columns = data.columns result = plot_group(columns, data.values.T, ax) ax.grid(grid) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 5bbaff580c356..92d867d9a7b0f 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -412,3 +412,46 @@ def test_fontsize(self): self._check_ticks_props( df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16 ) + + @pytest.mark.parametrize( + "col, expected_xticklabel", + [ + ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]), + ( + ["v", "v1"], + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ], + ) + def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): + # GH 16748 + df = DataFrame( + { + "cat": np.random.choice(list("abcde"), 100), + "v": np.random.rand(100), + "v1": np.random.rand(100), + } + ) + grouped = df.groupby("cat") + + # check is boxplot with subplots=False works + axes = _check_plot_works( + grouped.boxplot, subplots=False, column=col, return_type="axes" + ) + + # check if xticks labels are plotted correctly + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel From af41084f73db7e8e8519a57c308538a3d354b3fe Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 23 Aug 2019 08:41:09 +0200 Subject: [PATCH 03/16] Code change based on review --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/plotting/_matplotlib/boxplot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 70fa1a13fdb3a..a950bc58a4e08 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -169,7 +169,7 @@ Plotting - Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) - Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) - Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) -- Bug in :meth:`DataFrameGroupby.boxplot` when ``subplots=False``, a KeyError would raise (:issue:`16748`) +- Bug in :meth:`DataFrame.groupby.boxplot` when ``subplots=False``, a KeyError would raise (:issue:`16748`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 0620226b20709..85a5ef2f047c6 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -317,7 +317,7 @@ def plot_group(keys, values, ax): data = data._get_numeric_data() # if columns is None, use all numeric columns of data; if data columns - # is multiIndex, which means a groupby has been applied before, select + # is MultiIndex, which means a Groupby has been applied before, select # data using new grouped column names; if data columns is Index, select # data simply using columns if columns is None: From 193eb2cc5ae713368536ae86abc98d6327270b2c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 23 Aug 2019 09:03:40 +0200 Subject: [PATCH 04/16] Fix import sort linting --- pandas/plotting/_matplotlib/boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 85a5ef2f047c6..6ebc8a616e367 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -8,8 +8,8 @@ from pandas.core.dtypes.missing import remove_na_arraylike import pandas as pd +from pandas import Index, IndexSlice, MultiIndex -from pandas import IndexSlice, Index, MultiIndex from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib import converter from pandas.plotting._matplotlib.core import LinePlot, MPLPlot From dfc72b2a3eed13e4aeab491f4a223d96ec5939da Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 24 Aug 2019 16:47:58 +0200 Subject: [PATCH 05/16] Skip the failing test --- pandas/tests/plotting/test_converter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 7001264c41c05..931347c40d730 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -28,6 +28,7 @@ pytest.importorskip("matplotlib.pyplot") +@pytest.mark.skip(reason="There is no warnings in some versions.") def test_initial_warning(): code = ( "import pandas as pd; import matplotlib.pyplot as plt; " From 5c69d10de4f7cb28d9db380befeb0a16cf04eab9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 30 Aug 2019 21:27:25 +0200 Subject: [PATCH 06/16] Remove skip --- pandas/tests/plotting/test_converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 931347c40d730..7001264c41c05 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -28,7 +28,6 @@ pytest.importorskip("matplotlib.pyplot") -@pytest.mark.skip(reason="There is no warnings in some versions.") def test_initial_warning(): code = ( "import pandas as pd; import matplotlib.pyplot as plt; " From c08c2787a80489b54e356ad903d4f10976c121cc Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 30 Aug 2019 22:09:32 +0200 Subject: [PATCH 07/16] remove imports --- pandas/plotting/_matplotlib/boxplot.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 6ebc8a616e367..3ecf8afce3cfb 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.missing import remove_na_arraylike import pandas as pd -from pandas import Index, IndexSlice, MultiIndex from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib import converter @@ -322,12 +321,12 @@ def plot_group(keys, values, ax): # data simply using columns if columns is None: columns = data.columns - elif isinstance(data.columns, MultiIndex): + elif isinstance(data.columns, pd.MultiIndex): # reselect columns with after-groupby multi-index columns - data = data.loc[:, IndexSlice[:, columns]] + data = data.loc[:, pd.IndexSlice[:, columns]] columns = data.columns - elif isinstance(data.columns, Index): + elif isinstance(data.columns, pd.Index): data = data.loc[:, columns] columns = data.columns From 1df91dac8fca61b747d823241aa124fa90c89b7e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 31 Aug 2019 22:11:57 +0200 Subject: [PATCH 08/16] More careful change --- pandas/plotting/_matplotlib/boxplot.py | 21 ++++++++-------- pandas/tests/plotting/test_boxplot_method.py | 25 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 3ecf8afce3cfb..62c98a9ae0bc2 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -315,21 +315,20 @@ def plot_group(keys, values, ax): ax = plt.gca() data = data._get_numeric_data() - # if columns is None, use all numeric columns of data; if data columns - # is MultiIndex, which means a Groupby has been applied before, select - # data using new grouped column names; if data columns is Index, select - # data simply using columns + # if columns is None, use all numeric columns of data, so directly pass + # if the given 'column' are subset of column index, no matter if data column + # is multiindex or index, get the subset of data directly + # if given 'column' is not subset, and data columns is multiindex, and then + # query the columns if column contains at least one element from 'columns' if columns is None: - columns = data.columns + pass + elif set(column).issubset(data.columns): + data = data.loc[:, columns] elif isinstance(data.columns, pd.MultiIndex): - - # reselect columns with after-groupby multi-index columns - data = data.loc[:, pd.IndexSlice[:, columns]] - columns = data.columns - elif isinstance(data.columns, pd.Index): + columns = [col for col in data.columns if set(columns).intersection(col)] data = data.loc[:, columns] - columns = data.columns + columns = data.columns result = plot_group(columns, data.values.T, ax) ax.grid(grid) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 92d867d9a7b0f..0f8c436ab42b7 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -455,3 +455,28 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): # check if xticks labels are plotted correctly result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] assert expected_xticklabel == result_xticklabel + + @pytest.mark.parametrize( + "col, expected_xticklabel", + [ + ([("bar", "one"), ("bar", "two")], ["(bar, one)", "(bar, two)"]), + ("bar", ["(bar, one)", "(bar, two)"]), + (["two"], ["(bar, two)", "(baz, two)", "(foo, two)", "(qux, two)"]), + ], + ) + def test_boxplot_multiindex_column(self, col, expected_xticklabel): + # this is test the boxplot on multi-index column cases + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + # check if df.boxplot works + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + + # check if xticks labels are plotted correctly + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel From 9cce7f7ed28980dc0733e2830f86780e95f095d5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 9 Sep 2019 08:44:24 +0200 Subject: [PATCH 09/16] keep the change --- pandas/plotting/_matplotlib/boxplot.py | 25 ++++++++++++-------- pandas/tests/plotting/test_boxplot_method.py | 14 +++++++---- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 62c98a9ae0bc2..f0a8bd68c73cc 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -318,17 +318,22 @@ def plot_group(keys, values, ax): # if columns is None, use all numeric columns of data, so directly pass # if the given 'column' are subset of column index, no matter if data column # is multiindex or index, get the subset of data directly - # if given 'column' is not subset, and data columns is multiindex, and then - # query the columns if column contains at least one element from 'columns' + # if columns is None: + # pass + # elif isinstance(data.columns, pd.MultiIndex): + # + # # if multiindex columns are passed, then the inner level columns will be + # # called for subset + # data = data.loc[:, pd.IndexSlice[:, columns]] + # elif isinstance(data.columns, pd.Index): + # + # # if a normal column is passed, then select specified columns for subset + # data = data.loc[:, columns] if columns is None: - pass - elif set(column).issubset(data.columns): - data = data.loc[:, columns] - elif isinstance(data.columns, pd.MultiIndex): - columns = [col for col in data.columns if set(columns).intersection(col)] - data = data.loc[:, columns] - - columns = data.columns + columns = data.columns + else: + data = data[columns] +# columns = data.columns result = plot_group(columns, data.values.T, ax) ax.grid(grid) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 0f8c436ab42b7..4405d4ccfceb0 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -475,8 +475,14 @@ def test_boxplot_multiindex_column(self, col, expected_xticklabel): df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) # check if df.boxplot works - axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + if col != "bar": + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") - # check if xticks labels are plotted correctly - result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] - assert expected_xticklabel == result_xticklabel + # check if xticks labels are plotted correctly + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel + + else: + msg = "['bar'] not in index" + with pytest.raises(KeyError, match=msg): + _check_plot_works(df.boxplot, column=col, return_type="axes") From 2f99b6769f6e7941568b64891f206327c3d9016c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 19 Oct 2019 20:10:54 +0200 Subject: [PATCH 10/16] update solution --- pandas/plotting/_matplotlib/boxplot.py | 36 ++++++++++++-------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index bfc3b65a176ec..f8f8231404a4e 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -339,25 +339,23 @@ def plot_group(keys, values, ax): ax = plt.gca() data = data._get_numeric_data() - # if columns is None, use all numeric columns of data, so directly pass - # if the given 'column' are subset of column index, no matter if data column - # is multiindex or index, get the subset of data directly - # if columns is None: - # pass - # elif isinstance(data.columns, pd.MultiIndex): - # - # # if multiindex columns are passed, then the inner level columns will be - # # called for subset - # data = data.loc[:, pd.IndexSlice[:, columns]] - # elif isinstance(data.columns, pd.Index): - # - # # if a normal column is passed, then select specified columns for subset - # data = data.loc[:, columns] - if columns is None: - columns = data.columns - else: - data = data[columns] -# columns = data.columns + if columns: + + # this is to align the current behavior of boxplot on columns, so if users + # explicitly specify column names and it is in df columns, then take the + # subset; Or if it is columns is in the first level, this is set mainly to + # avoid API change + if set(columns).issubset(data.columns) or set(columns).issubset( + data.columns.levels[0] + ): + data = data[columns] + else: + + # this loop is set for groupby situation, because user specified column + # will go to the second level of column index + data = data.loc[:, pd.IndexSlice[:, columns]] + columns = data.columns + result = plot_group(columns, data.values.T, ax) ax.grid(grid) From 3819f85e31b10ca49b23a17ccb08e9796770a4ea Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 19 Oct 2019 20:26:41 +0200 Subject: [PATCH 11/16] update test --- pandas/tests/plotting/test_boxplot_method.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index eb4fb165c3f80..c3c418cf961eb 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -488,7 +488,7 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): "col, expected_xticklabel", [ ([("bar", "one"), ("bar", "two")], ["(bar, one)", "(bar, two)"]), - ("bar", ["(bar, one)", "(bar, two)"]), + ("bar", ["bar", None]), (["two"], ["(bar, two)", "(baz, two)", "(foo, two)", "(qux, two)"]), ], ) From 3cbba0fd807c7023308f7d5e92453097315d4fbd Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 20 Oct 2019 11:14:35 +0200 Subject: [PATCH 12/16] fix test --- pandas/plotting/_matplotlib/boxplot.py | 2 ++ pandas/tests/plotting/test_boxplot_method.py | 16 +++++----------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index f8f8231404a4e..c37ac3ca5d4c7 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -355,6 +355,8 @@ def plot_group(keys, values, ax): # will go to the second level of column index data = data.loc[:, pd.IndexSlice[:, columns]] columns = data.columns + else: + columns = data.columns result = plot_group(columns, data.values.T, ax) ax.grid(grid) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index c3c418cf961eb..1bbf89acfd724 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -488,7 +488,7 @@ def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): "col, expected_xticklabel", [ ([("bar", "one"), ("bar", "two")], ["(bar, one)", "(bar, two)"]), - ("bar", ["bar", None]), + ("bar", ["bar", ""]), (["two"], ["(bar, two)", "(baz, two)", "(foo, two)", "(qux, two)"]), ], ) @@ -503,14 +503,8 @@ def test_boxplot_multiindex_column(self, col, expected_xticklabel): df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) # check if df.boxplot works - if col != "bar": - axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") - # check if xticks labels are plotted correctly - result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] - assert expected_xticklabel == result_xticklabel - - else: - msg = "['bar'] not in index" - with pytest.raises(KeyError, match=msg): - _check_plot_works(df.boxplot, column=col, return_type="axes") + # check if xticks labels are plotted correctly + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel From a1d84b9fc78af1cf251eb9d1ccc3c99f228cfa2f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 3 Oct 2020 11:56:02 +0200 Subject: [PATCH 13/16] format --- pandas/plotting/_matplotlib/boxplot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 09ecc34dd6c59..e1c48b8b12d69 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -356,7 +356,6 @@ def plot_group(keys, values, ax: "Axes"): with plt.rc_context(rc): ax = plt.gca() data = data._get_numeric_data() - if column is None: columns = data.columns else: From 4eecae8fe799f758ecc2d1870ae6b3a6658776d9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 3 Oct 2020 11:56:41 +0200 Subject: [PATCH 14/16] typo --- pandas/plotting/_matplotlib/boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index e1c48b8b12d69..edf28a7449b54 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -356,7 +356,7 @@ def plot_group(keys, values, ax: "Axes"): with plt.rc_context(rc): ax = plt.gca() data = data._get_numeric_data() - if column is None: + if columns is None: columns = data.columns else: data = data[columns] From b6d1b4c30a3b83304416c20e3090c13d47ca536e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 3 Oct 2020 12:12:33 +0200 Subject: [PATCH 15/16] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5e150a245e927..ab8e9f5055849 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -393,7 +393,7 @@ Plotting - Bug in :meth:`DataFrame.plot` was rotating xticklabels when ``subplots=True``, even if the x-axis wasn't an irregular time series (:issue:`29460`) - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) - Twinned axes were losing their tick labels which should only happen to all but the last row or column of 'externally' shared axes (:issue:`33819`) -- Bug in :meth:`DataFrame.groupby.boxplot` when ``subplots=False``, a KeyError would raise (:issue:`16748`) +- Bug in :meth:`DataFrameGroupBy.boxplot` when ``subplots=False``, a KeyError would raise (:issue:`16748`) Groupby/resample/rolling From 8ab2db3f17c108b0609d1b39ed20ee48b0589b1f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 3 Oct 2020 13:24:44 +0200 Subject: [PATCH 16/16] commit one more --- pandas/plotting/_matplotlib/boxplot.py | 7 ++++--- pandas/tests/plotting/test_boxplot_method.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index edf28a7449b54..3d0e30f8b9234 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -445,9 +445,10 @@ def boxplot_frame_groupby( else: df = frames[0] - # GH 16748, DataFrameGroupby fails when subplots=False, and in this case, since - # `df` here becomes MI after groupby, so we need to couple the keys (grouped - # values) and column (original df column) together to search for subset to plot + # GH 16748, DataFrameGroupby fails when subplots=False and `column` argument + # is assigned, and in this case, since `df` here becomes MI after groupby, + # so we need to couple the keys (grouped values) and column (original df + # column) together to search for subset to plot if column is not None: column = com.convert_to_list_like(column) multi_key = pd.MultiIndex.from_product([keys, column]) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 60bf206353f04..28ca5f3dd8642 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -477,6 +477,21 @@ def test_fontsize(self): "(e, v1)", ], ), + ( + None, + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), ], ) def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel):