From 7847116b167ab3a782eef5cccde31044f31b5136 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sat, 31 Oct 2020 18:32:27 +0300 Subject: [PATCH 001/147] Moving the file test_frame.py to a new directory --- pandas/tests/plotting/frame/__init__.py | 0 pandas/tests/plotting/{ => frame}/test_frame.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/plotting/frame/__init__.py rename pandas/tests/plotting/{ => frame}/test_frame.py (100%) diff --git a/pandas/tests/plotting/frame/__init__.py b/pandas/tests/plotting/frame/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/frame/test_frame.py similarity index 100% rename from pandas/tests/plotting/test_frame.py rename to pandas/tests/plotting/frame/test_frame.py From 418551a12d569906325de6ab749212b2a5a214f1 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sun, 1 Nov 2020 13:26:33 +0300 Subject: [PATCH 002/147] =?UTF-8?q?=D0=A1reated=20file=20test=5Fframe=5Fco?= =?UTF-8?q?lor.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/plotting/frame/test_frame_color.py | 3492 +++++++++++++++++ 1 file changed, 3492 insertions(+) create mode 100644 pandas/tests/plotting/frame/test_frame_color.py diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py new file mode 100644 index 0000000000000..d4d2256d209cf --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -0,0 +1,3492 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +from numpy.random import rand, randn +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import 
def _assert_ytickslabels_visibility(self, axes, expected):
    """
    Check the visibility of the y tick labels of each axes.

    ``axes`` and ``expected`` are walked in lockstep with ``zip``, so any
    surplus entries on the longer side are not checked.
    """
    for axis, should_show in zip(axes, expected):
        labels = axis.get_yticklabels()
        self._check_visible(labels, visible=should_show)
see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True, use_index=False) + self._check_ticks_props(axes, xrot=0) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + if mpl_ge_3_1_0(): + msg = "'Line2D' object has no property 'blarg'" + else: + msg = "Unknown property blarg" + with pytest.raises(AttributeError, match=msg): + df.plot.line(blarg=True) + + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + ax = _check_plot_works(df.plot, use_index=True) + self._check_ticks_props(ax, xrot=0) + _check_plot_works(df.plot, sort_columns=False) + _check_plot_works(df.plot, yticks=[1, 5, 10]) + _check_plot_works(df.plot, xticks=[1, 5, 10]) + _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) + + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.plot, subplots=True, title="blah") + + # We have to redo it here because _check_plot_works does two plots, + # once without an ax kwarg and once with an ax kwarg and the new sharex + # behaviour does not remove the visibility of the latter axis (as ax is + # present). 
see: https://github.com/pandas-dev/pandas/issues/9737 + + axes = df.plot(subplots=True, title="blah") + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + # axes[0].figure.savefig("test.png") + for ax in axes[:2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible([ax.xaxis.get_label()], visible=False) + for ax in [axes[2]]: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible([ax.xaxis.get_label()]) + self._check_ticks_props(ax, xrot=0) + + _check_plot_works(df.plot, title="blah") + + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + ax = _check_plot_works(df.plot, use_index=True) + self._check_ticks_props(ax, xrot=0) + + # unicode + index = MultiIndex.from_tuples( + [ + ("\u03b1", 0), + ("\u03b1", 1), + ("\u03b2", 2), + ("\u03b2", 3), + ("\u03b3", 4), + ("\u03b3", 5), + ("\u03b4", 6), + ("\u03b4", 7), + ], + names=["i0", "i1"], + ) + columns = MultiIndex.from_tuples( + [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] + ) + df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) + _check_plot_works(df.plot, title="\u03A3") + + # GH 6951 + # Test with single column + df = DataFrame({"x": np.random.rand(10)}) + axes = _check_plot_works(df.plot.bar, subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1)) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + # When ax is supplied and required number of axes is 1, + # passed ax should be used: + fig, ax = self.plt.subplots() + axes = df.plot.bar(subplots=True, ax=ax) + assert len(axes) == 1 + result = ax.axes + assert result is axes[0] + + def test_integer_array_plot(self): + # GH 25587 + arr = 
def test_mpl2_color_cycle_str(self):
    """
    Plot with each matplotlib "CN" cycle color given as a string.

    None of the warnings recorded while plotting may be the deprecation
    warning about uppercase single-letter colors.
    """
    # GH 15516
    # Local import, like the other matplotlib imports in this class.
    from matplotlib import MatplotlibDeprecationWarning

    df = DataFrame(randn(10, 3), columns=["a", "b", "c"])
    colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"]
    with warnings.catch_warnings(record=True) as w:
        # ``category`` has to be a Warning subclass; passing the class name
        # as a string makes the filter unusable.
        warnings.simplefilter("always", MatplotlibDeprecationWarning)

        for color in colors:
            _check_plot_works(df.plot, color=color)

        # if warnings were raised, none of them may be the problematic one
        # GH 36972
        match = "Support for uppercase single-letter colors is deprecated"
        msg = "MatplotlibDeprecationWarning related to CN colors was raised"
        for recorded in w:
            assert match not in str(recorded.message), msg
@pytest.mark.parametrize(
    "color, expected",
    [
        ("green", ["green"] * 4),
        (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]),
    ],
)
def test_color_and_marker(self, color, expected):
    """
    A ``style`` carrying only marker and linestyle can be combined with
    ``color``; every line must get the requested color, marker and dash.
    """
    # GH 21003
    frame = DataFrame(np.random.random((7, 4)))
    ax = frame.plot(color=color, style="d--")

    # colors, one per plotted line
    line_colors = [line.get_color() for line in ax.lines]
    assert line_colors == expected

    # linestyles first, then markers
    linestyles = [line.get_linestyle() for line in ax.lines]
    assert linestyles == ["--"] * len(linestyles)
    markers = [line.get_marker() for line in ax.lines]
    assert markers == ["d"] * len(markers)
@pytest.mark.slow
@pytest.mark.parametrize(
    "input_log, expected_log", [(True, "log"), ("sym", "symlog")]
)
def test_logscales(self, input_log, expected_log):
    """
    ``logy``, ``logx`` and ``loglog`` must set the matching axis scale(s)
    to ``expected_log``.
    """
    df = DataFrame({"a": np.arange(100)}, index=np.arange(100))

    # (plot keyword, axes expected to carry the scale), in the order y, x, both
    scenarios = [
        ("logy", ("yaxis",)),
        ("logx", ("xaxis",)),
        ("loglog", ("xaxis", "yaxis")),
    ]
    for option, scaled_axes in scenarios:
        ax = df.plot(**{option: input_log})
        self._check_ax_scales(ax, **{name: expected_log for name in scaled_axes})
        if "xaxis" in scaled_axes:
            assert ax.get_xscale() == expected_log
        if "yaxis" in scaled_axes:
            assert ax.get_yscale() == expected_log
@pytest.mark.slow
def test_xcompat(self):
    """
    Exercise the ``x_compat`` option three ways: as a ``plot`` keyword, by
    item assignment on ``pd.plotting.plot_params`` and through the
    ``plot_params.use`` context manager.
    """
    df = self.tdf
    ax = df.plot(x_compat=True)
    lines = ax.get_lines()
    assert not isinstance(lines[0].get_xdata(), PeriodIndex)
    self._check_ticks_props(ax, xrot=30)

    tm.close()
    pd.plotting.plot_params["xaxis.compat"] = True
    try:
        ax = df.plot()
        lines = ax.get_lines()
        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
        self._check_ticks_props(ax, xrot=30)

        tm.close()
    finally:
        # plot_params is global state: reset it even when an assertion
        # above fails, so the setting cannot leak into other tests
        pd.plotting.plot_params["x_compat"] = False

    ax = df.plot()
    lines = ax.get_lines()
    assert not isinstance(lines[0].get_xdata(), PeriodIndex)
    assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)

    tm.close()
    # useful if you're plotting a bunch together
    with pd.plotting.plot_params.use("x_compat", True):
        ax = df.plot()
        lines = ax.get_lines()
        assert not isinstance(lines[0].get_xdata(), PeriodIndex)
        self._check_ticks_props(ax, xrot=30)

    tm.close()
    # after leaving the context manager the option is off again
    ax = df.plot()
    lines = ax.get_lines()
    assert not isinstance(lines[0].get_xdata(), PeriodIndex)
    assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex)
    self._check_ticks_props(ax, xrot=0)
def test_unsorted_index_lims(self):
    """
    The x limits of a line plot must enclose the x data of the first line,
    ignoring NaN, for unsorted x values.
    """
    scenarios = [
        # unsorted float index
        (
            DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0]),
            {},
        ),
        # unsorted index with NaN and duplicates, NaN in the data as well
        (
            DataFrame(
                {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]},
                index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
            ),
            {},
        ),
        # x taken from an unsorted column instead of the index
        (
            DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]}),
            {"x": "z", "y": "y"},
        ),
    ]
    for frame, plot_kwargs in scenarios:
        ax = frame.plot(**plot_kwargs)
        lower, upper = ax.get_xlim()
        first_line = ax.get_lines()[0]
        assert lower <= np.nanmin(first_line.get_data()[0])
        assert upper >= np.nanmax(first_line.get_data()[0])
def test_groupby_boxplot_sharex(self):
    """
    ``sharex`` of a grouped boxplot decides which axes show x tick labels.
    """
    # https://github.com/pandas-dev/pandas/issues/20968
    # sharex can now be switched; check whether the right
    # pair of axes is turned on or off
    df = DataFrame(
        {
            "a": [-1.43, -0.15, -3.70, -1.43, -0.14],
            "b": [0.56, 0.84, 0.29, 0.56, 0.85],
            "c": [0, 1, 2, 3, 1],
        },
        index=[0, 1, 2, 3, 4],
    )

    scenarios = [
        # behavior without keyword
        ({}, [True, True, True, True]),
        # sharex=False should be identical to the default
        ({"sharex": False}, [True, True, True, True]),
        # sharex=True: xticklabels should be visible only for bottom plots
        ({"sharex": True}, [False, False, True, True]),
    ]
    for boxplot_kwargs, expected in scenarios:
        axes = df.groupby("c").boxplot(**boxplot_kwargs)
        self._assert_xtickslabels_visibility(axes, expected)
@pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz")
def test_subplots_timeseries_y_axis_not_supported(self):
    """
    Plot y columns whose dtypes are not supported yet (expected to fail).

    period:
        not yet implemented in ``select_dtypes``; it would also need a
        custom value converter + tick formatter (as was done for x-axis
        plots)

    categorical:
        would need a custom value converter + tick formatter (does not
        work for the x-axis either, as of now)

    datetime_mixed_tz:
        fails because of the way pandas handles a ``Series`` of
        ``datetime`` objects with different timezones; converting the
        ``datetime`` objects to a tz-aware form could help
    """
    data = {
        "numeric": np.array([1, 2, 5]),
        "period": [
            pd.Period("2017-08-01 00:00:00", freq="H"),
            pd.Period("2017-08-01 02:00", freq="H"),
            pd.Period("2017-08-02 00:00:00", freq="H"),
        ],
        "categorical": pd.Categorical(
            ["c", "b", "a"], categories=["a", "b", "c"], ordered=False
        ),
        "datetime_mixed_tz": [
            pd.to_datetime("2017-08-01 00:00:00", utc=True),
            pd.to_datetime("2017-08-01 02:00:00"),
            pd.to_datetime("2017-08-02 00:00:00"),
        ],
    }
    testdata = pd.DataFrame(data)

    # same check for every unsupported column, in the original order
    for column in ["period", "categorical", "datetime_mixed_tz"]:
        ax = testdata.plot(x="numeric", y=column)
        plotted_y = ax.get_lines()[0].get_data()[1]
        assert (plotted_y == testdata[column].values).all()
@pytest.mark.slow
def test_subplots_multiple_axes(self):
    """
    Draw ``subplots=True`` plots onto axes that were created beforehand.
    """
    # GH 5353, 6970, GH 7069
    fig, axes = self.plt.subplots(2, 3)
    df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))

    # draw on the first row, then on the second row
    for row in (axes[0], axes[1]):
        returned = df.plot(subplots=True, ax=row, sharex=False, sharey=False)
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        assert returned.shape == (3,)
        assert returned[0].figure is fig
    self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
    tm.close()

    # pass a different number of axes than required; the figure is created
    # outside the ``raises`` block so that only ``df.plot`` can satisfy it
    fig, axes = self.plt.subplots(2, 3)
    with pytest.raises(ValueError):
        df.plot(subplots=True, ax=axes)

    # pass 2-dim axes and an invalid layout
    # an invalid layout should not affect the input or the return value
    # (the warning itself is tested in
    # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes)
    fig, axes = self.plt.subplots(2, 2)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10]))

        for layout in [(2, 1), (2, -1), (-1, 2)]:
            returned = df.plot(
                subplots=True, ax=axes, layout=layout, sharex=False, sharey=False
            )
            self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
            assert returned.shape == (4,)

    # single column
    fig, axes = self.plt.subplots(1, 1)
    df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10]))

    axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
    assert axes.shape == (1,)
def _compare_stacked_y_cood(self, normal_lines, stacked_lines):
    """
    Assert that each stacked line equals the running sum of the normal lines.

    Both arguments are sequences of objects whose ``get_data()[1]`` holds
    the y coordinates; they are compared pairwise in order.
    """
    running_total = np.zeros(len(normal_lines[0].get_data()[1]))
    for plain, stacked in zip(normal_lines, stacked_lines):
        plain_y = plain.get_data()[1]
        # accumulate in place, exactly like ``running_total += plain_y``
        np.add(running_total, plain_y, out=running_total)
        stacked_y = stacked.get_data()[1]
        tm.assert_numpy_array_equal(running_total, stacked_y)
def test_line_area_nan_df(self):
    """
    Line and area plots of frames containing NaN, with a default index and
    with a date index.
    """
    col_a = [1, 2, np.nan, 3]
    col_b = [3, np.nan, 2, 1]
    plain = DataFrame({"a": col_a, "b": col_b})
    dated = DataFrame({"a": col_a, "b": col_b}, index=tm.makeDateIndex(k=4))

    # expected y data of the stacked / area variants: 0 in the NaN positions
    filled_a = np.array([1, 2, 0, 3], dtype=np.float64)
    filled_b = np.array([3, 0, 2, 1], dtype=np.float64)

    for frame in (plain, dated):
        ax = _check_plot_works(frame.plot)
        y_a = ax.lines[0].get_ydata()
        y_b = ax.lines[1].get_ydata()

        # drop the NaN slot before comparing the remaining values
        tm.assert_numpy_array_equal(
            np.delete(y_a.data, 2), np.array([1, 2, 3], dtype=np.float64)
        )
        tm.assert_numpy_array_equal(
            np.delete(y_b.data, 1), np.array([3, 2, 1], dtype=np.float64)
        )
        tm.assert_numpy_array_equal(y_a.mask, np.array([False, False, True, False]))
        tm.assert_numpy_array_equal(y_b.mask, np.array([False, True, False, False]))

        # (plot callable, extra kwargs, expected y data of the second line)
        variants = [
            (frame.plot, {"stacked": True}, filled_a + filled_b),
            (frame.plot.area, {}, filled_a + filled_b),
            (frame.plot.area, {"stacked": False}, filled_b),
        ]
        for plotter, plot_kwargs, second_line in variants:
            ax = _check_plot_works(plotter, **plot_kwargs)
            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), filled_a)
            tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), second_line)
functionality + ax = df.plot.bar(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.bar(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + tm.close() + + ax = df.plot(kind="bar", color="green") + self._check_colors(ax.patches[::5], facecolors=["green"] * 5) + tm.close() + + def test_bar_user_colors(self): + df = pd.DataFrame( + {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} + ) + # This should *only* work when `y` is specified, else + # we use one color per column + ax = df.plot.bar(y="A", color=df["color"]) + result = [p.get_facecolor() for p in ax.patches] + expected = [ + (1.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (1.0, 0.0, 0.0, 1.0), + ] + assert result == expected + + @pytest.mark.slow + def test_bar_linewidth(self): + df = DataFrame(randn(5, 5)) + + # regular + ax = df.plot.bar(linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # stacked + ax = df.plot.bar(stacked=True, linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # subplots + axes = df.plot.bar(linewidth=2, subplots=True) + self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) + for ax in axes: + for r in ax.patches: + assert r.get_linewidth() == 2 + + @pytest.mark.slow + def test_bar_barwidth(self): + df = DataFrame(randn(5, 5)) + + width = 0.9 + + # regular + ax = df.plot.bar(width=width) + for r in ax.patches: + assert r.get_width() == width / len(df.columns) + + # stacked + ax = df.plot.bar(stacked=True, width=width) + for r in ax.patches: + assert r.get_width() == width + + # horizontal regular + ax = df.plot.barh(width=width) + for r in 
ax.patches: + assert r.get_height() == width / len(df.columns) + + # horizontal stacked + ax = df.plot.barh(stacked=True, width=width) + for r in ax.patches: + assert r.get_height() == width + + # subplots + axes = df.plot.bar(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_width() == width + + # horizontal subplots + axes = df.plot.barh(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_height() == width + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + @pytest.mark.slow + def test_bar_bottom_left(self): + df = DataFrame(rand(5, 5)) + ax = df.plot.bar(stacked=False, bottom=1) + 
result = [p.get_y() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) + result = [p.get_y() for p in ax.patches[:5]] + assert result == [-1, -2, -3, -4, -5] + + ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) + result = [p.get_x() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) + result = [p.get_x() for p in ax.patches[:5]] + assert result == [1, 2, 3, 4, 5] + + axes = df.plot.bar(subplots=True, bottom=-1) + for ax in axes: + result = [p.get_y() for p in ax.patches] + assert result == [-1] * 5 + + axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) + for ax in axes: + result = [p.get_x() for p in ax.patches] + assert result == [1] * 5 + + @pytest.mark.slow + def test_bar_nan(self): + df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]}) + ax = df.plot.bar() + expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] + result = [p.get_height() for p in ax.patches] + assert result == expected + + ax = df.plot.bar(stacked=True) + result = [p.get_height() for p in ax.patches] + assert result == expected + + result = [p.get_y() for p in ax.patches] + expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] + assert result == expected + + @pytest.mark.slow + def test_bar_categorical(self): + # GH 13019 + df1 = pd.DataFrame( + np.random.randn(6, 5), + index=pd.Index(list("ABCDEF")), + columns=pd.Index(list("abcde")), + ) + # categorical index must behave the same + df2 = pd.DataFrame( + np.random.randn(6, 5), + index=pd.CategoricalIndex(list("ABCDEF")), + columns=pd.CategoricalIndex(list("abcde")), + ) + + for df in [df1, df2]: + ax = df.plot.bar() + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == (-0.5, 5.5) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 5.15 + + ax = 
df.plot.bar(stacked=True) + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == (-0.5, 5.5) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 4.75 + + @pytest.mark.slow + def test_plot_scatter(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + _check_plot_works(df.plot.scatter, x="x", y="y") + _check_plot_works(df.plot.scatter, x=1, y=2) + + with pytest.raises(TypeError): + df.plot.scatter(x="x") + with pytest.raises(TypeError): + df.plot.scatter(y="y") + + # GH 6951 + axes = df.plot(x="x", y="y", kind="scatter", subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_raise_error_on_datetime_time_data(self): + # GH 8113, datetime.time type is not supported by matplotlib in scatter + df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time + msg = "must be a string or a number, not 'datetime.time'" + + with pytest.raises(TypeError, match=msg): + df.plot(kind="scatter", x="dtime", y="a") + + def test_scatterplot_datetime_data(self): + # GH 30391 + dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") + vals = np.random.normal(0, 1, len(dates)) + df = pd.DataFrame({"dates": dates, "vals": vals}) + + _check_plot_works(df.plot.scatter, x="dates", y="vals") + _check_plot_works(df.plot.scatter, x=0, y=1) + + def test_scatterplot_object_data(self): + # GH 18755 + df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + @pytest.mark.slow + def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): + # addressing issue #10611, to ensure colobar does not + 
# interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax1 = df.plot.scatter(x="A label", y="B label") + ax2 = df.plot.scatter(x="A label", y="B label", c="C label") + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] + assert vis1 == vis2 + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] + assert vis1 == vis2 + + assert ( + ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() + ) + + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + + @pytest.mark.slow + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): + import matplotlib.pyplot as plt + + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + 
assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() + + @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) + @pytest.mark.slow + def test_plot_scatter_with_categorical_data(self, x, y): + # after fixing GH 18755, should be able to plot categorical data + df = pd.DataFrame( + {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} + ) + + _check_plot_works(df.plot.scatter, x=x, y=y) + + @pytest.mark.slow + def test_plot_scatter_with_c(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)] + for ax in axes: + # default to Greys + assert ax.collections[0].cmap.name == "Greys" + + # n.b. there appears to be no public method + # to get the colorbar label + assert ax.collections[0].colorbar._label == "z" + + cm = "cubehelix" + ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) + assert ax.collections[0].cmap.name == cm + + # verify turning off colorbar works + ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False) + assert ax.collections[0].colorbar is None + + # verify that we can still plot a solid color + ax = df.plot.scatter(x=0, y=1, c="red") + assert ax.collections[0].colorbar is None + self._check_colors(ax.collections, facecolors=["r"]) + + # Ensure that we can pass an np.array straight through to matplotlib, + # this functionality was accidentally removed previously. 
+ # See https://github.com/pandas-dev/pandas/issues/8852 for bug report + # + # Exercise colormap path and non-colormap path as they are independent + # + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + red_rgba = [1.0, 0.0, 0.0, 1.0] + green_rgba = [0.0, 1.0, 0.0, 1.0] + rgba_array = np.array([red_rgba, green_rgba]) + ax = df.plot.scatter(x="A", y="B", c=rgba_array) + # expect the face colors of the points in the non-colormap path to be + # identical to the values we supplied, normally we'd be on shaky ground + # comparing floats for equality but here we expect them to be + # identical. + tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array) + # we don't test the colors of the faces in this next plot because they + # are dependent on the spring colormap, which may change its colors + # later. + float_array = np.array([0.0, 1.0]) + df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") + + @pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): + # https://github.com/pandas-dev/pandas/issues/34316 + df = pd.DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) + assert ax.collections[0].colorbar is None + + def test_plot_scatter_with_s(self): + # this refers to GH 32904 + df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) + + ax = df.plot.scatter(x="a", y="b", s="c") + tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + 
np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + + @pytest.mark.slow + def test_plot_bar(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + + _check_plot_works(df.plot.bar) + _check_plot_works(df.plot.bar, legend=False) + # _check_plot_works adds an ax so catch warning. see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.plot.bar, subplots=True) + _check_plot_works(df.plot.bar, stacked=True) + + df = DataFrame( + randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) + ) + _check_plot_works(df.plot.bar) + + df = DataFrame({"a": [0, 1], "b": [1, 0]}) + ax = _check_plot_works(df.plot.bar) + self._check_ticks_props(ax, xrot=90) + + ax = df.plot.bar(rot=35, fontsize=10) + self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10) + + ax = _check_plot_works(df.plot.barh) + self._check_ticks_props(ax, yrot=0) + + ax = df.plot.barh(rot=55, fontsize=11) + self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + 
axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + 
def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, 
kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_boxplot(self): + df = self.hist_df + series = df["height"] + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + ax = _check_plot_works(df.plot.box) + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal( + ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1) + ) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + tm.close() + + axes = series.plot.box(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = _check_plot_works(series.plot.box) + + positions = np.array([1, 6, 7]) + ax = df.plot.box(positions=positions) + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + 
@pytest.mark.slow + def test_boxplot_vertical(self): + df = self.hist_df + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + # if horizontal, yticklabels are rotated + ax = df.plot.box(rot=50, fontsize=8, vert=False) + self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) + self._check_text_labels(ax.get_yticklabels(), labels) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + # _check_plot_works adds an ax so catch warning. see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.box, subplots=True, vert=False, logx=True) + self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) + self._check_ax_scales(axes, xaxis="log") + for ax, label in zip(axes, labels): + self._check_text_labels(ax.get_yticklabels(), [label]) + assert len(ax.lines) == self.bp_n_objects + + positions = np.array([3, 2, 8]) + ax = df.plot.box(positions=positions, vert=False) + self._check_text_labels(ax.get_yticklabels(), labels) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + @pytest.mark.slow + def test_boxplot_return_type(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + with pytest.raises(ValueError): + df.plot.box(return_type="NOTATYPE") + + result = df.plot.box(return_type="dict") + self._check_box_return_type(result, "dict") + + result = df.plot.box(return_type="axes") + self._check_box_return_type(result, "axes") + + result = df.plot.box() # default axes + self._check_box_return_type(result, "axes") + + result = df.plot.box(return_type="both") + self._check_box_return_type(result, "both") + + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( 
+ result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_df(self): + df = DataFrame(randn(100, 4)) + ax = _check_plot_works(df.plot, kind="kde") + expected = [pprint_thing(c) for c in df.columns] + self._check_legend_labels(ax, labels=expected) + self._check_ticks_props(ax, xrot=0) + + ax = df.plot(kind="kde", rot=20, fontsize=5) + self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, kind="kde", subplots=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.plot(kind="kde", logy=True, subplots=True) + self._check_ax_scales(axes, yaxis="log") + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_missing_vals(self): + df = DataFrame(np.random.uniform(size=(100, 4))) + df.loc[0, 0] = np.nan + _check_plot_works(df.plot, kind="kde") + + @pytest.mark.slow + def test_hist_df(self): + from matplotlib.patches import Rectangle + + df = DataFrame(randn(100, 4)) + series = df[0] + + ax = _check_plot_works(df.plot.hist) + expected = [pprint_thing(c) for c in df.columns] + self._check_legend_labels(ax, labels=expected) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.hist, subplots=True, logy=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + self._check_ax_scales(axes, yaxis="log") + + axes = series.plot.hist(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = series.plot.hist(cumulative=True, bins=4, density=True) + # height of last bin (index 5) must be 1.0 + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + tm.assert_almost_equal(rects[-1].get_height(), 1.0) + 
tm.close() + + ax = series.plot.hist(cumulative=True, bins=4) + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + + tm.assert_almost_equal(rects[-2].get_height(), 100.0) + tm.close() + + # if horizontal, yticklabels are rotated + axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") + self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) + + @pytest.mark.parametrize( + "weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))] + ) + def test_hist_weights(self, weights): + # GH 33173 + np.random.seed(0) + df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + + ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) + ax2 = _check_plot_works(df.plot, kind="hist") + + patch_height_with_weights = [patch.get_height() for patch in ax1.patches] + + # original heights with no weights, and we manually multiply with example + # weights, so after multiplication, they should be almost same + expected_patch_height = [0.1 * patch.get_height() for patch in ax2.patches] + + tm.assert_almost_equal(patch_height_with_weights, expected_patch_height) + + def _check_box_coord( + self, + patches, + expected_y=None, + expected_h=None, + expected_x=None, + expected_w=None, + ): + result_y = np.array([p.get_y() for p in patches]) + result_height = np.array([p.get_height() for p in patches]) + result_x = np.array([p.get_x() for p in patches]) + result_width = np.array([p.get_width() for p in patches]) + # dtype is depending on above values, no need to check + + if expected_y is not None: + tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False) + if expected_h is not None: + tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False) + if expected_x is not None: + tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False) + if expected_w is not None: + tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) + + @pytest.mark.slow + def test_hist_df_coord(self): + 
normal_df = DataFrame( + { + "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), + "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])), + "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])), + }, + columns=["A", "B", "C"], + ) + + nan_df = DataFrame( + { + "A": np.repeat( + np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6]) + ), + "B": np.repeat( + np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8]) + ), + "C": np.repeat( + np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10]) + ), + }, + columns=["A", "B", "C"], + ) + + for df in [normal_df, nan_df]: + ax = df.plot.hist(bins=5) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([10, 9, 8, 7, 6]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([18, 17, 16, 15, 14]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist(bins=5, stacked=True, subplots=True) + self._check_box_coord( + axes[0].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + axes[2].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + # horizontal + ax = df.plot.hist(bins=5, 
orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([10, 9, 8, 7, 6]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([18, 17, 16, 15, 14]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist( + bins=5, stacked=True, subplots=True, orientation="horizontal" + ) + self._check_box_coord( + axes[0].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + axes[2].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + @pytest.mark.slow + def test_plot_int_columns(self): + df = DataFrame(randn(100, 4)).cumsum() + _check_plot_works(df.plot, legend=True) + + @pytest.mark.slow + def test_df_legend_labels(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) + + for kind in kinds: + + ax = df.plot(kind=kind, legend=True) + self._check_legend_labels(ax, labels=df.columns) + + ax = df2.plot(kind=kind, legend=False, ax=ax) + 
self._check_legend_labels(ax, labels=df.columns) + + ax = df3.plot(kind=kind, legend=True, ax=ax) + self._check_legend_labels(ax, labels=df.columns.union(df3.columns)) + + ax = df4.plot(kind=kind, legend="reverse", ax=ax) + expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns)) + self._check_legend_labels(ax, labels=expected) + + # Secondary Y + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax) + self._check_legend_labels( + ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"] + ) + + # Time Series + ind = date_range("1/1/2014", periods=3) + df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df3.plot(legend=True, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", "h", "i"]) + + # scatter + ax = df.plot.scatter(x="a", y="b", label="data1") + self._check_legend_labels(ax, labels=["data1"]) + ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax) + self._check_legend_labels(ax, labels=["data1"]) + ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax) + self._check_legend_labels(ax, labels=["data1", "data3"]) + + # ensure label args pass through and + # index name does not mutate + # column names don't mutate + df5 = df.set_index("a") + ax = df5.plot(y="b") + self._check_legend_labels(ax, labels=["b"]) + ax = df5.plot(y="b", label="LABEL_b") + self._check_legend_labels(ax, labels=["LABEL_b"]) + 
self._check_text_labels(ax.xaxis.get_label(), "a") + ax = df5.plot(y="c", label="LABEL_c", ax=ax) + self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"]) + assert df5.columns.tolist() == ["b", "c"] + + def test_missing_marker_multi_plots_on_same_ax(self): + # GH 18222 + df = pd.DataFrame( + data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] + ) + fig, ax = self.plt.subplots(nrows=1, ncols=3) + # Left plot + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[0]) + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[0]) + self._check_legend_labels(ax[0], labels=["r", "g", "b"]) + self._check_legend_marker(ax[0], expected_markers=["o", "x", "o"]) + # Center plot + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[1]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[1]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[1]) + self._check_legend_labels(ax[1], labels=["b", "r", "g"]) + self._check_legend_marker(ax[1], expected_markers=["o", "o", "x"]) + # Right plot + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[2]) + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[2]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[2]) + self._check_legend_labels(ax[2], labels=["g", "b", "r"]) + self._check_legend_marker(ax[2], expected_markers=["x", "o", "o"]) + + def test_legend_name(self): + multi = DataFrame( + randn(4, 4), + columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], + ) + multi.columns.names = ["group", "individual"] + + ax = multi.plot() + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df = DataFrame(randn(5, 5)) + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df.columns.name = "new" + ax = 
df.plot(legend=False, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "new") + + @pytest.mark.slow + def test_no_legend(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + + for kind in kinds: + + ax = df.plot(kind=kind, legend=False) + self._check_legend_labels(ax, visible=False) + + @pytest.mark.slow + def test_style_by_column(self): + import matplotlib.pyplot as plt + + fig = plt.gcf() + + df = DataFrame(randn(100, 3)) + for markers in [ + {0: "^", 1: "+", 2: "o"}, + {0: "^", 1: "+"}, + ["^", "+", "o"], + ["^", "+"], + ]: + fig.clf() + fig.add_subplot(111) + ax = df.plot(style=markers) + for i, l in enumerate(ax.get_lines()[: len(markers)]): + assert l.get_marker() == markers[i] + + @pytest.mark.slow + def test_line_label_none(self): + s = Series([1, 2]) + ax = s.plot() + assert ax.get_legend() is None + + ax = s.plot(legend=True) + assert ax.get_legend().get_texts()[0].get_text() == "None" + + @pytest.mark.slow + def test_line_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(randn(5, 5)) + + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + + tm.close() + + ax2 = df.plot(color=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() + + tm.close() + + ax = df.plot(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + # make color a list if plotting one column frame + # handles cases like 
df.plot(color='DodgerBlue') + ax = df.loc[:, [0]].plot(color="DodgerBlue") + self._check_colors(ax.lines, linecolors=["DodgerBlue"]) + + ax = df.plot(color="red") + self._check_colors(ax.get_lines(), linecolors=["red"] * 5) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + @pytest.mark.slow + def test_dont_modify_colors(self): + colors = ["r", "g", "b"] + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + assert len(colors) == 3 + + @pytest.mark.slow + def test_line_colors_and_styles_subplots(self): + # GH 9894 + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + + axes = df.plot(subplots=True) + for ax, c in zip(axes, list(default_colors)): + c = [c] + self._check_colors(ax.get_lines(), linecolors=c) + tm.close() + + # single color char + axes = df.plot(subplots=True, color="k") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(subplots=True, color="green") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["green"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + axes = df.plot(color=list(custom_colors), subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: 
+ axes = df.plot(colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_area_colors(self): + from matplotlib import cm + from matplotlib.collections import PolyCollection + + custom_colors = "rgcby" + df = DataFrame(rand(5, 5)) + + ax = df.plot.area(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=custom_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=custom_colors) + + for h in handles: + assert h.get_alpha() is None + tm.close() + + ax = df.plot.area(colormap="jet") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=jet_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=jet_colors) + for h in handles: + assert h.get_alpha() is None + tm.close() + + # When stacked=False, alpha is set to 0.5 + ax = df.plot.area(colormap=cm.jet, stacked=False) + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, 
PolyCollection)] + jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] + self._check_colors(poly, facecolors=jet_with_alpha) + + handles, labels = ax.get_legend_handles_labels() + linecolors = jet_with_alpha + self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) + for h in handles: + assert h.get_alpha() == 0.5 + + @pytest.mark.slow + def test_hist_colors(self): + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + ax = df.plot.hist() + self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.hist(color=custom_colors) + self._check_colors(ax.patches[::10], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.hist(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.hist(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + + ax = df.plot(kind="hist", color="green") + self._check_colors(ax.patches[::10], facecolors=["green"] * 5) + tm.close() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(rand(5, 5)) + + ax = df.plot.kde(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + ax = df.plot.kde(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot.kde(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + 
self._check_colors(ax.get_lines(), linecolors=rgba_colors) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors_and_styles_subplots(self): + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + + axes = df.plot(kind="kde", subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(kind="kde", color="k", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(kind="kde", color="red", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["red"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(kind="kde", color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(kind="kde", colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(kind="kde", style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(kind="kde", style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_boxplot_colors(self): + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): + # TODO: 
outside this func? + if fliers_c is None: + fliers_c = "k" + self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) + self._check_colors( + bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) + ) + self._check_colors( + bp["medians"], linecolors=[medians_c] * len(bp["medians"]) + ) + self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) + self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + bp = df.plot.box(return_type="dict") + _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) + tm.close() + + dict_colors = dict( + boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" + ) + bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") + _check_colors( + bp, + dict_colors["boxes"], + dict_colors["whiskers"], + dict_colors["medians"], + dict_colors["caps"], + "r", + ) + tm.close() + + # partial colors + dict_colors = dict(whiskers="c", medians="m") + bp = df.plot.box(color=dict_colors, return_type="dict") + _check_colors(bp, default_colors[0], "c", "m") + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + bp = df.plot.box(colormap="jet", return_type="dict") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # Test colormap functionality + bp = df.plot.box(colormap=cm.jet, return_type="dict") + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # string color is applied to all artists except fliers + bp = df.plot.box(color="DodgerBlue", return_type="dict") + _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") + + # tuple is also applied to all artists except fliers + bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), 
"#123456") + + with pytest.raises(ValueError): + # Color contains invalid key results in ValueError + df.plot.box(color=dict(boxes="red", xxxx="blue")) + + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + + def test_default_color_cycle(self): + import cycler + import matplotlib.pyplot as plt + + colors = list("rgbk") + plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) + + df = DataFrame(randn(5, 3)) + ax = df.plot() + + expected = self._unpack_cycler(plt.rcParams)[:3] + self._check_colors(ax.get_lines(), linecolors=expected) + + def test_unordered_ts(self): + df = DataFrame( + np.array([3.0, 2.0, 1.0]), + index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], + columns=["test"], + ) + ax = df.plot() + xticks = ax.lines[0].get_xdata() + assert xticks[0] < xticks[1] + ydata = ax.lines[0].get_ydata() + tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) + + @td.skip_if_no_scipy + def test_kind_both_ways(self): + df = DataFrame({"x": [1, 2, 3]}) + for kind in plotting.PlotAccessor._common_kinds: + + df.plot(kind=kind) + getattr(df.plot, kind)() + for kind in ["scatter", "hexbin"]: + df.plot("x", "x", kind=kind) + getattr(df.plot, kind)("x", "x") + + def test_all_invalid_plot_data(self): + df = DataFrame(list("abcd")) + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + @pytest.mark.slow + def test_partially_invalid_plot_data(self): + with tm.RNGContext(42): + df = DataFrame(randn(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" 
+ for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + with tm.RNGContext(42): + # area plot doesn't support positive/negative mixed data + kinds = ["area"] + df = DataFrame(rand(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" + for kind in kinds: + with pytest.raises(TypeError): + df.plot(kind=kind) + + def test_invalid_kind(self): + df = DataFrame(randn(10, 2)) + with pytest.raises(ValueError): + df.plot(kind="aasdf") + + @pytest.mark.parametrize( + "x,y,lbl", + [ + (["B", "C"], "A", "a"), + (["A"], ["B", "C"], ["b", "c"]), + ("A", ["B", "C"], "badlabel"), + ], + ) + def test_invalid_xy_args(self, x, y, lbl): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + with pytest.raises(ValueError): + df.plot(x=x, y=y, label=lbl) + + @pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")]) + def test_invalid_xy_args_dup_cols(self, x, y): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB")) + with pytest.raises(ValueError): + df.plot(x=x, y=y) + + @pytest.mark.parametrize( + "x,y,lbl,colors", + [ + ("A", ["B"], ["b"], ["red"]), + ("A", ["B", "C"], ["b", "c"], ["red", "blue"]), + (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]), + ], + ) + def test_y_listlike(self, x, y, lbl, colors): + # GH 19699: tests list-like y and verifies lbls & colors + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + _check_plot_works(df.plot, x="A", y=y, label=lbl) + + ax = df.plot(x=x, y=y, label=lbl, color=colors) + assert len(ax.lines) == len(y) + self._check_colors(ax.get_lines(), linecolors=colors) + + @pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])]) + def test_xy_args_integer(self, x, y, colnames): + # GH 20056: tests integer args for xy and checks col names + df = DataFrame({"A": [1, 2], "B": [3, 
4]}) + df.columns = colnames + _check_plot_works(df.plot, x=x, y=y) + + @pytest.mark.slow + def test_hexbin_basic(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", gridsize=10) + # TODO: need better way to test. This just does existence. + assert len(ax.collections) == 1 + + # GH 6951 + axes = df.plot.hexbin(x="A", y="B", subplots=True) + # hexbin should have 2 axes in the figure, 1 for plotting and another + # is colorbar + assert len(axes[0].figure.axes) == 2 + # return value is single axes + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_hexbin_with_c(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", C="C") + assert len(ax.collections) == 1 + + ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std) + assert len(ax.collections) == 1 + + @pytest.mark.slow + def test_hexbin_cmap(self): + df = self.hexbin_df + + # Default to BuGn + ax = df.plot.hexbin(x="A", y="B") + assert ax.collections[0].cmap.name == "BuGn" + + cm = "cubehelix" + ax = df.plot.hexbin(x="A", y="B", colormap=cm) + assert ax.collections[0].cmap.name == cm + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + @pytest.mark.slow + def test_allow_cmap(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", cmap="YlGn") + assert ax.collections[0].cmap.name == "YlGn" + + with pytest.raises(TypeError): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + + @pytest.mark.slow + def test_pie_df(self): + df = DataFrame( + np.random.rand(5, 3), + columns=["X", "Y", "Z"], + index=["a", "b", "c", "d", "e"], + ) + with pytest.raises(ValueError): + df.plot.pie() + + ax = _check_plot_works(df.plot.pie, y="Y") + self._check_text_labels(ax.texts, df.index) + + ax = _check_plot_works(df.plot.pie, y=2) + self._check_text_labels(ax.texts, df.index) + + # _check_plot_works adds an ax so catch 
warning. see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.pie, subplots=True) + assert len(axes) == len(df.columns) + for ax in axes: + self._check_text_labels(ax.texts, df.index) + for ax, ylabel in zip(axes, df.columns): + assert ax.get_ylabel() == ylabel + + labels = ["A", "B", "C", "D", "E"] + color_args = ["r", "g", "b", "c", "m"] + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.plot.pie, subplots=True, labels=labels, colors=color_args + ) + assert len(axes) == len(df.columns) + + for ax in axes: + self._check_text_labels(ax.texts, labels) + self._check_colors(ax.patches, facecolors=color_args) + + def test_pie_df_nan(self): + df = DataFrame(np.random.rand(4, 4)) + for i in range(4): + df.iloc[i, i] = np.nan + fig, axes = self.plt.subplots(ncols=4) + df.plot.pie(subplots=True, ax=axes, legend=True) + + base_expected = ["0", "1", "2", "3"] + for i, ax in enumerate(axes): + expected = list(base_expected) # force copy + expected[i] = "" + result = [x.get_text() for x in ax.texts] + assert result == expected + # legend labels + # NaN's not included in legend with subplots + # see https://github.com/pandas-dev/pandas/issues/8390 + assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[ + :i + ] + base_expected[i + 1 :] + + @pytest.mark.slow + def test_errorbar_plot(self): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works( + (df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", 
log=True + ) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # yerr is raw error values + ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # yerr is column name + for yerr in ["yerr", "誤差"]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + with pytest.raises(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) + + @pytest.mark.slow + @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) + def test_errorbar_plot_different_kinds(self, kind): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + + ax = _check_plot_works(df.plot, yerr=df_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + ax = _check_plot_works(df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared + # Similar warnings were observed in 
GH #13188 + axes = _check_plot_works( + df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind + ) + self._check_has_errorbars(axes, xerr=1, yerr=1) + + @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError) + @pytest.mark.slow + def test_errorbar_plot_iterator(self): + with warnings.catch_warnings(): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + + # yerr is iterator + ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + @pytest.mark.slow + def test_errorbar_with_integer_column_names(self): + # test with integer column names + df = DataFrame(np.random.randn(10, 2)) + df_err = DataFrame(np.random.randn(10, 2)) + ax = _check_plot_works(df.plot, yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, y=0, yerr=1) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + @pytest.mark.slow + def test_errorbar_with_partial_columns(self): + df = DataFrame(np.random.randn(10, 3)) + df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) + kinds = ["line", "bar"] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ix = date_range("1/1/2000", periods=10, freq="M") + df.set_index(ix, inplace=True) + df_err.set_index(ix, inplace=True) + ax = _check_plot_works(df.plot, yerr=df_err, kind="line") + self._check_has_errorbars(ax, xerr=0, yerr=2) + + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + for err in [d_err, df_err]: + ax = _check_plot_works(df.plot, yerr=err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + @pytest.mark.slow + @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) + def test_errorbar_timeseries(self, kind): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 
0.4} + + # check time-series plots + ix = date_range("1/1/2000", "1/1/2001", freq="M") + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) + + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ax = _check_plot_works(tdf.plot, y="y", yerr=tdf_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared + # Similar warnings were observed in GH #13188 + axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) + self._check_has_errorbars(axes, xerr=0, yerr=1) + + def test_errorbar_asymmetrical(self): + + np.random.seed(0) + err = np.random.rand(3, 2, 5) + + # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... 
+ df = DataFrame(np.arange(15).reshape(3, 5)).T + + ax = df.plot(yerr=err, xerr=err / 2) + + yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1] + expected_0_0 = err[0, :, 0] * np.array([-1, 1]) + tm.assert_almost_equal(yerr_0_0, expected_0_0) + + with pytest.raises(ValueError): + df.plot(yerr=err.T) + + tm.close() + + def test_table(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + _check_plot_works(df.plot, table=True) + _check_plot_works(df.plot, table=df) + + # GH 35945 UserWarning + with tm.assert_produces_warning(None): + ax = df.plot() + assert len(ax.tables) == 0 + plotting.table(ax, df.T) + assert len(ax.tables) == 1 + + def test_errorbar_scatter(self): + df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"]) + df_err = DataFrame( + np.random.randn(5, 2) / 5, index=range(5), columns=["x", "y"] + ) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y") + self._check_has_errorbars(ax, xerr=0, yerr=0) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=0) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=1) + + def _check_errorbar_color(containers, expected, has_err="has_xerr"): + lines = [] + errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0] + for el in errs: + if is_list_like(el): + lines.extend(el) + else: + lines.append(el) + err_lines = [x for x in lines if x in ax.collections] + self._check_colors( + err_lines, linecolors=np.array([expected] * len(err_lines)) + ) + + # GH 8081 + df = DataFrame(np.random.randn(10, 5), columns=["a", "b", "c", "d", "e"]) + ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") + self._check_has_errorbars(ax, xerr=1, yerr=1) + _check_errorbar_color(ax.containers, "red", 
has_err="has_xerr") + _check_errorbar_color(ax.containers, "red", has_err="has_yerr") + + ax = df.plot.scatter(x="a", y="b", yerr="e", color="green") + self._check_has_errorbars(ax, xerr=0, yerr=1) + _check_errorbar_color(ax.containers, "green", has_err="has_yerr") + + @pytest.mark.slow + def test_sharex_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + plt.close("all") + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[0], axes[2]]: + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[1], axes[3]]: + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharex=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! 
+ for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_sharey_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + for ax in [axes[0], axes[1]]: + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[2], axes[3]]: + self._check_visible(ax.get_yticklabels(), visible=False) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharey=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharey=True) + + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! 
+ for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + @td.skip_if_no_scipy + def test_memory_leak(self): + """ Check that every plot type gets properly collected. """ + import gc + import weakref + + results = {} + for kind in plotting.PlotAccessor._all_kinds: + + args = {} + if kind in ["hexbin", "scatter", "pie"]: + df = self.hexbin_df + args = {"x": "A", "y": "B"} + elif kind == "area": + df = self.tdf.abs() + else: + df = self.tdf + + # Use a weakref so we can see if the object gets collected without + # also preventing it from being collected + results[kind] = weakref.proxy(df.plot(kind=kind, **args)) + + # have matplotlib delete all the figures + tm.close() + # force a garbage collection + gc.collect() + for key in results: + # check that every plot was collected + with pytest.raises(ReferenceError): + # need to actually access something to get an error + results[key].lines + + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with 
tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_df_gridspec_patterns(self): + # GH 10819 + import matplotlib.gridspec as gridspec + import matplotlib.pyplot as plt + + ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) + + df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB")) + + def _get_vertical_grid(): + gs = gridspec.GridSpec(3, 1) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :]) + ax2 = fig.add_subplot(gs[2, :]) + return ax1, ax2 + + def _get_horizontal_grid(): + gs = gridspec.GridSpec(1, 3) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:, :2]) + ax2 = fig.add_subplot(gs[:, 2]) + return ax1, ax2 + + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + ax1 = ts.plot(ax=ax1) + assert len(ax1.lines) == 1 + ax2 = df.plot(ax=ax2) + assert len(ax2.lines) == 2 + for ax in [ax1, ax2]: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots=True + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + axes = 
df.plot(subplots=True, ax=[ax1, ax2]) + assert len(ax1.lines) == 1 + assert len(ax2.lines) == 1 + for ax in axes: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # vertical / subplots / sharex=True / sharey=True + ax1, ax2 = _get_vertical_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + for ax in [ax1, ax2]: + # yaxis are visible because there is only one column + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of axes0 (top) are hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # horizontal / subplots / sharex=True / sharey=True + ax1, ax2 = _get_horizontal_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + self._check_visible(axes[0].get_yticklabels(), visible=True) + # yaxis of axes1 (right) are hidden + self._check_visible(axes[1].get_yticklabels(), visible=False) + for ax in [ax1, ax2]: + # xaxis are visible because there is only one column + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # boxed + def _get_boxed_grid(): + gs = gridspec.GridSpec(3, 3) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :2]) + ax2 = fig.add_subplot(gs[:2, 2]) + ax3 = fig.add_subplot(gs[2, :2]) + ax4 = fig.add_subplot(gs[2, 2]) + return ax1, ax2, ax3, ax4 + + axes = _get_boxed_grid() + df 
= DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD")) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + # axis are visible because these are not shared + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots / sharex=True / sharey=True + axes = _get_boxed_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) + for ax in axes: + assert len(ax.lines) == 1 + for ax in [axes[0], axes[2]]: # left column + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[1], axes[3]]: # right column + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in [axes[0], axes[1]]: # top row + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[2], axes[3]]: # bottom row + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_df_grid_settings(self): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + self._check_grid_settings( + DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}), + plotting.PlotAccessor._dataframe_kinds, + kws={"x": "a", "y": "b"}, + ) + + def test_invalid_colormap(self): + df = DataFrame(randn(3, 2), columns=["A", "B"]) + + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") + + def test_plain_axes(self): + + # supplied ax itself is a SubplotAxes, but figure contains also + # a plain Axes object (GH11556) + fig, ax = self.plt.subplots() + fig.add_axes([0.2, 0.2, 0.2, 0.2]) + Series(rand(10)).plot(ax=ax) + + # supplied ax itself is a plain Axes, but because the cmap keyword + # a new ax is created for the colorbar -> also 
multiples axes (GH11520) + df = DataFrame({"a": randn(8), "b": randn(8)}) + fig = self.plt.figure() + ax = fig.add_axes((0, 0, 1, 1)) + df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") + + # other examples + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1 import make_axes_locatable + + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=cax) + + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1.inset_locator import inset_axes + + iax = inset_axes(ax, width="30%", height=1.0, loc=3) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=iax) + + def test_passed_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + colormap = mpl.colors.ListedColormap(color_tuples) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_rcParams_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + @pytest.mark.parametrize("method", ["line", "barh", "bar"]) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = ( + pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1) + ) + + fontsize = 20 + sy = ["C", "D"] + + kwargs = dict(secondary_y=sy, fontsize=fontsize, mark_right=True) + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize) + + @pytest.mark.slow + def test_x_string_values_ticks(self): + # Test if string plot index have a fixed xtick position + # GH: 7612, GH: 22334 + df = 
pd.DataFrame( + { + "sales": [3, 2, 3], + "visits": [20, 42, 28], + "day": ["Monday", "Tuesday", "Wednesday"], + } + ) + ax = df.plot.area(x="day") + ax.set_xlim(-1, 3) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["Monday"] == 0.0 + assert labels_position["Tuesday"] == 1.0 + assert labels_position["Wednesday"] == 2.0 + + @pytest.mark.slow + def test_x_multiindex_values_ticks(self): + # Test if multiindex plot index have a fixed xtick position + # GH: 15912 + index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) + df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + ax = df.plot() + ax.set_xlim(-1, 4) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["(2012, 1)"] == 0.0 + assert labels_position["(2012, 2)"] == 1.0 + assert labels_position["(2013, 1)"] == 2.0 + assert labels_position["(2013, 2)"] == 3.0 + + @pytest.mark.parametrize("kind", ["line", "area"]) + def test_xlim_plot_line(self, kind): + # test if xlim is set correctly in plot.line and plot.area + # GH 27686 + df = pd.DataFrame([2, 4], index=[1, 2]) + ax = df.plot(kind=kind) + xlims = ax.get_xlim() + assert xlims[0] < 1 + assert xlims[1] > 2 + + def test_xlim_plot_line_correctly_in_mixed_plot_type(self): + # test if xlim is set correctly when ax contains multiple different kinds + # of plots, GH 27686 + fig, ax = self.plt.subplots() + + indexes = ["k1", "k2", "k3", "k4"] + df = pd.DataFrame( + { + "s1": [1000, 2000, 1500, 2000], + "s2": [900, 1400, 2000, 3000], + "s3": [1500, 1500, 1600, 1200], + "secondary_y": [1, 3, 4, 3], + }, + index=indexes, + ) + df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False) + df[["secondary_y"]].plot(ax=ax, secondary_y=True) + + xlims = 
ax.get_xlim() + assert xlims[0] < 0 + assert xlims[1] > 3 + + # make sure axis labels are plotted correctly as well + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + assert xticklabels == indexes + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + def test_plot_no_rows(self): + # GH 27758 + df = pd.DataFrame(columns=["foo"], dtype=int) + assert df.empty + ax = df.plot() + assert len(ax.get_lines()) == 1 + line = ax.get_lines()[0] + assert len(line.get_xdata()) == 0 + assert len(line.get_ydata()) == 0 + + def test_plot_no_numeric_data(self): + df = pd.DataFrame(["a", "b", "c"]) + with pytest.raises(TypeError): + df.plot() + + def test_missing_markers_legend(self): + # 14958 + df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + ax = df.plot(y=["A"], marker="x", linestyle="solid") + df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) + df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, expected_markers=["x", "o", "<"]) + + def test_missing_markers_legend_using_style(self): + # 14563 + df = pd.DataFrame( + { + "A": [1, 2, 3, 4, 5, 6], + "B": [2, 4, 1, 3, 2, 4], + "C": [3, 3, 2, 6, 4, 2], + "X": [1, 2, 3, 4, 5, 6], + } + ) + + fig, ax = self.plt.subplots() + for kind in "ABC": + df.plot("X", kind, label=kind, ax=ax, style=".") + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, expected_markers=[".", ".", "."]) + + def 
test_colors_of_columns_with_same_name(self): + # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 + # Creating a DataFrame with duplicate column labels and testing colors of them. + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) + df_concat = pd.concat([df, df1], axis=1) + result = df_concat.plot() + for legend, line in zip(result.get_legend().legendHandles, result.lines): + assert legend.get_color() == line.get_color() + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_single_plot( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + ax = df.plot(kind=kind) + assert ax.get_xlabel() == old_label + assert ax.get_ylabel() == "" + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label) + assert ax.get_ylabel() == str(new_label) + assert ax.get_xlabel() == str(new_label) + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # 
old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + +def _generate_4_axes_via_gridspec(): + import matplotlib as mpl + import matplotlib.gridspec + import matplotlib.pyplot as plt + + gs = mpl.gridspec.GridSpec(2, 2) + ax_tl = plt.subplot(gs[0, 0]) + ax_ll = plt.subplot(gs[1, 0]) + ax_tr = plt.subplot(gs[0, 1]) + ax_lr = plt.subplot(gs[1, 1]) + + return gs, [ax_tl, ax_ll, ax_tr, ax_lr] From 8f6cde0e38431621efa70f0e66bd1d0b0ebf7dfe Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 003/147] Transfer tests of test_frame.py to test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 582 ---- .../tests/plotting/frame/test_frame_color.py | 2917 +---------------- 2 files changed, 29 insertions(+), 3470 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index d4d2256d209cf..4d339b93fd30d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -168,74 +168,6 @@ def test_integer_array_plot(self): _check_plot_works(df.plot.scatter, x="x", y="y") _check_plot_works(df.plot.hexbin, x="x", y="y") - def test_mpl2_color_cycle_str(self): - # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) - colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", "MatplotlibDeprecationWarning") - - for color in colors: - _check_plot_works(df.plot, color=color) - - # if warning is raised, check that it is the exact problematic one - # GH 36972 - if w: - match = "Support for uppercase single-letter colors is deprecated" - warning_message = str(w[0].message) - msg = "MatplotlibDeprecationWarning related 
to CN colors was raised" - assert match not in warning_message, msg - - def test_color_single_series_list(self): - # GH 3486 - df = DataFrame({"A": [1, 2, 3]}) - _check_plot_works(df.plot, color=["red"]) - - def test_rgb_tuple_color(self): - # GH 16695 - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) - - def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) - with pytest.raises(ValueError): - df.plot(color="") - - def test_color_and_style_arguments(self): - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - # passing both 'color' and 'style' arguments should be allowed - # if there is no color symbol in the style strings: - ax = df.plot(color=["red", "black"], style=["-", "--"]) - # check that the linestyles are correctly set: - linestyle = [line.get_linestyle() for line in ax.lines] - assert linestyle == ["-", "--"] - # check that the colors are correctly set: - color = [line.get_color() for line in ax.lines] - assert color == ["red", "black"] - # passing both 'color' and 'style' arguments should not be allowed - # if there is a color symbol in the style strings: - with pytest.raises(ValueError): - df.plot(color=["red", "black"], style=["k-", "r--"]) - - @pytest.mark.parametrize( - "color, expected", - [ - ("green", ["green"] * 4), - (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]), - ], - ) - def test_color_and_marker(self, color, expected): - # GH 21003 - df = DataFrame(np.random.random((7, 4))) - ax = df.plot(color=color, style="d--") - # check colors - result = [i.get_color() for i in ax.lines] - assert result == expected - # check markers and linestyles - assert all(i.get_linestyle() == "--" for i in ax.lines) - assert all(i.get_marker() == "d" for i in ax.lines) - def test_nonnumeric_exclude(self): df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) ax = df.plot() @@ -948,60 +880,6 @@ def 
test_area_lim(self): ymin, ymax = ax.get_ylim() assert ymax == 0 - @pytest.mark.slow - def test_bar_colors(self): - import matplotlib.pyplot as plt - - default_colors = self._unpack_cycler(plt.rcParams) - - df = DataFrame(randn(5, 5)) - ax = df.plot.bar() - self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.bar(color=custom_colors) - self._check_colors(ax.patches[::5], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.bar(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - # Test colormap functionality - ax = df.plot.bar(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - tm.close() - - ax = df.plot(kind="bar", color="green") - self._check_colors(ax.patches[::5], facecolors=["green"] * 5) - tm.close() - - def test_bar_user_colors(self): - df = pd.DataFrame( - {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} - ) - # This should *only* work when `y` is specified, else - # we use one color per column - ax = df.plot.bar(y="A", color=df["color"]) - result = [p.get_facecolor() for p in ax.patches] - expected = [ - (1.0, 0.0, 0.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (1.0, 0.0, 0.0, 1.0), - ] - assert result == expected - @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(randn(5, 5)) @@ -1226,28 +1104,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow - def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): - # addressing issue #10611, to 
ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. - random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax1 = df.plot.scatter(x="A label", y="B label") - ax2 = df.plot.scatter(x="A label", y="B label", c="C label") - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] - assert vis1 == vis2 - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] - assert vis1 == vis2 - - assert ( - ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() - ) @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): @@ -1262,24 +1118,6 @@ def test_if_hexbin_xaxis_label_is_visible(self): assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) assert ax.xaxis.get_label().get_visible() - @pytest.mark.slow - def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): - import matplotlib.pyplot as plt - - random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) - - fig, axes = plt.subplots(1, 2) - df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) - df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) - plt.tight_layout() - - points = np.array([ax.get_position().get_points() for ax in fig.axes]) - axes_x_coords = points[:, :, 0] - parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] - colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] - assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() - @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): @@ -1342,17 +1180,6 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 
1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - @pytest.mark.parametrize("cmap", [None, "Greys"]) - def test_scatter_with_c_column_name_with_colors(self, cmap): - # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( - [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], - columns=["length", "width"], - ) - df["species"] = ["r", "r", "g", "g", "b"] - ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) - assert ax.collections[0].colorbar is None - def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) @@ -2077,383 +1904,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.slow - def test_line_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) - - ax = df.plot(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - - tm.close() - - ax2 = df.plot(color=custom_colors) - lines2 = ax2.get_lines() - - for l1, l2 in zip(ax.get_lines(), lines2): - assert l1.get_color() == l2.get_color() - - tm.close() - - ax = df.plot(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - ax = df.loc[:, [0]].plot(color="DodgerBlue") - self._check_colors(ax.lines, linecolors=["DodgerBlue"]) - - ax = df.plot(color="red") - self._check_colors(ax.get_lines(), linecolors=["red"] * 5) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - ax = df.plot(color=custom_colors) - 
self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - @pytest.mark.slow - def test_dont_modify_colors(self): - colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) - assert len(colors) == 3 - - @pytest.mark.slow - def test_line_colors_and_styles_subplots(self): - # GH 9894 - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(randn(5, 5)) - - axes = df.plot(subplots=True) - for ax, c in zip(axes, list(default_colors)): - c = [c] - self._check_colors(ax.get_lines(), linecolors=c) - tm.close() - - # single color char - axes = df.plot(subplots=True, color="k") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(subplots=True, color="green") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["green"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - axes = df.plot(color=list(custom_colors), subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, 
linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_area_colors(self): - from matplotlib import cm - from matplotlib.collections import PolyCollection - - custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) - - ax = df.plot.area(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=custom_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=custom_colors) - - for h in handles: - assert h.get_alpha() is None - tm.close() - - ax = df.plot.area(colormap="jet") - jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=jet_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=jet_colors) - for h in handles: - assert h.get_alpha() is None - tm.close() - - # When stacked=False, alpha is set to 0.5 - ax = df.plot.area(colormap=cm.jet, stacked=False) - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] - self._check_colors(poly, facecolors=jet_with_alpha) - - handles, labels = ax.get_legend_handles_labels() - linecolors = jet_with_alpha - self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) - for h in handles: - assert h.get_alpha() == 0.5 - - 
@pytest.mark.slow - def test_hist_colors(self): - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(randn(5, 5)) - ax = df.plot.hist() - self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.hist(color=custom_colors) - self._check_colors(ax.patches[::10], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.hist(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - # Test colormap functionality - ax = df.plot.hist(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - - ax = df.plot(kind="hist", color="green") - self._check_colors(ax.patches[::10], facecolors=["green"] * 5) - tm.close() - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) - - ax = df.plot.kde(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - ax = df.plot.kde(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot.kde(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors_and_styles_subplots(self): - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(randn(5, 5)) - - axes = df.plot(kind="kde", subplots=True) - for ax, c in zip(axes, 
list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # single color char - axes = df.plot(kind="kde", color="k", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(kind="kde", color="red", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["red"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(kind="kde", color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(kind="kde", colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(kind="kde", style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(kind="kde", style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_boxplot_colors(self): - def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): - # TODO: outside this func? 
- if fliers_c is None: - fliers_c = "k" - self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) - self._check_colors( - bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) - ) - self._check_colors( - bp["medians"], linecolors=[medians_c] * len(bp["medians"]) - ) - self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) - self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(randn(5, 5)) - bp = df.plot.box(return_type="dict") - _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) - tm.close() - - dict_colors = dict( - boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" - ) - bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") - _check_colors( - bp, - dict_colors["boxes"], - dict_colors["whiskers"], - dict_colors["medians"], - dict_colors["caps"], - "r", - ) - tm.close() - - # partial colors - dict_colors = dict(whiskers="c", medians="m") - bp = df.plot.box(color=dict_colors, return_type="dict") - _check_colors(bp, default_colors[0], "c", "m") - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - bp = df.plot.box(colormap="jet", return_type="dict") - jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] - _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # Test colormap functionality - bp = df.plot.box(colormap=cm.jet, return_type="dict") - _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # string color is applied to all artists except fliers - bp = df.plot.box(color="DodgerBlue", return_type="dict") - _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") - - # tuple is also applied to all artists except fliers - bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") - _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") - - 
with pytest.raises(ValueError): - # Color contains invalid key results in ValueError - df.plot.box(color=dict(boxes="red", xxxx="blue")) - - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - - def test_default_color_cycle(self): - import cycler - import matplotlib.pyplot as plt - - colors = list("rgbk") - plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - - df = DataFrame(randn(5, 3)) - ax = df.plot() - - expected = self._unpack_cycler(plt.rcParams)[:3] - self._check_colors(ax.get_lines(), linecolors=expected) - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -3211,12 +2661,6 @@ def test_df_grid_settings(self): kws={"x": "a", "y": "b"}, ) - def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) - - with pytest.raises(ValueError): - df.plot(colormap="invalid_colormap") - def test_plain_axes(self): # supplied ax itself is a SubplotAxes, but figure contains also @@ -3248,22 +2692,6 @@ def test_plain_axes(self): Series(rand(10)).plot(ax=ax) Series(rand(10)).plot(ax=iax) - def test_passed_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - - def test_rcParams_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = 
pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 @@ -3411,16 +2839,6 @@ def test_missing_markers_legend_using_style(self): self._check_legend_labels(ax, labels=["A", "B", "C"]) self._check_legend_marker(ax, expected_markers=[".", ".", "."]) - def test_colors_of_columns_with_same_name(self): - # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 - # Creating a DataFrame with duplicate column labels and testing colors of them. - df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) - df_concat = pd.concat([df, df1], axis=1) - result = df_concat.plot() - for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() - @pytest.mark.parametrize( "index_name, old_label, new_label", [ diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index d4d2256d209cf..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -24,7 +24,7 @@ @td.skip_if_no_mpl -class TestDataFramePlots(TestPlotBase): +class TestDataFrameColor(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl @@ -48,125 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - @pytest.mark.slow - def test_plot(self): - from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0 - - df = self.tdf - _check_plot_works(df.plot, grid=False) - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True) - self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) - - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True, use_index=False) - self._check_ticks_props(axes, xrot=0) - self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - if mpl_ge_3_1_0(): - msg = "'Line2D' object has no property 'blarg'" - else: - msg = "Unknown property blarg" - with pytest.raises(AttributeError, match=msg): - df.plot.line(blarg=True) - - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - ax = _check_plot_works(df.plot, use_index=True) - self._check_ticks_props(ax, xrot=0) - _check_plot_works(df.plot, sort_columns=False) - _check_plot_works(df.plot, yticks=[1, 5, 10]) - _check_plot_works(df.plot, xticks=[1, 5, 10]) - _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) - - with tm.assert_produces_warning(UserWarning): - _check_plot_works(df.plot, subplots=True, title="blah") - - # We have to redo it here because _check_plot_works does two plots, - # once without an ax kwarg and once with an ax kwarg and the new sharex - # behaviour does not remove the visibility of the latter axis (as ax is - # present). 
see: https://github.com/pandas-dev/pandas/issues/9737 - - axes = df.plot(subplots=True, title="blah") - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - # axes[0].figure.savefig("test.png") - for ax in axes[:2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible([ax.xaxis.get_label()], visible=False) - for ax in [axes[2]]: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible([ax.xaxis.get_label()]) - self._check_ticks_props(ax, xrot=0) - - _check_plot_works(df.plot, title="blah") - - tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) - ax = _check_plot_works(df.plot, use_index=True) - self._check_ticks_props(ax, xrot=0) - - # unicode - index = MultiIndex.from_tuples( - [ - ("\u03b1", 0), - ("\u03b1", 1), - ("\u03b2", 2), - ("\u03b2", 3), - ("\u03b3", 4), - ("\u03b3", 5), - ("\u03b4", 6), - ("\u03b4", 7), - ], - names=["i0", "i1"], - ) - columns = MultiIndex.from_tuples( - [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] - ) - df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) - _check_plot_works(df.plot, title="\u03A3") - - # GH 6951 - # Test with single column - df = DataFrame({"x": np.random.rand(10)}) - axes = _check_plot_works(df.plot.bar, subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - - axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1)) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - # When ax is supplied and required number of axes is 1, - # passed ax should be used: - fig, ax = self.plt.subplots() - axes = df.plot.bar(subplots=True, ax=ax) - assert len(axes) == 1 - result = ax.axes - assert result is axes[0] - - def test_integer_array_plot(self): - # GH 25587 - arr = 
integer_array([1, 2, 3, 4], dtype="UInt32") - - s = Series(arr) - _check_plot_works(s.plot.line) - _check_plot_works(s.plot.bar) - _check_plot_works(s.plot.hist) - _check_plot_works(s.plot.pie) - - df = DataFrame({"x": arr, "y": arr}) - _check_plot_works(df.plot.line) - _check_plot_works(df.plot.bar) - _check_plot_works(df.plot.hist) - _check_plot_works(df.plot.pie, y="y") - _check_plot_works(df.plot.scatter, x="x", y="y") - _check_plot_works(df.plot.hexbin, x="x", y="y") def test_mpl2_color_cycle_str(self): # GH 15516 @@ -236,718 +117,6 @@ def test_color_and_marker(self, color, expected): assert all(i.get_linestyle() == "--" for i in ax.lines) assert all(i.get_marker() == "d" for i in ax.lines) - def test_nonnumeric_exclude(self): - df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) - ax = df.plot() - assert len(ax.get_lines()) == 1 # B was plotted - - @pytest.mark.slow - def test_implicit_label(self): - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) - ax = df.plot(x="a", y="b") - self._check_text_labels(ax.xaxis.get_label(), "a") - - @pytest.mark.slow - def test_donot_overwrite_index_name(self): - # GH 8494 - df = DataFrame(randn(2, 2), columns=["a", "b"]) - df.index.name = "NAME" - df.plot(y="b", label="LABEL") - assert df.index.name == "NAME" - - @pytest.mark.slow - def test_plot_xy(self): - # columns.inferred_type == 'string' - df = self.tdf - self._check_data(df.plot(x=0, y=1), df.set_index("A")["B"].plot()) - self._check_data(df.plot(x=0), df.set_index("A").plot()) - self._check_data(df.plot(y=0), df.B.plot()) - self._check_data(df.plot(x="A", y="B"), df.set_index("A").B.plot()) - self._check_data(df.plot(x="A"), df.set_index("A").plot()) - self._check_data(df.plot(y="B"), df.B.plot()) - - # columns.inferred_type == 'integer' - df.columns = np.arange(1, len(df.columns) + 1) - self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) - self._check_data(df.plot(x=1), df.set_index(1).plot()) - self._check_data(df.plot(y=1), df[1].plot()) - - # 
figsize and title - ax = df.plot(x=1, y=2, title="Test", figsize=(16, 8)) - self._check_text_labels(ax.title, "Test") - self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0)) - - # columns.inferred_type == 'mixed' - # TODO add MultiIndex test - - @pytest.mark.slow - @pytest.mark.parametrize( - "input_log, expected_log", [(True, "log"), ("sym", "symlog")] - ) - def test_logscales(self, input_log, expected_log): - df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) - - ax = df.plot(logy=input_log) - self._check_ax_scales(ax, yaxis=expected_log) - assert ax.get_yscale() == expected_log - - ax = df.plot(logx=input_log) - self._check_ax_scales(ax, xaxis=expected_log) - assert ax.get_xscale() == expected_log - - ax = df.plot(loglog=input_log) - self._check_ax_scales(ax, xaxis=expected_log, yaxis=expected_log) - assert ax.get_xscale() == expected_log - assert ax.get_yscale() == expected_log - - @pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"]) - def test_invalid_logscale(self, input_param): - # GH: 24867 - df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) - - msg = "Boolean, None and 'sym' are valid options, 'sm' is given." 
- with pytest.raises(ValueError, match=msg): - df.plot(**{input_param: "sm"}) - - @pytest.mark.slow - def test_xcompat(self): - import pandas as pd - - df = self.tdf - ax = df.plot(x_compat=True) - lines = ax.get_lines() - assert not isinstance(lines[0].get_xdata(), PeriodIndex) - self._check_ticks_props(ax, xrot=30) - - tm.close() - pd.plotting.plot_params["xaxis.compat"] = True - ax = df.plot() - lines = ax.get_lines() - assert not isinstance(lines[0].get_xdata(), PeriodIndex) - self._check_ticks_props(ax, xrot=30) - - tm.close() - pd.plotting.plot_params["x_compat"] = False - - ax = df.plot() - lines = ax.get_lines() - assert not isinstance(lines[0].get_xdata(), PeriodIndex) - assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) - - tm.close() - # useful if you're plotting a bunch together - with pd.plotting.plot_params.use("x_compat", True): - ax = df.plot() - lines = ax.get_lines() - assert not isinstance(lines[0].get_xdata(), PeriodIndex) - self._check_ticks_props(ax, xrot=30) - - tm.close() - ax = df.plot() - lines = ax.get_lines() - assert not isinstance(lines[0].get_xdata(), PeriodIndex) - assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) - self._check_ticks_props(ax, xrot=0) - - def test_period_compat(self): - # GH 9012 - # period-array conversions - df = DataFrame( - np.random.rand(21, 2), - index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), - columns=["a", "b"], - ) - - df.plot() - self.plt.axhline(y=0) - tm.close() - - def test_unsorted_index(self): - df = DataFrame( - {"y": np.arange(100)}, index=np.arange(99, -1, -1), dtype=np.int64 - ) - ax = df.plot() - lines = ax.get_lines()[0] - rs = lines.get_xydata() - rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y") - tm.assert_series_equal(rs, df.y, check_index_type=False) - tm.close() - - df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64) - ax = df.plot() - lines = ax.get_lines()[0] - rs = lines.get_xydata() - rs = Series(rs[:, 1], rs[:, 0], 
dtype=np.int64, name="y") - tm.assert_series_equal(rs, df.y) - - def test_unsorted_index_lims(self): - df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0]) - ax = df.plot() - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= np.nanmin(lines[0].get_data()[0]) - assert xmax >= np.nanmax(lines[0].get_data()[0]) - - df = DataFrame( - {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]}, - index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0], - ) - ax = df.plot() - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= np.nanmin(lines[0].get_data()[0]) - assert xmax >= np.nanmax(lines[0].get_data()[0]) - - df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]}) - ax = df.plot(x="z", y="y") - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= np.nanmin(lines[0].get_data()[0]) - assert xmax >= np.nanmax(lines[0].get_data()[0]) - - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - 
self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - 
self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - 
pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 
00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = pd.DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = 
df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - 
) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - 
self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - - def test_negative_log(self): - df = -DataFrame( - rand(6, 4), - index=list(string.ascii_letters[:6]), - columns=["x", "y", "z", "four"], - ) - - with pytest.raises(ValueError): - df.plot.area(logy=True) - with pytest.raises(ValueError): - df.plot.area(loglog=True) - - def _compare_stacked_y_cood(self, normal_lines, stacked_lines): - base = np.zeros(len(normal_lines[0].get_data()[1])) - for nl, sl in zip(normal_lines, stacked_lines): - base += nl.get_data()[1] # get y coordinates - sy = sl.get_data()[1] - tm.assert_numpy_array_equal(base, sy) - - def test_line_area_stacked(self): - with tm.RNGContext(42): - df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) - neg_df = -df - # each column has either positive or negative value - sep_df = DataFrame( - {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} - ) - # each column has positive-negative mixed value - mixed_df = DataFrame( - randn(6, 4), - index=list(string.ascii_letters[:6]), - columns=["w", "x", "y", "z"], - ) - - for kind in ["line", "area"]: - ax1 = _check_plot_works(df.plot, kind=kind, stacked=False) - ax2 = _check_plot_works(df.plot, kind=kind, stacked=True) - self._compare_stacked_y_cood(ax1.lines, ax2.lines) - - ax1 = 
_check_plot_works(neg_df.plot, kind=kind, stacked=False) - ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True) - self._compare_stacked_y_cood(ax1.lines, ax2.lines) - - ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False) - ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True) - self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2]) - self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) - - _check_plot_works(mixed_df.plot, stacked=False) - with pytest.raises(ValueError): - mixed_df.plot(stacked=True) - - # Use an index with strictly positive values, preventing - # matplotlib from warning about ignoring xlim - df2 = df.set_index(df.index + 1) - _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True) - - def test_line_area_nan_df(self): - values1 = [1, 2, np.nan, 3] - values2 = [3, np.nan, 2, 1] - df = DataFrame({"a": values1, "b": values2}) - tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4)) - - for d in [df, tdf]: - ax = _check_plot_works(d.plot) - masked1 = ax.lines[0].get_ydata() - masked2 = ax.lines[1].get_ydata() - # remove nan for comparison purpose - - exp = np.array([1, 2, 3], dtype=np.float64) - tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp) - - exp = np.array([3, 2, 1], dtype=np.float64) - tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp) - tm.assert_numpy_array_equal( - masked1.mask, np.array([False, False, True, False]) - ) - tm.assert_numpy_array_equal( - masked2.mask, np.array([False, True, False, False]) - ) - - expected1 = np.array([1, 2, 0, 3], dtype=np.float64) - expected2 = np.array([3, 0, 2, 1], dtype=np.float64) - - ax = _check_plot_works(d.plot, stacked=True) - tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) - - ax = _check_plot_works(d.plot.area) - tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - 
tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) - - ax = _check_plot_works(d.plot.area, stacked=False) - tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) - - def test_line_lim(self): - df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) - ax = df.plot() - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= lines[0].get_data()[0][0] - assert xmax >= lines[0].get_data()[0][-1] - - ax = df.plot(secondary_y=True) - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= lines[0].get_data()[0][0] - assert xmax >= lines[0].get_data()[0][-1] - - axes = df.plot(secondary_y=True, subplots=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - for ax in axes: - assert hasattr(ax, "left_ax") - assert not hasattr(ax, "right_ax") - xmin, xmax = ax.get_xlim() - lines = ax.get_lines() - assert xmin <= lines[0].get_data()[0][0] - assert xmax >= lines[0].get_data()[0][-1] - - def test_area_lim(self): - df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) - - neg_df = -df - for stacked in [True, False]: - ax = _check_plot_works(df.plot.area, stacked=stacked) - xmin, xmax = ax.get_xlim() - ymin, ymax = ax.get_ylim() - lines = ax.get_lines() - assert xmin <= lines[0].get_data()[0][0] - assert xmax >= lines[0].get_data()[0][-1] - assert ymin == 0 - - ax = _check_plot_works(neg_df.plot.area, stacked=stacked) - ymin, ymax = ax.get_ylim() - assert ymax == 0 - @pytest.mark.slow def test_bar_colors(self): import matplotlib.pyplot as plt @@ -1002,230 +171,6 @@ def test_bar_user_colors(self): ] assert result == expected - @pytest.mark.slow - def test_bar_linewidth(self): - df = DataFrame(randn(5, 5)) - - # regular - ax = df.plot.bar(linewidth=2) - for r in ax.patches: - assert r.get_linewidth() == 2 - - # stacked - ax = df.plot.bar(stacked=True, linewidth=2) - for r in ax.patches: - assert r.get_linewidth() == 2 - - # subplots - axes = 
df.plot.bar(linewidth=2, subplots=True) - self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) - for ax in axes: - for r in ax.patches: - assert r.get_linewidth() == 2 - - @pytest.mark.slow - def test_bar_barwidth(self): - df = DataFrame(randn(5, 5)) - - width = 0.9 - - # regular - ax = df.plot.bar(width=width) - for r in ax.patches: - assert r.get_width() == width / len(df.columns) - - # stacked - ax = df.plot.bar(stacked=True, width=width) - for r in ax.patches: - assert r.get_width() == width - - # horizontal regular - ax = df.plot.barh(width=width) - for r in ax.patches: - assert r.get_height() == width / len(df.columns) - - # horizontal stacked - ax = df.plot.barh(stacked=True, width=width) - for r in ax.patches: - assert r.get_height() == width - - # subplots - axes = df.plot.bar(width=width, subplots=True) - for ax in axes: - for r in ax.patches: - assert r.get_width() == width - - # horizontal subplots - axes = df.plot.barh(width=width, subplots=True) - for ax in axes: - for r in ax.patches: - assert r.get_height() == width - - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check 
left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - - @pytest.mark.slow - def test_bar_bottom_left(self): - df = DataFrame(rand(5, 5)) - ax = df.plot.bar(stacked=False, bottom=1) - result = [p.get_y() for p in ax.patches] - assert result == [1] * 25 - - ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) - result = [p.get_y() for p in ax.patches[:5]] - assert result == [-1, -2, -3, -4, -5] - - ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) - result = [p.get_x() for p in ax.patches] - assert result == [1] * 25 - - ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) - result = [p.get_x() for p in ax.patches[:5]] - assert result == [1, 2, 3, 4, 5] - - axes = df.plot.bar(subplots=True, bottom=-1) - for ax in axes: - result = [p.get_y() for p in ax.patches] - assert result == [-1] * 5 - - axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) - for ax in axes: - result = [p.get_x() for p in ax.patches] - assert result == [1] * 5 - - @pytest.mark.slow - def test_bar_nan(self): - df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]}) - ax = df.plot.bar() - expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] - result = [p.get_height() for p in ax.patches] - assert result == expected - - ax = df.plot.bar(stacked=True) - result = [p.get_height() for p in ax.patches] - assert result == expected - - result = [p.get_y() for p in ax.patches] - expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] - assert result == expected - - @pytest.mark.slow - def test_bar_categorical(self): - # GH 13019 - df1 = pd.DataFrame( - np.random.randn(6, 5), - 
index=pd.Index(list("ABCDEF")), - columns=pd.Index(list("abcde")), - ) - # categorical index must behave the same - df2 = pd.DataFrame( - np.random.randn(6, 5), - index=pd.CategoricalIndex(list("ABCDEF")), - columns=pd.CategoricalIndex(list("abcde")), - ) - - for df in [df1, df2]: - ax = df.plot.bar() - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - assert ax.get_xlim() == (-0.5, 5.5) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.25 - assert ax.patches[-1].get_x() == 5.15 - - ax = df.plot.bar(stacked=True) - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) - assert ax.get_xlim() == (-0.5, 5.5) - assert ax.patches[0].get_x() == -0.25 - assert ax.patches[-1].get_x() == 4.75 - - @pytest.mark.slow - def test_plot_scatter(self): - df = DataFrame( - randn(6, 4), - index=list(string.ascii_letters[:6]), - columns=["x", "y", "z", "four"], - ) - - _check_plot_works(df.plot.scatter, x="x", y="y") - _check_plot_works(df.plot.scatter, x=1, y=2) - - with pytest.raises(TypeError): - df.plot.scatter(x="x") - with pytest.raises(TypeError): - df.plot.scatter(y="y") - - # GH 6951 - axes = df.plot(x="x", y="y", kind="scatter", subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - - def test_raise_error_on_datetime_time_data(self): - # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = pd.DataFrame(np.random.randn(10), columns=["a"]) - df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time - msg = "must be a string or a number, not 'datetime.time'" - - with pytest.raises(TypeError, match=msg): - df.plot(kind="scatter", x="dtime", y="a") - - def test_scatterplot_datetime_data(self): - # GH 30391 - dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") - vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) - - _check_plot_works(df.plot.scatter, x="dates", y="vals") - 
_check_plot_works(df.plot.scatter, x=0, y=1) - - def test_scatterplot_object_data(self): - # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) - - _check_plot_works(df.plot.scatter, x="a", y="b") - _check_plot_works(df.plot.scatter, x=0, y=1) - - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) - - _check_plot_works(df.plot.scatter, x="a", y="b") - _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # addressing issue #10611, to ensure colobar does not @@ -1243,839 +188,40 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] - assert vis1 == vis2 - - assert ( - ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() - ) - - @pytest.mark.slow - def test_if_hexbin_xaxis_label_is_visible(self): - # addressing issue #10678, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. 
- random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax = df.plot.hexbin("A label", "B label", gridsize=12) - assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) - assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) - assert ax.xaxis.get_label().get_visible() - - @pytest.mark.slow - def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): - import matplotlib.pyplot as plt - - random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) - - fig, axes = plt.subplots(1, 2) - df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) - df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) - plt.tight_layout() - - points = np.array([ax.get_position().get_points() for ax in fig.axes]) - axes_x_coords = points[:, :, 0] - parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] - colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] - assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() - - @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) - @pytest.mark.slow - def test_plot_scatter_with_categorical_data(self, x, y): - # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) - - _check_plot_works(df.plot.scatter, x=x, y=y) - - @pytest.mark.slow - def test_plot_scatter_with_c(self): - df = DataFrame( - randn(6, 4), - index=list(string.ascii_letters[:6]), - columns=["x", "y", "z", "four"], - ) - - axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)] - for ax in axes: - # default to Greys - assert ax.collections[0].cmap.name == "Greys" - - # n.b. 
there appears to be no public method - # to get the colorbar label - assert ax.collections[0].colorbar._label == "z" - - cm = "cubehelix" - ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) - assert ax.collections[0].cmap.name == cm - - # verify turning off colorbar works - ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False) - assert ax.collections[0].colorbar is None - - # verify that we can still plot a solid color - ax = df.plot.scatter(x=0, y=1, c="red") - assert ax.collections[0].colorbar is None - self._check_colors(ax.collections, facecolors=["r"]) - - # Ensure that we can pass an np.array straight through to matplotlib, - # this functionality was accidentally removed previously. - # See https://github.com/pandas-dev/pandas/issues/8852 for bug report - # - # Exercise colormap path and non-colormap path as they are independent - # - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - red_rgba = [1.0, 0.0, 0.0, 1.0] - green_rgba = [0.0, 1.0, 0.0, 1.0] - rgba_array = np.array([red_rgba, green_rgba]) - ax = df.plot.scatter(x="A", y="B", c=rgba_array) - # expect the face colors of the points in the non-colormap path to be - # identical to the values we supplied, normally we'd be on shaky ground - # comparing floats for equality but here we expect them to be - # identical. - tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array) - # we don't test the colors of the faces in this next plot because they - # are dependent on the spring colormap, which may change its colors - # later. 
- float_array = np.array([0.0, 1.0]) - df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - - @pytest.mark.parametrize("cmap", [None, "Greys"]) - def test_scatter_with_c_column_name_with_colors(self, cmap): - # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( - [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], - columns=["length", "width"], - ) - df["species"] = ["r", "r", "g", "g", "b"] - ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) - assert ax.collections[0].colorbar is None - - def test_plot_scatter_with_s(self): - # this refers to GH 32904 - df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) - - ax = df.plot.scatter(x="a", y="b", s="c") - tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - - @pytest.mark.slow - def test_plot_bar(self): - df = DataFrame( - randn(6, 4), - index=list(string.ascii_letters[:6]), - 
columns=["one", "two", "three", "four"], - ) - - _check_plot_works(df.plot.bar) - _check_plot_works(df.plot.bar, legend=False) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): - _check_plot_works(df.plot.bar, subplots=True) - _check_plot_works(df.plot.bar, stacked=True) - - df = DataFrame( - randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) - ) - _check_plot_works(df.plot.bar) - - df = DataFrame({"a": [0, 1], "b": [1, 0]}) - ax = _check_plot_works(df.plot.bar) - self._check_ticks_props(ax, xrot=90) - - ax = df.plot.bar(rot=35, fontsize=10) - self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10) - - ax = _check_plot_works(df.plot.barh) - self._check_ticks_props(ax, yrot=0) - - ax = df.plot.barh(rot=55, fontsize=11) - self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = 
p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - 
df = DataFrame(randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = 
DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_boxplot(self): - df = self.hist_df - series = df["height"] - numeric_cols = df._get_numeric_data().columns - labels = [pprint_thing(c) for c in numeric_cols] - - ax = _check_plot_works(df.plot.box) - self._check_text_labels(ax.get_xticklabels(), labels) - tm.assert_numpy_array_equal( - ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1) - ) - assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - tm.close() - - axes = series.plot.box(rot=40) - self._check_ticks_props(axes, xrot=40, yrot=0) - tm.close() - - ax = _check_plot_works(series.plot.box) - - positions = np.array([1, 6, 7]) - ax = df.plot.box(positions=positions) - numeric_cols = df._get_numeric_data().columns - labels = [pprint_thing(c) for c in numeric_cols] - self._check_text_labels(ax.get_xticklabels(), labels) - tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) - assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - - @pytest.mark.slow - def test_boxplot_vertical(self): - df = self.hist_df - numeric_cols = df._get_numeric_data().columns - labels = [pprint_thing(c) for c in numeric_cols] - - # if horizontal, yticklabels are rotated - ax = df.plot.box(rot=50, fontsize=8, vert=False) - self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) - self._check_text_labels(ax.get_yticklabels(), labels) - assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.box, subplots=True, vert=False, logx=True) - self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) - self._check_ax_scales(axes, xaxis="log") - for ax, label in zip(axes, labels): - self._check_text_labels(ax.get_yticklabels(), [label]) - assert len(ax.lines) == self.bp_n_objects - - positions = np.array([3, 2, 8]) - ax = df.plot.box(positions=positions, vert=False) - self._check_text_labels(ax.get_yticklabels(), labels) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) - assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - - @pytest.mark.slow - def test_boxplot_return_type(self): - df = DataFrame( - randn(6, 4), - index=list(string.ascii_letters[:6]), - columns=["one", "two", "three", "four"], - ) - with pytest.raises(ValueError): - df.plot.box(return_type="NOTATYPE") - - result = df.plot.box(return_type="dict") - self._check_box_return_type(result, "dict") - - result = df.plot.box(return_type="axes") - self._check_box_return_type(result, "axes") - - result = df.plot.box() # default axes - self._check_box_return_type(result, "axes") - - result = df.plot.box(return_type="both") - self._check_box_return_type(result, "both") - - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_df(self): - df = DataFrame(randn(100, 4)) - ax = _check_plot_works(df.plot, kind="kde") - expected = [pprint_thing(c) for c in df.columns] - 
self._check_legend_labels(ax, labels=expected) - self._check_ticks_props(ax, xrot=0) - - ax = df.plot(kind="kde", rot=20, fontsize=5) - self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) - - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, kind="kde", subplots=True) - self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - - axes = df.plot(kind="kde", logy=True, subplots=True) - self._check_ax_scales(axes, yaxis="log") - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_missing_vals(self): - df = DataFrame(np.random.uniform(size=(100, 4))) - df.loc[0, 0] = np.nan - _check_plot_works(df.plot, kind="kde") - - @pytest.mark.slow - def test_hist_df(self): - from matplotlib.patches import Rectangle - - df = DataFrame(randn(100, 4)) - series = df[0] - - ax = _check_plot_works(df.plot.hist) - expected = [pprint_thing(c) for c in df.columns] - self._check_legend_labels(ax, labels=expected) - - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.hist, subplots=True, logy=True) - self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - self._check_ax_scales(axes, yaxis="log") - - axes = series.plot.hist(rot=40) - self._check_ticks_props(axes, xrot=40, yrot=0) - tm.close() - - ax = series.plot.hist(cumulative=True, bins=4, density=True) - # height of last bin (index 5) must be 1.0 - rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - tm.assert_almost_equal(rects[-1].get_height(), 1.0) - tm.close() - - ax = series.plot.hist(cumulative=True, bins=4) - rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] - - tm.assert_almost_equal(rects[-2].get_height(), 100.0) - tm.close() - - # if horizontal, yticklabels are rotated - axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") - self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) - - @pytest.mark.parametrize( - "weights", [0.1 * np.ones(shape=(100,)), 0.1 * np.ones(shape=(100, 2))] - ) - def 
test_hist_weights(self, weights): - # GH 33173 - np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) - - ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) - ax2 = _check_plot_works(df.plot, kind="hist") - - patch_height_with_weights = [patch.get_height() for patch in ax1.patches] - - # original heights with no weights, and we manually multiply with example - # weights, so after multiplication, they should be almost same - expected_patch_height = [0.1 * patch.get_height() for patch in ax2.patches] - - tm.assert_almost_equal(patch_height_with_weights, expected_patch_height) - - def _check_box_coord( - self, - patches, - expected_y=None, - expected_h=None, - expected_x=None, - expected_w=None, - ): - result_y = np.array([p.get_y() for p in patches]) - result_height = np.array([p.get_height() for p in patches]) - result_x = np.array([p.get_x() for p in patches]) - result_width = np.array([p.get_width() for p in patches]) - # dtype is depending on above values, no need to check - - if expected_y is not None: - tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False) - if expected_h is not None: - tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False) - if expected_x is not None: - tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False) - if expected_w is not None: - tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) - - @pytest.mark.slow - def test_hist_df_coord(self): - normal_df = DataFrame( - { - "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), - "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])), - "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])), - }, - columns=["A", "B", "C"], - ) - - nan_df = DataFrame( - { - "A": np.repeat( - np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6]) - ), - "B": np.repeat( - np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8]) - ), - "C": 
np.repeat( - np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10]) - ), - }, - columns=["A", "B", "C"], - ) - - for df in [normal_df, nan_df]: - ax = df.plot.hist(bins=5) - self._check_box_coord( - ax.patches[:5], - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - ax.patches[5:10], - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - ax.patches[10:], - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([6, 7, 8, 9, 10]), - ) - - ax = df.plot.hist(bins=5, stacked=True) - self._check_box_coord( - ax.patches[:5], - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - ax.patches[5:10], - expected_y=np.array([10, 9, 8, 7, 6]), - expected_h=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - ax.patches[10:], - expected_y=np.array([18, 17, 16, 15, 14]), - expected_h=np.array([6, 7, 8, 9, 10]), - ) - - axes = df.plot.hist(bins=5, stacked=True, subplots=True) - self._check_box_coord( - axes[0].patches, - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - axes[1].patches, - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - axes[2].patches, - expected_y=np.array([0, 0, 0, 0, 0]), - expected_h=np.array([6, 7, 8, 9, 10]), - ) - - # horizontal - ax = df.plot.hist(bins=5, orientation="horizontal") - self._check_box_coord( - ax.patches[:5], - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - ax.patches[5:10], - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - ax.patches[10:], - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([6, 7, 8, 9, 10]), - ) - - ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal") - 
self._check_box_coord( - ax.patches[:5], - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - ax.patches[5:10], - expected_x=np.array([10, 9, 8, 7, 6]), - expected_w=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - ax.patches[10:], - expected_x=np.array([18, 17, 16, 15, 14]), - expected_w=np.array([6, 7, 8, 9, 10]), - ) - - axes = df.plot.hist( - bins=5, stacked=True, subplots=True, orientation="horizontal" - ) - self._check_box_coord( - axes[0].patches, - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([10, 9, 8, 7, 6]), - ) - self._check_box_coord( - axes[1].patches, - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([8, 8, 8, 8, 8]), - ) - self._check_box_coord( - axes[2].patches, - expected_x=np.array([0, 0, 0, 0, 0]), - expected_w=np.array([6, 7, 8, 9, 10]), - ) - - @pytest.mark.slow - def test_plot_int_columns(self): - df = DataFrame(randn(100, 4)).cumsum() - _check_plot_works(df.plot, legend=True) - - @pytest.mark.slow - def test_df_legend_labels(self): - kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) - - for kind in kinds: - - ax = df.plot(kind=kind, legend=True) - self._check_legend_labels(ax, labels=df.columns) - - ax = df2.plot(kind=kind, legend=False, ax=ax) - self._check_legend_labels(ax, labels=df.columns) - - ax = df3.plot(kind=kind, legend=True, ax=ax) - self._check_legend_labels(ax, labels=df.columns.union(df3.columns)) - - ax = df4.plot(kind=kind, legend="reverse", ax=ax) - expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns)) - self._check_legend_labels(ax, labels=expected) - - # Secondary Y - ax = df.plot(legend=True, secondary_y="b") - self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) - ax = 
df2.plot(legend=False, ax=ax) - self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) - ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax) - self._check_legend_labels( - ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"] - ) - - # Time Series - ind = date_range("1/1/2014", periods=3) - df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) - ax = df.plot(legend=True, secondary_y="b") - self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) - ax = df2.plot(legend=False, ax=ax) - self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) - ax = df3.plot(legend=True, ax=ax) - self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", "h", "i"]) - - # scatter - ax = df.plot.scatter(x="a", y="b", label="data1") - self._check_legend_labels(ax, labels=["data1"]) - ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax) - self._check_legend_labels(ax, labels=["data1"]) - ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax) - self._check_legend_labels(ax, labels=["data1", "data3"]) - - # ensure label args pass through and - # index name does not mutate - # column names don't mutate - df5 = df.set_index("a") - ax = df5.plot(y="b") - self._check_legend_labels(ax, labels=["b"]) - ax = df5.plot(y="b", label="LABEL_b") - self._check_legend_labels(ax, labels=["LABEL_b"]) - self._check_text_labels(ax.xaxis.get_label(), "a") - ax = df5.plot(y="c", label="LABEL_c", ax=ax) - self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"]) - assert df5.columns.tolist() == ["b", "c"] - - def test_missing_marker_multi_plots_on_same_ax(self): - # GH 18222 - df = pd.DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) - fig, ax = self.plt.subplots(nrows=1, ncols=3) - # Left plot - df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) - 
df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[0]) - df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[0]) - self._check_legend_labels(ax[0], labels=["r", "g", "b"]) - self._check_legend_marker(ax[0], expected_markers=["o", "x", "o"]) - # Center plot - df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[1]) - df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[1]) - df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[1]) - self._check_legend_labels(ax[1], labels=["b", "r", "g"]) - self._check_legend_marker(ax[1], expected_markers=["o", "o", "x"]) - # Right plot - df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[2]) - df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[2]) - df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[2]) - self._check_legend_labels(ax[2], labels=["g", "b", "r"]) - self._check_legend_marker(ax[2], expected_markers=["x", "o", "o"]) - - def test_legend_name(self): - multi = DataFrame( - randn(4, 4), - columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], - ) - multi.columns.names = ["group", "individual"] - - ax = multi.plot() - leg_title = ax.legend_.get_title() - self._check_text_labels(leg_title, "group,individual") - - df = DataFrame(randn(5, 5)) - ax = df.plot(legend=True, ax=ax) - leg_title = ax.legend_.get_title() - self._check_text_labels(leg_title, "group,individual") - - df.columns.name = "new" - ax = df.plot(legend=False, ax=ax) - leg_title = ax.legend_.get_title() - self._check_text_labels(leg_title, "group,individual") - - ax = df.plot(legend=True, ax=ax) - leg_title = ax.legend_.get_title() - self._check_text_labels(leg_title, "new") - - @pytest.mark.slow - def test_no_legend(self): - kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) - - for kind in kinds: + assert vis1 == vis2 - ax = df.plot(kind=kind, legend=False) - self._check_legend_labels(ax, 
visible=False) + assert ( + ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() + ) @pytest.mark.slow - def test_style_by_column(self): + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt - fig = plt.gcf() - - df = DataFrame(randn(100, 3)) - for markers in [ - {0: "^", 1: "+", 2: "o"}, - {0: "^", 1: "+"}, - ["^", "+", "o"], - ["^", "+"], - ]: - fig.clf() - fig.add_subplot(111) - ax = df.plot(style=markers) - for i, l in enumerate(ax.get_lines()[: len(markers)]): - assert l.get_marker() == markers[i] + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) - @pytest.mark.slow - def test_line_label_none(self): - s = Series([1, 2]) - ax = s.plot() - assert ax.get_legend() is None + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() - ax = s.plot(legend=True) - assert ax.get_legend().get_texts()[0].get_text() == "None" + @pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): + # https://github.com/pandas-dev/pandas/issues/34316 + df = pd.DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) + assert ax.collections[0].colorbar is None @pytest.mark.slow def test_line_colors(self): @@ -2454,800 +600,12 @@ def test_default_color_cycle(self): expected = self._unpack_cycler(plt.rcParams)[:3] 
self._check_colors(ax.get_lines(), linecolors=expected) - def test_unordered_ts(self): - df = DataFrame( - np.array([3.0, 2.0, 1.0]), - index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], - columns=["test"], - ) - ax = df.plot() - xticks = ax.lines[0].get_xdata() - assert xticks[0] < xticks[1] - ydata = ax.lines[0].get_ydata() - tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) - - @td.skip_if_no_scipy - def test_kind_both_ways(self): - df = DataFrame({"x": [1, 2, 3]}) - for kind in plotting.PlotAccessor._common_kinds: - - df.plot(kind=kind) - getattr(df.plot, kind)() - for kind in ["scatter", "hexbin"]: - df.plot("x", "x", kind=kind) - getattr(df.plot, kind)("x", "x") - - def test_all_invalid_plot_data(self): - df = DataFrame(list("abcd")) - for kind in plotting.PlotAccessor._common_kinds: - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - df.plot(kind=kind) - - @pytest.mark.slow - def test_partially_invalid_plot_data(self): - with tm.RNGContext(42): - df = DataFrame(randn(10, 2), dtype=object) - df[np.random.rand(df.shape[0]) > 0.5] = "a" - for kind in plotting.PlotAccessor._common_kinds: - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - df.plot(kind=kind) - - with tm.RNGContext(42): - # area plot doesn't support positive/negative mixed data - kinds = ["area"] - df = DataFrame(rand(10, 2), dtype=object) - df[np.random.rand(df.shape[0]) > 0.5] = "a" - for kind in kinds: - with pytest.raises(TypeError): - df.plot(kind=kind) - - def test_invalid_kind(self): - df = DataFrame(randn(10, 2)) - with pytest.raises(ValueError): - df.plot(kind="aasdf") - - @pytest.mark.parametrize( - "x,y,lbl", - [ - (["B", "C"], "A", "a"), - (["A"], ["B", "C"], ["b", "c"]), - ("A", ["B", "C"], "badlabel"), - ], - ) - def test_invalid_xy_args(self, x, y, lbl): - # GH 18671, 19699 allows y to be list-like but not x - df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) - with pytest.raises(ValueError): - 
df.plot(x=x, y=y, label=lbl) - - @pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")]) - def test_invalid_xy_args_dup_cols(self, x, y): - # GH 18671, 19699 allows y to be list-like but not x - df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB")) - with pytest.raises(ValueError): - df.plot(x=x, y=y) - - @pytest.mark.parametrize( - "x,y,lbl,colors", - [ - ("A", ["B"], ["b"], ["red"]), - ("A", ["B", "C"], ["b", "c"], ["red", "blue"]), - (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]), - ], - ) - def test_y_listlike(self, x, y, lbl, colors): - # GH 19699: tests list-like y and verifies lbls & colors - df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) - _check_plot_works(df.plot, x="A", y=y, label=lbl) - - ax = df.plot(x=x, y=y, label=lbl, color=colors) - assert len(ax.lines) == len(y) - self._check_colors(ax.get_lines(), linecolors=colors) - - @pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])]) - def test_xy_args_integer(self, x, y, colnames): - # GH 20056: tests integer args for xy and checks col names - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - df.columns = colnames - _check_plot_works(df.plot, x=x, y=y) - - @pytest.mark.slow - def test_hexbin_basic(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", gridsize=10) - # TODO: need better way to test. This just does existence. 
- assert len(ax.collections) == 1 - - # GH 6951 - axes = df.plot.hexbin(x="A", y="B", subplots=True) - # hexbin should have 2 axes in the figure, 1 for plotting and another - # is colorbar - assert len(axes[0].figure.axes) == 2 - # return value is single axes - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - - @pytest.mark.slow - def test_hexbin_with_c(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", C="C") - assert len(ax.collections) == 1 - - ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std) - assert len(ax.collections) == 1 - - @pytest.mark.slow - def test_hexbin_cmap(self): - df = self.hexbin_df - - # Default to BuGn - ax = df.plot.hexbin(x="A", y="B") - assert ax.collections[0].cmap.name == "BuGn" - - cm = "cubehelix" - ax = df.plot.hexbin(x="A", y="B", colormap=cm) - assert ax.collections[0].cmap.name == cm - - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - - @pytest.mark.slow - def test_allow_cmap(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", cmap="YlGn") - assert ax.collections[0].cmap.name == "YlGn" - - with pytest.raises(TypeError): - df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") - - @pytest.mark.slow - def test_pie_df(self): - df = DataFrame( - np.random.rand(5, 3), - columns=["X", "Y", "Z"], - index=["a", "b", "c", "d", "e"], - ) - with pytest.raises(ValueError): - df.plot.pie() - - ax = _check_plot_works(df.plot.pie, y="Y") - self._check_text_labels(ax.texts, df.index) - - ax = _check_plot_works(df.plot.pie, y=2) - self._check_text_labels(ax.texts, df.index) - - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.pie, subplots=True) - assert len(axes) == len(df.columns) - for ax in axes: - self._check_text_labels(ax.texts, df.index) - for ax, ylabel in zip(axes, df.columns): - assert ax.get_ylabel() == ylabel - - labels = ["A", "B", "C", "D", "E"] - color_args = ["r", "g", "b", "c", "m"] - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works( - df.plot.pie, subplots=True, labels=labels, colors=color_args - ) - assert len(axes) == len(df.columns) - - for ax in axes: - self._check_text_labels(ax.texts, labels) - self._check_colors(ax.patches, facecolors=color_args) - - def test_pie_df_nan(self): - df = DataFrame(np.random.rand(4, 4)) - for i in range(4): - df.iloc[i, i] = np.nan - fig, axes = self.plt.subplots(ncols=4) - df.plot.pie(subplots=True, ax=axes, legend=True) - - base_expected = ["0", "1", "2", "3"] - for i, ax in enumerate(axes): - expected = list(base_expected) # force copy - expected[i] = "" - result = [x.get_text() for x in ax.texts] - assert result == expected - # legend labels - # NaN's not included in legend with subplots - # see https://github.com/pandas-dev/pandas/issues/8390 - assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[ - :i - ] + base_expected[i + 1 :] - - @pytest.mark.slow - def test_errorbar_plot(self): - d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - # check line plots - ax = _check_plot_works(df.plot, yerr=df_err, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works( - (df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True - ) 
- self._check_has_errorbars(ax, xerr=2, yerr=2) - - # yerr is raw error values - ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - # yerr is column name - for yerr in ["yerr", "誤差"]: - s_df = df.copy() - s_df[yerr] = np.ones(12) * 0.2 - - ax = _check_plot_works(s_df.plot, yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - with pytest.raises(ValueError): - df.plot(yerr=np.random.randn(11)) - - df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12}) - with pytest.raises((ValueError, TypeError)): - df.plot(yerr=df_err) - - @pytest.mark.slow - @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) - def test_errorbar_plot_different_kinds(self, kind): - d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - ax = _check_plot_works(df.plot, yerr=df_err["x"], kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - - ax = _check_plot_works(df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - - ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared - # Similar warnings were observed in GH #13188 - 
axes = _check_plot_works( - df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind - ) - self._check_has_errorbars(axes, xerr=1, yerr=1) - - @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError) - @pytest.mark.slow - def test_errorbar_plot_iterator(self): - with warnings.catch_warnings(): - d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} - df = DataFrame(d) - - # yerr is iterator - ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - @pytest.mark.slow - def test_errorbar_with_integer_column_names(self): - # test with integer column names - df = DataFrame(np.random.randn(10, 2)) - df_err = DataFrame(np.random.randn(10, 2)) - ax = _check_plot_works(df.plot, yerr=df_err) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, y=0, yerr=1) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - @pytest.mark.slow - def test_errorbar_with_partial_columns(self): - df = DataFrame(np.random.randn(10, 3)) - df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) - kinds = ["line", "bar"] - for kind in kinds: - ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ix = date_range("1/1/2000", periods=10, freq="M") - df.set_index(ix, inplace=True) - df_err.set_index(ix, inplace=True) - ax = _check_plot_works(df.plot, yerr=df_err, kind="line") - self._check_has_errorbars(ax, xerr=0, yerr=2) - - d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4} - df_err = DataFrame(d_err) - for err in [d_err, df_err]: - ax = _check_plot_works(df.plot, yerr=err) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - @pytest.mark.slow - @pytest.mark.parametrize("kind", ["line", "bar", "barh"]) - def test_errorbar_timeseries(self, kind): - d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} - d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} - - # 
check time-series plots - ix = date_range("1/1/2000", "1/1/2001", freq="M") - tdf = DataFrame(d, index=ix) - tdf_err = DataFrame(d_err, index=ix) - - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - ax = _check_plot_works(tdf.plot, y="y", yerr=tdf_err["x"], kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared - # Similar warnings were observed in GH #13188 - axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) - - def test_errorbar_asymmetrical(self): - - np.random.seed(0) - err = np.random.rand(3, 2, 5) - - # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... 
- df = DataFrame(np.arange(15).reshape(3, 5)).T - - ax = df.plot(yerr=err, xerr=err / 2) - - yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1] - expected_0_0 = err[0, :, 0] * np.array([-1, 1]) - tm.assert_almost_equal(yerr_0_0, expected_0_0) - - with pytest.raises(ValueError): - df.plot(yerr=err.T) - - tm.close() - - def test_table(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - _check_plot_works(df.plot, table=True) - _check_plot_works(df.plot, table=df) - - # GH 35945 UserWarning - with tm.assert_produces_warning(None): - ax = df.plot() - assert len(ax.tables) == 0 - plotting.table(ax, df.T) - assert len(ax.tables) == 1 - - def test_errorbar_scatter(self): - df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"]) - df_err = DataFrame( - np.random.randn(5, 2) / 5, index=range(5), columns=["x", "y"] - ) - - ax = _check_plot_works(df.plot.scatter, x="x", y="y") - self._check_has_errorbars(ax, xerr=0, yerr=0) - ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err) - self._check_has_errorbars(ax, xerr=1, yerr=0) - - ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err) - self._check_has_errorbars(ax, xerr=1, yerr=1) - - def _check_errorbar_color(containers, expected, has_err="has_xerr"): - lines = [] - errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0] - for el in errs: - if is_list_like(el): - lines.extend(el) - else: - lines.append(el) - err_lines = [x for x in lines if x in ax.collections] - self._check_colors( - err_lines, linecolors=np.array([expected] * len(err_lines)) - ) - - # GH 8081 - df = DataFrame(np.random.randn(10, 5), columns=["a", "b", "c", "d", "e"]) - ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") - self._check_has_errorbars(ax, xerr=1, yerr=1) - _check_errorbar_color(ax.containers, "red", 
has_err="has_xerr") - _check_errorbar_color(ax.containers, "red", has_err="has_yerr") - - ax = df.plot.scatter(x="a", y="b", yerr="e", color="green") - self._check_has_errorbars(ax, xerr=0, yerr=1) - _check_errorbar_color(ax.containers, "green", has_err="has_yerr") - - @pytest.mark.slow - def test_sharex_and_ax(self): - # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, - # the axis in fig.get_axis() are sorted differently than pandas - # expected them, so make sure that only the right ones are removed - import matplotlib.pyplot as plt - - plt.close("all") - gs, axes = _generate_4_axes_via_gridspec() - - df = DataFrame( - { - "a": [1, 2, 3, 4, 5, 6], - "b": [1, 2, 3, 4, 5, 6], - "c": [1, 2, 3, 4, 5, 6], - "d": [1, 2, 3, 4, 5, 6], - } - ) - - def _check(axes): - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - for ax in [axes[0], axes[2]]: - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - for ax in [axes[1], axes[3]]: - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - - for ax in axes: - df.plot(x="a", y="b", title="title", ax=ax, sharex=True) - gs.tight_layout(plt.gcf()) - _check(axes) - tm.close() - - gs, axes = _generate_4_axes_via_gridspec() - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - _check(axes) - tm.close() - - gs, axes = _generate_4_axes_via_gridspec() - # without sharex, no labels should be touched! 
- for ax in axes: - df.plot(x="a", y="b", title="title", ax=ax) - - gs.tight_layout(plt.gcf()) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - @pytest.mark.slow - def test_sharey_and_ax(self): - # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, - # the axis in fig.get_axis() are sorted differently than pandas - # expected them, so make sure that only the right ones are removed - import matplotlib.pyplot as plt - - gs, axes = _generate_4_axes_via_gridspec() - - df = DataFrame( - { - "a": [1, 2, 3, 4, 5, 6], - "b": [1, 2, 3, 4, 5, 6], - "c": [1, 2, 3, 4, 5, 6], - "d": [1, 2, 3, 4, 5, 6], - } - ) - - def _check(axes): - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - for ax in [axes[0], axes[1]]: - self._check_visible(ax.get_yticklabels(), visible=True) - for ax in [axes[2], axes[3]]: - self._check_visible(ax.get_yticklabels(), visible=False) - - for ax in axes: - df.plot(x="a", y="b", title="title", ax=ax, sharey=True) - gs.tight_layout(plt.gcf()) - _check(axes) - tm.close() - - gs, axes = _generate_4_axes_via_gridspec() - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharey=True) - - gs.tight_layout(plt.gcf()) - _check(axes) - tm.close() - - gs, axes = _generate_4_axes_via_gridspec() - # without sharex, no labels should be touched! 
- for ax in axes: - df.plot(x="a", y="b", title="title", ax=ax) - - gs.tight_layout(plt.gcf()) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - - @td.skip_if_no_scipy - def test_memory_leak(self): - """ Check that every plot type gets properly collected. """ - import gc - import weakref - - results = {} - for kind in plotting.PlotAccessor._all_kinds: - - args = {} - if kind in ["hexbin", "scatter", "pie"]: - df = self.hexbin_df - args = {"x": "A", "y": "B"} - elif kind == "area": - df = self.tdf.abs() - else: - df = self.tdf - - # Use a weakref so we can see if the object gets collected without - # also preventing it from being collected - results[kind] = weakref.proxy(df.plot(kind=kind, **args)) - - # have matplotlib delete all the figures - tm.close() - # force a garbage collection - gc.collect() - for key in results: - # check that every plot was collected - with pytest.raises(ReferenceError): - # need to actually access something to get an error - results[key].lines - - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with 
tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - @pytest.mark.slow - def test_df_gridspec_patterns(self): - # GH 10819 - import matplotlib.gridspec as gridspec - import matplotlib.pyplot as plt - - ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) - - df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB")) - - def _get_vertical_grid(): - gs = gridspec.GridSpec(3, 1) - fig = plt.figure() - ax1 = fig.add_subplot(gs[:2, :]) - ax2 = fig.add_subplot(gs[2, :]) - return ax1, ax2 - - def _get_horizontal_grid(): - gs = gridspec.GridSpec(1, 3) - fig = plt.figure() - ax1 = fig.add_subplot(gs[:, :2]) - ax2 = fig.add_subplot(gs[:, 2]) - return ax1, ax2 - - for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: - ax1 = ts.plot(ax=ax1) - assert len(ax1.lines) == 1 - ax2 = df.plot(ax=ax2) - assert len(ax2.lines) == 2 - for ax in [ax1, ax2]: - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - # subplots=True - for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: - axes = 
df.plot(subplots=True, ax=[ax1, ax2]) - assert len(ax1.lines) == 1 - assert len(ax2.lines) == 1 - for ax in axes: - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - # vertical / subplots / sharex=True / sharey=True - ax1, ax2 = _get_vertical_grid() - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - assert len(axes[0].lines) == 1 - assert len(axes[1].lines) == 1 - for ax in [ax1, ax2]: - # yaxis are visible because there is only one column - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of axes0 (top) are hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # horizontal / subplots / sharex=True / sharey=True - ax1, ax2 = _get_horizontal_grid() - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) - assert len(axes[0].lines) == 1 - assert len(axes[1].lines) == 1 - self._check_visible(axes[0].get_yticklabels(), visible=True) - # yaxis of axes1 (right) are hidden - self._check_visible(axes[1].get_yticklabels(), visible=False) - for ax in [ax1, ax2]: - # xaxis are visible because there is only one column - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - # boxed - def _get_boxed_grid(): - gs = gridspec.GridSpec(3, 3) - fig = plt.figure() - ax1 = fig.add_subplot(gs[:2, :2]) - ax2 = fig.add_subplot(gs[:2, 2]) - ax3 = fig.add_subplot(gs[2, :2]) - ax4 = fig.add_subplot(gs[2, 2]) - return ax1, ax2, ax3, ax4 - - axes = _get_boxed_grid() - df 
= DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD")) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - # axis are visible because these are not shared - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - # subplots / sharex=True / sharey=True - axes = _get_boxed_grid() - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) - for ax in axes: - assert len(ax.lines) == 1 - for ax in [axes[0], axes[2]]: # left column - self._check_visible(ax.get_yticklabels(), visible=True) - for ax in [axes[1], axes[3]]: # right column - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in [axes[0], axes[1]]: # top row - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - for ax in [axes[2], axes[3]]: # bottom row - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - - @pytest.mark.slow - def test_df_grid_settings(self): - # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 - self._check_grid_settings( - DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}), - plotting.PlotAccessor._dataframe_kinds, - kws={"x": "a", "y": "b"}, - ) - def test_invalid_colormap(self): df = DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") - def test_plain_axes(self): - - # supplied ax itself is a SubplotAxes, but figure contains also - # a plain Axes object (GH11556) - fig, ax = self.plt.subplots() - fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(rand(10)).plot(ax=ax) - - # supplied ax itself is a plain Axes, but because the cmap keyword - # a new ax is created for the colorbar -> also multiples axes 
(GH11520) - df = DataFrame({"a": randn(8), "b": randn(8)}) - fig = self.plt.figure() - ax = fig.add_axes((0, 0, 1, 1)) - df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") - - # other examples - fig, ax = self.plt.subplots() - from mpl_toolkits.axes_grid1 import make_axes_locatable - - divider = make_axes_locatable(ax) - cax = divider.append_axes("right", size="5%", pad=0.05) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=cax) - - fig, ax = self.plt.subplots() - from mpl_toolkits.axes_grid1.inset_locator import inset_axes - - iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=iax) - def test_passed_bar_colors(self): import matplotlib as mpl @@ -3264,153 +622,6 @@ def test_rcParams_bar_colors(self): barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] - @pytest.mark.parametrize("method", ["line", "barh", "bar"]) - def test_secondary_axis_font_size(self, method): - # GH: 12565 - df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) - .assign(C=lambda df: df.B.cumsum()) - .assign(D=lambda df: df.C * 1.1) - ) - - fontsize = 20 - sy = ["C", "D"] - - kwargs = dict(secondary_y=sy, fontsize=fontsize, mark_right=True) - ax = getattr(df.plot, method)(**kwargs) - self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize) - - @pytest.mark.slow - def test_x_string_values_ticks(self): - # Test if string plot index have a fixed xtick position - # GH: 7612, GH: 22334 - df = pd.DataFrame( - { - "sales": [3, 2, 3], - "visits": [20, 42, 28], - "day": ["Monday", "Tuesday", "Wednesday"], - } - ) - ax = df.plot.area(x="day") - ax.set_xlim(-1, 3) - xticklabels = [t.get_text() for t in ax.get_xticklabels()] - labels_position = dict(zip(xticklabels, ax.get_xticks())) - # Testing if the label stayed at the right position - assert labels_position["Monday"] == 0.0 - assert labels_position["Tuesday"] == 1.0 - assert 
labels_position["Wednesday"] == 2.0 - - @pytest.mark.slow - def test_x_multiindex_values_ticks(self): - # Test if multiindex plot index have a fixed xtick position - # GH: 15912 - index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) - ax = df.plot() - ax.set_xlim(-1, 4) - xticklabels = [t.get_text() for t in ax.get_xticklabels()] - labels_position = dict(zip(xticklabels, ax.get_xticks())) - # Testing if the label stayed at the right position - assert labels_position["(2012, 1)"] == 0.0 - assert labels_position["(2012, 2)"] == 1.0 - assert labels_position["(2013, 1)"] == 2.0 - assert labels_position["(2013, 2)"] == 3.0 - - @pytest.mark.parametrize("kind", ["line", "area"]) - def test_xlim_plot_line(self, kind): - # test if xlim is set correctly in plot.line and plot.area - # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) - ax = df.plot(kind=kind) - xlims = ax.get_xlim() - assert xlims[0] < 1 - assert xlims[1] > 2 - - def test_xlim_plot_line_correctly_in_mixed_plot_type(self): - # test if xlim is set correctly when ax contains multiple different kinds - # of plots, GH 27686 - fig, ax = self.plt.subplots() - - indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( - { - "s1": [1000, 2000, 1500, 2000], - "s2": [900, 1400, 2000, 3000], - "s3": [1500, 1500, 1600, 1200], - "secondary_y": [1, 3, 4, 3], - }, - index=indexes, - ) - df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False) - df[["secondary_y"]].plot(ax=ax, secondary_y=True) - - xlims = ax.get_xlim() - assert xlims[0] < 0 - assert xlims[1] > 3 - - # make sure axis labels are plotted correctly as well - xticklabels = [t.get_text() for t in ax.get_xticklabels()] - assert xticklabels == indexes - - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs 
= self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - - def test_plot_no_rows(self): - # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) - assert df.empty - ax = df.plot() - assert len(ax.get_lines()) == 1 - line = ax.get_lines()[0] - assert len(line.get_xdata()) == 0 - assert len(line.get_ydata()) == 0 - - def test_plot_no_numeric_data(self): - df = pd.DataFrame(["a", "b", "c"]) - with pytest.raises(TypeError): - df.plot() - - def test_missing_markers_legend(self): - # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) - ax = df.plot(y=["A"], marker="x", linestyle="solid") - df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) - df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) - - self._check_legend_labels(ax, labels=["A", "B", "C"]) - self._check_legend_marker(ax, expected_markers=["x", "o", "<"]) - - def test_missing_markers_legend_using_style(self): - # 14563 - df = pd.DataFrame( - { - "A": [1, 2, 3, 4, 5, 6], - "B": [2, 4, 1, 3, 2, 4], - "C": [3, 3, 2, 6, 4, 2], - "X": [1, 2, 3, 4, 5, 6], - } - ) - - fig, ax = self.plt.subplots() - for kind in "ABC": - df.plot("X", kind, label=kind, ax=ax, style=".") - - self._check_legend_labels(ax, labels=["A", "B", "C"]) - self._check_legend_marker(ax, expected_markers=[".", ".", "."]) - def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
@@ -3419,74 +630,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() - - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_single_plot( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - ax = df.plot(kind=kind) - assert ax.get_xlabel() == old_label - assert ax.get_ylabel() == "" - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - ax = df.plot(kind=kind, ylabel=new_label, xlabel=new_label) - assert ax.get_ylabel() == str(new_label) - assert ax.get_xlabel() == str(new_label) - - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert 
all(ax.get_xlabel() == str(new_label) for ax in axes) - - -def _generate_4_axes_via_gridspec(): - import matplotlib as mpl - import matplotlib.gridspec - import matplotlib.pyplot as plt - - gs = mpl.gridspec.GridSpec(2, 2) - ax_tl = plt.subplot(gs[0, 0]) - ax_ll = plt.subplot(gs[1, 0]) - ax_tr = plt.subplot(gs[0, 1]) - ax_lr = plt.subplot(gs[1, 1]) - - return gs, [ax_tl, ax_ll, ax_tr, ax_lr] + assert legend.get_color() == line.get_color() \ No newline at end of file From 2cb3fd7963ce72fd3a624b01a7af0949721f0944 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:22:54 +0300 Subject: [PATCH 004/147] PEP 8 fixes --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From 4527f117af5f8ade3b493c4ee13b2267bb090da7 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:19:42 +0300 Subject: [PATCH 005/147] Transfer tests of test_frame.py to test_frame_groupby.py and test_frame_subplots.py --- pandas/tests/plotting/frame/test_frame.py | 547 ------------------ .../plotting/frame/test_frame_groupby.py | 108 ++++ 
.../plotting/frame/test_frame_subplots.py | 538 +++++++++++++++++ 3 files changed, 646 insertions(+), 547 deletions(-) create mode 100644 pandas/tests/plotting/frame/test_frame_groupby.py create mode 100644 pandas/tests/plotting/frame/test_frame_subplots.py diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4d339b93fd30d..396eb73e83d17 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -337,411 +337,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - 
self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) 
- - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( 
- ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = pd.DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - 
).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - 
df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape 
== (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = 
df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 def test_negative_log(self): df = -DataFrame( @@ -1342,14 +937,6 @@ def test_bar_center(self): self._check_bar_alignment(df, kind="barh", stacked=False) self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - @pytest.mark.slow def test_bar_align_single_column(self): df = DataFrame(randn(5)) @@ -1389,28 +976,6 @@ def test_bar_edge(self): df, kind="barh", subplots=True, width=0.9, align="edge" ) - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - 
tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1489,26 +1054,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -2489,53 +2034,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st 
ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -2780,22 +2278,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = pd.DataFrame(columns=["foo"], dtype=int) @@ -2867,35 +2349,6 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_ylabel() == str(new_label) assert ax.get_xlabel() == str(new_label) - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 
9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert all(ax.get_xlabel() == str(new_label) for ax in axes) - - def _generate_4_axes_via_gridspec(): import matplotlib as mpl import matplotlib.gridspec diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py new file mode 100644 index 0000000000000..253f81d9e2704 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -0,0 +1,108 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +from numpy.random import rand, randn +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFrameGroupby(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + 
+ def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = 
df.groupby("c").boxplot(sharex=True) + expected = [False, False, True, True] + self._assert_xtickslabels_visibility(axes, expected) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py new file mode 100644 index 0000000000000..92d5941e7e8e6 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -0,0 +1,538 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +from numpy.random import rand, randn +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFrameGroupby(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) 
+ self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + 
self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + 
@pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = 
df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], 
sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + 
df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, 
kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + 
self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def 
test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) From d851ca260838d5ad99e5a1bb97e7afb44a8095f7 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:22:06 +0300 Subject: [PATCH 006/147] Removing unnecessary imports --- .../tests/plotting/frame/test_frame_groupby.py | 18 +++--------------- .../plotting/frame/test_frame_subplots.py | 8 ++------ 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 253f81d9e2704..1aa5d18c076b0 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,26 +1,14 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string -import warnings - import numpy as np -from numpy.random import rand, randn -import pytest +from numpy.random import rand import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import 
pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 92d5941e7e8e6..5ed7f22c22a34 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -11,16 +11,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From d5ddae205431fb54a0f20736acefb0d0896e5dfe Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:32:36 +0300 Subject: [PATCH 007/147] PEP 8 fixes --- pandas/tests/plotting/frame/test_frame_subplots.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 5ed7f22c22a34..cb99801de6efd 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -399,7 +399,6 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind="barh", subplots=True) self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 From aeacf6cd94749ed7eed2d3db65ec08dc47aa3b39 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Wed, 4 Nov 2020 16:34:58 +0300 Subject: [PATCH 008/147] Fixed class name --- 
pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index cb99801de6efd..e97ce3262a015 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -20,7 +20,7 @@ @td.skip_if_no_mpl -class TestDataFrameGroupby(TestPlotBase): +class TestDataFrameSubplots(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl From d9c8ae9769243e9312a2841a78b015329196d1ec Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Wed, 4 Nov 2020 16:46:45 +0300 Subject: [PATCH 009/147] Transfer tests of test_frame.py to test_frame_subplots.py --- pandas/tests/plotting/frame/test_frame.py | 168 ----------------- .../plotting/frame/test_frame_subplots.py | 173 ++++++++++++++++++ 2 files changed, 173 insertions(+), 168 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 396eb73e83d17..56ce87644bc39 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -534,46 +534,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def 
test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) @@ -848,134 +808,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() 
* position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - 
self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index e97ce3262a015..3f146d2c97008 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -531,3 +531,176 @@ def test_xlabel_ylabel_dataframe_subplots( axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) assert all(ax.get_ylabel() == str(new_label) for ax in axes) assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, 
width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = 
DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in 
ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + + From b63c1ba1804871aecddd4ca4c37ea066398ee57e Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:29:31 +0300 Subject: [PATCH 010/147] Transfer tests of test_frame.py to test_frame_groupby.py, test_frame_subplots.py, test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 211 +++++++++--------- .../tests/plotting/frame/test_frame_color.py | 100 +++++---- .../plotting/frame/test_frame_groupby.py | 17 +- .../plotting/frame/test_frame_subplots.py | 28 ++- 4 files changed, 193 insertions(+), 163 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 
56ce87644bc39..9aab765dca96b 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,7 +6,6 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -175,14 +174,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(randn(2, 2), columns=["a", "b"]) + df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -337,10 +336,9 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - def test_negative_log(self): df = -DataFrame( - rand(6, 4), + np.random.rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -359,15 +357,20 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} + { + "w": np.random.rand(6), + "x": np.random.rand(6), + "y": -np.random.rand(6), + "z": -np.random.rand(6), + } ) # each column has positive-negative mixed value mixed_df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -435,7 +438,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def 
test_line_lim(self): - df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -459,7 +462,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -477,7 +480,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -498,7 +501,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) width = 0.9 @@ -536,7 +539,7 @@ def test_bar_barwidth(self): @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -582,13 +585,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = pd.DataFrame( + df1 = DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = pd.DataFrame( + df2 = DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -612,7 +615,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -631,7 +634,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = 
pd.DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -642,31 +645,30 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) + df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -677,16 +679,14 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -742,43 +742,10 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", 
ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -791,7 +758,9 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) + np.random.randn(10, 15), + index=list(string.ascii_letters[:10]), + columns=range(15), ) _check_plot_works(df.plot.bar) @@ -867,7 +836,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -889,7 +858,7 @@ def test_boxplot_return_type(self): @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -916,7 +885,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -954,7 +923,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1124,16 +1093,16 @@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(randn(100, 4)).cumsum() + df = 
DataFrame(np.random.randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1162,9 +1131,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1195,9 +1164,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = pd.DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -1220,7 +1187,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - randn(4, 4), + np.random.randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), 
np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1229,7 +1196,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1246,7 +1213,7 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: @@ -1259,7 +1226,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(randn(100, 3)) + df = DataFrame(np.random.randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1281,6 +1248,23 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -1315,7 +1299,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(randn(10, 2), dtype=object) + df = DataFrame(np.random.randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in plotting.PlotAccessor._common_kinds: @@ -1326,14 +1310,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # 
area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(rand(10, 2), dtype=object) + df = DataFrame(np.random.rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1420,13 +1404,6 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -1997,11 +1974,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": randn(8), "b": randn(8)}) + df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -2012,21 +1989,21 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=cax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=iax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=iax) 
@pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2042,7 +2019,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = pd.DataFrame( + df = DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2063,7 +2040,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2078,7 +2055,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) + df = DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2090,7 +2067,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( + df = DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2112,7 +2089,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): def test_plot_no_rows(self): # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) + df = DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2121,13 +2098,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df 
= pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2137,7 +2114,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2168,7 +2145,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2181,6 +2158,28 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_ylabel() == str(new_label) assert ax.get_xlabel() == str(new_label) + @pytest.mark.parametrize( + "xlabel, ylabel", + [ + (None, None), + ("X Label", None), + (None, "Y Label"), + ("X Label", "Y Label"), + ], + ) + @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) + def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): + # GH 37001 + xcol = "Type A" + ycol = "Type B" + df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) + + # default is the labels are column names + ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) + assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) + assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + + def _generate_4_axes_via_gridspec(): import matplotlib as mpl import matplotlib.gridspec diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py 
index 517e3c109fc5b..18f45cd34d812 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -6,7 +6,6 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -50,7 +49,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -78,7 +77,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -122,7 +121,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -155,7 +154,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +175,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +197,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +213,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -222,12 +221,45 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 
3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -270,7 +302,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -280,7 +312,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +381,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +424,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +461,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 
+483,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +551,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -569,23 +601,6 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt @@ -593,14 +608,21 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) + df = DataFrame(np.random.randn(3, 2), columns=["A", 
"B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -610,7 +632,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +640,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
- df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 1aa5d18c076b0..32087fda4802b 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,14 +1,25 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string +import warnings + import numpy as np -from numpy.random import rand +import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 3f146d2c97008..23b677ca1f8c2 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -6,17 +6,20 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as 
tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl @@ -197,7 +200,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = pd.DataFrame(data) + testdata = DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -491,7 +494,7 @@ def test_df_subplots_patterns_minorticks(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -516,10 +519,10 @@ def test_subplots_sharex_false(self): ) @pytest.mark.parametrize("kind", ["line", "area", "bar"]) def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label + self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -534,7 +537,7 @@ def test_xlabel_ylabel_dataframe_subplots( @pytest.mark.slow def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) self._check_bar_alignment( df, kind="bar", stacked=False, width=0.9, position=0.2 ) @@ -555,7 +558,7 @@ def test_bar_barwidth_position(self): @pytest.mark.slow def test_bar_barwidth_position_int(self): # GH 12979 - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) for w in [1, 1.0]: ax = df.plot.bar(stacked=True, width=w) @@ -581,7 +584,6 @@ def test_bar_stacked_center(self): 
self._check_bar_alignment(df, kind="barh", stacked=True) self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - @pytest.mark.slow def test_bar_center(self): df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) @@ -592,7 +594,7 @@ def test_bar_center(self): @pytest.mark.slow def test_bar_align_single_column(self): - df = DataFrame(randn(5)) + df = DataFrame(np.random.randn(5)) self._check_bar_alignment(df, kind="bar", stacked=False) self._check_bar_alignment(df, kind="bar", stacked=True) self._check_bar_alignment(df, kind="barh", stacked=False) @@ -629,7 +631,6 @@ def test_bar_edge(self): df, kind="barh", subplots=True, width=0.9, align="edge" ) - def _check_bar_alignment( self, df, @@ -701,6 +702,3 @@ def _check_bar_alignment( raise ValueError return axes - - - From 987fa0020f17437d53544fb776ba3ce81b723503 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:36:13 +0300 Subject: [PATCH 011/147] Changed class names --- pandas/tests/plotting/frame/test_frame_color.py | 2 +- pandas/tests/plotting/frame/test_frame_groupby.py | 2 +- pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 18f45cd34d812..24e879d9491b5 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class TestDataFrameColor(TestPlotBase): +class TestDataFramePlotsColor(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 32087fda4802b..968fa65e63e79 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class 
TestDataFrameGroupby(TestPlotBase): +class TestDataFramePlotsGroupby(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 23b677ca1f8c2..ef491e438dac1 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class TestDataFrameSubplots(TestPlotBase): +class TestDataFramePlotsSubplots(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl From 7685f3544300852e588a57bc16789f989aee9334 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:38:34 +0300 Subject: [PATCH 012/147] Removed unnecessary imports --- pandas/tests/plotting/frame/test_frame_color.py | 11 +---------- .../tests/plotting/frame/test_frame_groupby.py | 16 ++-------------- .../tests/plotting/frame/test_frame_subplots.py | 10 ++-------- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 24e879d9491b5..2d509e8f3b320 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -10,17 +7,11 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import 
pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFramePlotsColor(TestPlotBase): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 968fa65e63e79..92ae025145595 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,25 +1,13 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string -import warnings - import numpy as np -import pytest import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index ef491e438dac1..4c86a570360b0 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,7 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools import string import warnings @@ -10,16 +8,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from 
pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From beea5eb22a69a434deb1caf6df4740d3e5754cbc Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Fri, 6 Nov 2020 08:43:54 +0300 Subject: [PATCH 013/147] Removed import --- pandas/tests/plotting/frame/test_frame_groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 92ae025145595..06ce0d5076d69 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -4,7 +4,6 @@ import pandas.util._test_decorators as td -import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase From 4e3b78b3154f123d7b69d94bf4d6c6bea1789691 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 09:29:53 -0800 Subject: [PATCH 014/147] catch FutureWarnings (#37587) --- pandas/tests/series/test_arithmetic.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 4920796f661fb..4bb4d3eeda112 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -731,17 +731,23 @@ def test_series_ops_name_retention(flex, box, names, all_binary_operators): left = Series(range(10), name=names[0]) right = Series(range(10), name=names[1]) + name = op.__name__.strip("_") + is_logical = name in ["and", "rand", "xor", "rxor", "or", "ror"] + is_rlogical = is_logical and name.startswith("r") + right = box(right) if flex: - name = op.__name__.strip("_") - if name in ["and", "rand", "xor", "rxor", "or", "ror"]: + if is_logical: # Series doesn't have these as flex methods return result = getattr(left, name)(right) else: - result = op(left, right) + # GH#37374 logical ops behaving as set ops deprecated + warn = 
FutureWarning if is_rlogical and box is Index else None + with tm.assert_produces_warning(warn, check_stacklevel=False): + result = op(left, right) - if box is pd.Index and op.__name__.strip("_") in ["rxor", "ror", "rand"]: + if box is pd.Index and is_rlogical: # Index treats these as set operators, so does not defer assert isinstance(result, pd.Index) return From 28d77b7546a37b35d786f150bf65a52c41e104a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 13:21:19 -0800 Subject: [PATCH 015/147] TST/REF: collect indexing tests by method (#37590) --- pandas/tests/frame/indexing/test_indexing.py | 26 -- pandas/tests/frame/indexing/test_set_value.py | 28 +- pandas/tests/indexing/test_callable.py | 254 ------------------ pandas/tests/indexing/test_iloc.py | 86 ++++++ pandas/tests/indexing/test_indexing_slow.py | 14 - pandas/tests/indexing/test_loc.py | 176 ++++++++++++ 6 files changed, 289 insertions(+), 295 deletions(-) delete mode 100644 pandas/tests/indexing/test_callable.py delete mode 100644 pandas/tests/indexing/test_indexing_slow.py diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 58f0e5bc1ad39..9eaa0d0ae6876 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1327,32 +1327,6 @@ def test_getitem_list_duplicates(self): expected = df.iloc[:, 2:] tm.assert_frame_equal(result, expected) - def test_set_value_with_index_dtype_change(self): - df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) - - # this is actually ambiguous as the 2 is interpreted as a positional - # so column is not created - df = df_orig.copy() - df._set_value("C", 2, 1.0) - assert list(df.index) == list(df_orig.index) + ["C"] - # assert list(df.columns) == list(df_orig.columns) + [2] - - df = df_orig.copy() - df.loc["C", 2] = 1.0 - assert list(df.index) == list(df_orig.index) + ["C"] - # assert list(df.columns) == list(df_orig.columns) 
+ [2] - - # create both new - df = df_orig.copy() - df._set_value("C", "D", 1.0) - assert list(df.index) == list(df_orig.index) + ["C"] - assert list(df.columns) == list(df_orig.columns) + ["D"] - - df = df_orig.copy() - df.loc["C", "D"] = 1.0 - assert list(df.index) == list(df_orig.index) + ["C"] - assert list(df.columns) == list(df_orig.columns) + ["D"] - # TODO: rename? remove? def test_single_element_ix_dont_upcast(self, float_frame): float_frame["E"] = 1 diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 484e2d544197e..84def57f6b6e0 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -3,7 +3,7 @@ from pandas.core.dtypes.common import is_float_dtype -from pandas import isna +from pandas import DataFrame, isna class TestSetValue: @@ -38,3 +38,29 @@ def test_set_value_resize(self, float_frame): msg = "could not convert string to float: 'sam'" with pytest.raises(ValueError, match=msg): res._set_value("foobar", "baz", "sam") + + def test_set_value_with_index_dtype_change(self): + df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC")) + + # this is actually ambiguous as the 2 is interpreted as a positional + # so column is not created + df = df_orig.copy() + df._set_value("C", 2, 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + df = df_orig.copy() + df.loc["C", 2] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + # create both new + df = df_orig.copy() + df._set_value("C", "D", 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] + + df = df_orig.copy() + df.loc["C", "D"] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] diff --git 
a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py deleted file mode 100644 index b98c9a3df0438..0000000000000 --- a/pandas/tests/indexing/test_callable.py +++ /dev/null @@ -1,254 +0,0 @@ -import numpy as np - -import pandas as pd -import pandas._testing as tm - - -class TestIndexingCallable: - def test_frame_loc_callable(self): - # GH 11485 - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - # iloc cannot use boolean Series (see GH3635) - - # return bool indexer - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - # 
scalar - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - def test_frame_loc_callable_mixture(self): - # GH 11485 - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - res = df.loc[3, lambda x: ["A", "B"]] - tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - res = df.loc[3, lambda x: ["A", "B"]] - tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - def test_frame_loc_callable_labels(self): - # GH 11485 - df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"], :] - tm.assert_frame_equal(res, df.loc[["A", "C"], :]) - - res = df.loc[lambda x: ["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - # mixture - res = df.loc[["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - res = df.loc[lambda x: ["A", "C"], "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - def 
test_frame_loc_callable_setitem(self): - # GH 11485 - df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.copy() - res.loc[lambda x: ["A", "C"]] = -20 - exp = df.copy() - exp.loc[["A", "C"]] = -20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], :] = 20 - exp = df.copy() - exp.loc[["A", "C"], :] = 20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -1 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = [5, 10] - tm.assert_frame_equal(res, exp) - - # mixture - res = df.copy() - res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) - exp = df.copy() - exp.loc[["A", "C"], "X"] = np.array([-1, -2]) - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[["A", "C"], lambda x: ["X"]] = 10 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = 10 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], "X"] = -2 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -2 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], ["X"]] = -4 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = -4 - tm.assert_frame_equal(res, exp) - - def test_frame_iloc_callable(self): - # GH 11485 - df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return location - res = df.iloc[lambda x: [1, 3]] - tm.assert_frame_equal(res, df.iloc[[1, 3]]) - - res = df.iloc[lambda x: [1, 3], :] - tm.assert_frame_equal(res, df.iloc[[1, 3], :]) - - res = df.iloc[lambda x: [1, 3], lambda x: 0] - tm.assert_series_equal(res, df.iloc[[1, 3], 0]) - - res = df.iloc[lambda x: [1, 3], lambda x: [0]] - tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) - - # mixture - res = df.iloc[[1, 3], lambda x: 0] - tm.assert_series_equal(res, 
df.iloc[[1, 3], 0]) - - res = df.iloc[[1, 3], lambda x: [0]] - tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) - - res = df.iloc[lambda x: [1, 3], 0] - tm.assert_series_equal(res, df.iloc[[1, 3], 0]) - - res = df.iloc[lambda x: [1, 3], [0]] - tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) - - def test_frame_iloc_callable_setitem(self): - # GH 11485 - df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return location - res = df.copy() - res.iloc[lambda x: [1, 3]] = 0 - exp = df.copy() - exp.iloc[[1, 3]] = 0 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[lambda x: [1, 3], :] = -1 - exp = df.copy() - exp.iloc[[1, 3], :] = -1 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[lambda x: [1, 3], lambda x: 0] = 5 - exp = df.copy() - exp.iloc[[1, 3], 0] = 5 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 - exp = df.copy() - exp.iloc[[1, 3], [0]] = 25 - tm.assert_frame_equal(res, exp) - - # mixture - res = df.copy() - res.iloc[[1, 3], lambda x: 0] = -3 - exp = df.copy() - exp.iloc[[1, 3], 0] = -3 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[[1, 3], lambda x: [0]] = -5 - exp = df.copy() - exp.iloc[[1, 3], [0]] = -5 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[lambda x: [1, 3], 0] = 10 - exp = df.copy() - exp.iloc[[1, 3], 0] = 10 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.iloc[lambda x: [1, 3], [0]] = [-5, -5] - exp = df.copy() - exp.iloc[[1, 3], [0]] = [-5, -5] - tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 867941a97b598..6c80354610a78 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -778,6 +778,92 @@ def test_iloc_setitem_series_duplicate_columns(self): assert df.dtypes.iloc[2] == np.int64 +class TestILocCallable: + def test_frame_iloc_getitem_callable(self): + # GH#11485 + df = 
DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.iloc[lambda x: [1, 3]] + tm.assert_frame_equal(res, df.iloc[[1, 3]]) + + res = df.iloc[lambda x: [1, 3], :] + tm.assert_frame_equal(res, df.iloc[[1, 3], :]) + + res = df.iloc[lambda x: [1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + # mixture + res = df.iloc[[1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[[1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + res = df.iloc[lambda x: [1, 3], 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + def test_frame_iloc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.copy() + res.iloc[lambda x: [1, 3]] = 0 + exp = df.copy() + exp.iloc[[1, 3]] = 0 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], :] = -1 + exp = df.copy() + exp.iloc[[1, 3], :] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: 0] = 5 + exp = df.copy() + exp.iloc[[1, 3], 0] = 5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 + exp = df.copy() + exp.iloc[[1, 3], [0]] = 25 + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.iloc[[1, 3], lambda x: 0] = -3 + exp = df.copy() + exp.iloc[[1, 3], 0] = -3 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[[1, 3], lambda x: [0]] = -5 + exp = df.copy() + exp.iloc[[1, 3], [0]] = -5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], 0] = 10 + exp = df.copy() + exp.iloc[[1, 3], 0] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + 
res.iloc[lambda x: [1, 3], [0]] = [-5, -5] + exp = df.copy() + exp.iloc[[1, 3], [0]] = [-5, -5] + tm.assert_frame_equal(res, exp) + + class TestILocSeries: def test_iloc(self): ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py deleted file mode 100644 index 2ffa44bec14a6..0000000000000 --- a/pandas/tests/indexing/test_indexing_slow.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest - -from pandas import DataFrame -import pandas._testing as tm - - -class TestIndexingSlow: - @pytest.mark.slow - def test_large_dataframe_indexing(self): - # GH10692 - result = DataFrame({"x": range(10 ** 6)}, dtype="int64") - result.loc[len(result)] = len(result) + 1 - expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b1c66c3c8850a..d1dcae5997b9d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1082,6 +1082,182 @@ def test_loc_setitem_multiindex_slice(self): tm.assert_series_equal(result, expected) +class TestLocSetitemWithExpansion: + @pytest.mark.slow + def test_loc_setitem_with_expansion_large_dataframe(self): + # GH#10692 + result = DataFrame({"x": range(10 ** 6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) + + +class TestLocCallable: + def test_frame_loc_getitem_callable(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see GH3635) + + # return bool indexer + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, 
df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_getitem_callable_mixture(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: 
["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_getitem_callable_labels(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture 
+ res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) + + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( From 86300a45eaee6bb6d3fe913986823c417e1b6ad6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 13:25:45 -0800 Subject: [PATCH 016/147] REF: prelims for single-path setitem_with_indexer (#37588) --- pandas/core/indexing.py | 8 +++++++- pandas/tests/indexing/test_indexing.py | 5 ----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c2dad928845a7..c5e331a104726 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1592,7 +1592,11 @@ def _setitem_with_indexer(self, indexer, value): return # add a new item with the dtype setup - self.obj[key] = infer_fill_value(value) + if com.is_null_slice(indexer[0]): + # We are setting an entire column + self.obj[key] = value + else: + self.obj[key] = infer_fill_value(value) new_indexer = convert_from_missing_indexer_tuple( indexer, self.obj.axes @@ -1641,6 +1645,8 @@ def _setitem_with_indexer_split_path(self, indexer, value): if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) + if len(indexer) > self.ndim: + raise IndexError("too many indices for array") if isinstance(value, ABCSeries): value = self._align_series(indexer, value) diff --git a/pandas/tests/indexing/test_indexing.py 
b/pandas/tests/indexing/test_indexing.py index de70ff37a052a..614e424e8aca2 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -116,10 +116,6 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): idxr = idxr(obj) nd3 = np.random.randint(5, size=(2, 2, 2)) - if (len(index) == 0) and (idxr_id == "iloc") and isinstance(obj, pd.DataFrame): - # gh-32896 - pytest.skip("This is currently failing. There's an xfailed test below.") - if idxr_id == "iloc": err = ValueError msg = f"Cannot set values with ndim > {obj.ndim}" @@ -140,7 +136,6 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): idxr[nd3] = 0 def test_setitem_ndarray_3d_does_not_fail_for_iloc_empty_dataframe(self): - # when fixing this, please remove the pytest.skip in test_setitem_ndarray_3d i = Index([]) obj = DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i) nd3 = np.random.randint(5, size=(2, 2, 2)) From 9a7278bfab7d2668f25fe052b31025b487ed695b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 13:26:54 -0800 Subject: [PATCH 017/147] ENH: __repr__ for 2D DTA/TDA (#37164) --- pandas/core/arrays/_mixins.py | 21 +++++++++++++++++ pandas/core/arrays/datetimes.py | 3 ++- pandas/core/arrays/timedeltas.py | 3 ++- pandas/tests/arrays/test_datetimelike.py | 30 ++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index aab5c5a110db8..a2371a39a0efa 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -252,3 +252,24 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs): else: msg = f"'{type(self).__name__}' does not implement reduction '{name}'" raise TypeError(msg) + + # ------------------------------------------------------------------------ + + def __repr__(self) -> str: + if self.ndim == 1: + return super().__repr__() + + from pandas.io.formats.printing import format_object_summary + + # 
the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + lines = [ + format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( + ", \n" + ) + for x in self + ] + data = ",\n".join(lines) + class_name = f"<{type(self).__name__}>" + return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a1050f4271e05..b05271552f117 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -566,7 +566,8 @@ def __iter__(self): tstamp : Timestamp """ if self.ndim > 1: - return (self[n] for n in range(len(self))) + for i in range(len(self)): + yield self[i] else: # convert in chunks of 10k for efficiency data = self.asi8 diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 0d9d257810674..e5b56ae80b578 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -352,7 +352,8 @@ def astype(self, dtype, copy: bool = True): def __iter__(self): if self.ndim > 1: - return (self[n] for n in range(len(self))) + for i in range(len(self)): + yield self[i] else: # convert in chunks of 10k for efficiency data = self.asi8 diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index f621479e4f311..b9298e9dec5b5 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -328,11 +328,41 @@ def test_iter_2d(self, arr1d): data2d = arr1d._data[:3, np.newaxis] arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) result = list(arr2d) + assert len(result) == 3 for x in result: assert isinstance(x, type(arr1d)) assert x.ndim == 1 assert x.dtype == arr1d.dtype + def test_repr_2d(self, arr1d): + data2d = arr1d._data[:3, np.newaxis] + arr2d = type(arr1d)._simple_new(data2d, 
dtype=arr1d.dtype) + + result = repr(arr2d) + + if isinstance(arr2d, TimedeltaArray): + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]._repr_base()}'],\n" + f"['{arr1d[1]._repr_base()}'],\n" + f"['{arr1d[2]._repr_base()}']\n" + "]\n" + f"Shape: (3, 1), dtype: {arr1d.dtype}" + ) + else: + expected = ( + f"<{type(arr2d).__name__}>\n" + "[\n" + f"['{arr1d[0]}'],\n" + f"['{arr1d[1]}'],\n" + f"['{arr1d[2]}']\n" + "]\n" + f"Shape: (3, 1), dtype: {arr1d.dtype}" + ) + + assert result == expected + def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 arr = self.array_cls(data, freq="D") From 391ec87ead5c4a87f6c189240f37fb18c177712f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 14:22:19 -0800 Subject: [PATCH 018/147] CLN: de-duplicate _validate_where_value with _validate_setitem_value (#37595) --- pandas/core/arrays/categorical.py | 5 ----- pandas/core/arrays/datetimelike.py | 2 -- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/extension.py | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 03f66ff82ad75..263512e427c69 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1177,11 +1177,6 @@ def map(self, mapper): # ------------------------------------------------------------- # Validators; ideally these can be de-duplicated - def _validate_where_value(self, value): - if is_scalar(value): - return self._validate_fill_value(value) - return self._validate_listlike(value) - def _validate_insert_value(self, value) -> int: return self._validate_fill_value(value) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f8a609fb0cabe..579719d8bac3b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -623,8 +623,6 @@ def _validate_setitem_value(self, value): return self._unbox(value, setitem=True) - 
_validate_where_value = _validate_setitem_value - def _validate_insert_value(self, value): value = self._validate_scalar(value) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9215fc8994d87..a92190a2bddf8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -485,7 +485,7 @@ def where(self, cond, other=None): values = self._data._ndarray try: - other = self._data._validate_where_value(other) + other = self._data._validate_setitem_value(other) except (TypeError, ValueError) as err: # Includes tzawareness mismatch and IncompatibleFrequencyError oth = getattr(other, "dtype", other) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index d37ec12fd3eda..cd1871e4687f3 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -343,7 +343,7 @@ def insert(self, loc: int, item): def putmask(self, mask, value): try: - value = self._data._validate_where_value(value) + value = self._data._validate_setitem_value(value) except (TypeError, ValueError): return self.astype(object).putmask(mask, value) From 7bbe6c45abb1e6ab9dc0a76e1730bbf0e0e3eacf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 14:24:36 -0800 Subject: [PATCH 019/147] TST/REF: collect tests by method (#37589) * TST/REF: move remaining setitem tests from test_timeseries * TST/REF: rehome test_timezones test * move misplaced arithmetic test * collect tests by method * move misplaced file --- pandas/tests/frame/indexing/test_setitem.py | 47 +++++++++++++++ .../{ => methods}/test_add_prefix_suffix.py | 0 .../tests/frame/methods/test_reset_index.py | 9 +++ pandas/tests/frame/methods/test_transpose.py | 32 ++++++---- pandas/tests/frame/test_constructors.py | 29 +++++++++ pandas/tests/frame/test_nonunique_indexes.py | 23 ------- pandas/tests/frame/test_timeseries.py | 57 ------------------ pandas/tests/frame/test_timezones.py | 60 ------------------- 
pandas/tests/series/indexing/test_getitem.py | 15 +++++ .../tests/series/methods/test_is_monotonic.py | 25 ++++++++ pandas/tests/series/methods/test_view.py | 18 ++++++ pandas/tests/series/test_analytics.py | 23 ------- pandas/tests/series/test_arithmetic.py | 20 +++++++ pandas/tests/series/test_constructors.py | 10 ++++ pandas/tests/series/test_period.py | 22 ++----- pandas/tests/series/test_timeseries.py | 30 ---------- 16 files changed, 199 insertions(+), 221 deletions(-) rename pandas/tests/frame/{ => methods}/test_add_prefix_suffix.py (100%) delete mode 100644 pandas/tests/frame/test_timeseries.py delete mode 100644 pandas/tests/frame/test_timezones.py create mode 100644 pandas/tests/series/methods/test_is_monotonic.py create mode 100644 pandas/tests/series/methods/test_view.py delete mode 100644 pandas/tests/series/test_analytics.py diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 55465dffd2027..e1ce10970f07b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -185,6 +185,53 @@ def test_setitem_extension_types(self, obj, dtype): tm.assert_frame_equal(df, expected) + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): + # GH#7492 + data_ns = np.array([1, "nat"], dtype="datetime64[ns]") + result = Series(data_ns).to_frame() + result["new"] = data_ns + expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # OutOfBoundsDatetime error shouldn't occur + data_s = np.array([1, "nat"], dtype="datetime64[s]") + result["new"] = data_s + expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into a 
not-yet-existing column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + assert df[unit].dtype == np.dtype("M8[ns]") + assert (df[unit].values == ex_vals).all() + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_existing_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into an already-existing dt64 column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]") + + # We overwrite existing dt64 column with new, non-nano dt64 vals + df["dates"] = vals + assert (df["dates"].values == ex_vals).all() + def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame diff --git a/pandas/tests/frame/test_add_prefix_suffix.py b/pandas/tests/frame/methods/test_add_prefix_suffix.py similarity index 100% rename from pandas/tests/frame/test_add_prefix_suffix.py rename to pandas/tests/frame/methods/test_add_prefix_suffix.py diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 92c9f7564a670..56fd633f5f22b 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -71,6 +71,15 @@ def test_reset_index_tz(self, tz_aware_fixture): expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_reset_index_tzaware_index(self, tz): + dr = date_range("2012-06-02", periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = 
df.reset_index().set_index("index") + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs + def test_reset_index_with_intervals(self): idx = IntervalIndex.from_breaks(np.arange(11), name="x") original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index a5fe5f3a6d5e4..8635168f1eb03 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -1,45 +1,55 @@ import numpy as np +import pytest -import pandas as pd +from pandas import DataFrame, date_range import pandas._testing as tm class TestTranspose: def test_transpose_tzaware_1col_single_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") - df = pd.DataFrame(dti) + df = DataFrame(dti) assert (df.dtypes == dti.dtype).all() res = df.T assert (res.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_single_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") - df3 = pd.DataFrame({"A": dti, "B": dti}) + df3 = DataFrame({"A": dti, "B": dti}) assert (df3.dtypes == dti.dtype).all() res3 = df3.T assert (res3.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_mixed_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") dti2 = dti.tz_convert("US/Pacific") - df4 = pd.DataFrame({"A": dti, "B": dti2}) + df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() tm.assert_frame_equal(df4.T.T, df4) + @pytest.mark.parametrize("tz", [None, "America/New_York"]) + def test_transpose_preserves_dtindex_equality_with_dst(self, tz): + # GH#19970 + idx = date_range("20161101", "20161130", freq="4H", tz=tz) + df = 
DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list("ab"), columns=idx) + tm.assert_frame_equal(result, expected) + def test_transpose_object_to_tzaware_mixed_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") dti2 = dti.tz_convert("US/Pacific") # mixed all-tzaware dtypes - df2 = pd.DataFrame([dti, dti2]) + df2 = DataFrame([dti, dti2]) assert (df2.dtypes == object).all() res2 = df2.T assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() @@ -47,7 +57,7 @@ def test_transpose_object_to_tzaware_mixed_tz(self): def test_transpose_uint64(self, uint64_frame): result = uint64_frame.T - expected = pd.DataFrame(uint64_frame.values.T) + expected = DataFrame(uint64_frame.values.T) expected.index = ["A", "B"] tm.assert_frame_equal(result, expected) @@ -63,7 +73,7 @@ def test_transpose_float(self, float_frame): # mixed type index, data = tm.getMixedTypeDict() - mixed = pd.DataFrame(data, index=index) + mixed = DataFrame(data, index=index) mixed_T = mixed.T for col, s in mixed_T.items(): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 46e34a7a58ae4..408024e48a35a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2737,6 +2737,35 @@ def test_constructor_list_str_na(self, string_dtype): class TestDataFrameConstructorWithDatetimeTZ: + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_construction_preserves_tzaware_dtypes(self, tz): + # after GH#7822 + # these retain the timezones on dict construction + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tz) + df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) + tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) + assert df["B"].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = 
[ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({"dr": dr}) + df["dr_tz"] = dr_tz + df["datetimes_naive"] = datetimes_naive + df["datetimes_with_tz"] = datetimes_with_tz + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + ], + index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], + ) + tm.assert_series_equal(result, expected) + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): # GH#25843 tz = tz_aware_fixture diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index c6b1c69442dbc..1c54855ee7bce 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -400,29 +400,6 @@ def check(result, expected=None): result = z.loc[["a", "c", "a"]] check(result, expected) - def test_column_dups_indexing2(self): - - # GH 8363 - # datetime ops with a non-unique index - df = DataFrame( - {"A": np.arange(5, dtype="int64"), "B": np.arange(1, 6, dtype="int64")}, - index=[2, 2, 3, 3, 4], - ) - result = df.B - df.A - expected = Series(1, index=[2, 2, 3, 3, 4]) - tm.assert_series_equal(result, expected) - - df = DataFrame( - { - "A": date_range("20130101", periods=5), - "B": date_range("20130101 09:00:00", periods=5), - }, - index=[2, 2, 3, 3, 4], - ) - result = df.B - df.A - expected = Series(pd.Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) - tm.assert_series_equal(result, expected) - def test_columns_with_dups(self): # GH 3468 related diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py deleted file mode 100644 index 22ffb30324366..0000000000000 --- a/pandas/tests/frame/test_timeseries.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import DataFrame, to_datetime -import pandas._testing as tm - - 
-class TestDataFrameTimeSeriesMethods: - @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) - def test_frame_append_datetime64_col_other_units(self, unit): - n = 100 - - ns_dtype = np.dtype("M8[ns]") - - dtype = np.dtype(f"M8[{unit}]") - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype("O")).values - - assert df[unit].dtype == ns_dtype - assert (df[unit].values == ex_vals).all() - - @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) - def test_frame_setitem_existing_datetime64_col_other_units(self, unit): - # Test insertion into existing datetime64 column - n = 100 - ns_dtype = np.dtype("M8[ns]") - - df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) - df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype) - - dtype = np.dtype(f"M8[{unit}]") - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp["dates"] = vals - ex_vals = to_datetime(vals.astype("O")).values - - assert (tmp["dates"].values == ex_vals).all() - - def test_datetime_assignment_with_NaT_and_diff_time_units(self): - # GH 7492 - data_ns = np.array([1, "nat"], dtype="datetime64[ns]") - result = pd.Series(data_ns).to_frame() - result["new"] = data_ns - expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") - tm.assert_frame_equal(result, expected) - # OutOfBoundsDatetime error shouldn't occur - data_s = np.array([1, "nat"], dtype="datetime64[s]") - result["new"] = data_s - expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py deleted file mode 100644 index 1271a490d6b70..0000000000000 --- a/pandas/tests/frame/test_timezones.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Tests for DataFrame timezone-related methods -""" -import numpy as np -import 
pytest - -from pandas.core.dtypes.dtypes import DatetimeTZDtype - -from pandas import DataFrame, Series -import pandas._testing as tm -from pandas.core.indexes.datetimes import date_range - - -class TestDataFrameTimezones: - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_frame_no_datetime64_dtype(self, tz): - # after GH#7822 - # these retain the timezones on dict construction - dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") - dr_tz = dr.tz_localize(tz) - df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) - tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) - assert df["B"].dtype == tz_expected - - # GH#2810 (with timezones) - datetimes_naive = [ts.to_pydatetime() for ts in dr] - datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({"dr": dr}) - df["dr_tz"] = dr_tz - df["datetimes_naive"] = datetimes_naive - df["datetimes_with_tz"] = datetimes_with_tz - result = df.dtypes - expected = Series( - [ - np.dtype("datetime64[ns]"), - DatetimeTZDtype(tz=tz), - np.dtype("datetime64[ns]"), - DatetimeTZDtype(tz=tz), - ], - index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], - ) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_frame_reset_index(self, tz): - dr = date_range("2012-06-02", periods=10, tz=tz) - df = DataFrame(np.random.randn(len(dr)), dr) - roundtripped = df.reset_index().set_index("index") - xp = df.index.tz - rs = roundtripped.index.tz - assert xp == rs - - @pytest.mark.parametrize("tz", [None, "America/New_York"]) - def test_boolean_compare_transpose_tzindex_with_dst(self, tz): - # GH 19970 - idx = date_range("20161101", "20161130", freq="4H", tz=tz) - df = DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) - result = df.T == df.T - expected = DataFrame(True, index=list("ab"), columns=idx) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py 
b/pandas/tests/series/indexing/test_getitem.py index f8517c3b91fc1..5e87f8f6c1059 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -271,6 +271,21 @@ def test_getitem_boolean_different_order(self, string_series): exp = string_series[string_series > 0] tm.assert_series_equal(sel, exp) + def test_getitem_boolean_contiguous_preserve_freq(self): + rng = date_range("1/1/2000", "3/1/2000", freq="B") + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + assert expected.freq == rng.freq + tm.assert_index_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + assert masked.freq is None + class TestGetitemCallable: def test_getitem_callable(self): diff --git a/pandas/tests/series/methods/test_is_monotonic.py b/pandas/tests/series/methods/test_is_monotonic.py new file mode 100644 index 0000000000000..b242b293cb59e --- /dev/null +++ b/pandas/tests/series/methods/test_is_monotonic.py @@ -0,0 +1,25 @@ +import numpy as np + +from pandas import Series, date_range + + +class TestIsMonotonic: + def test_is_monotonic_numeric(self): + + ser = Series(np.random.randint(0, 10, size=1000)) + assert not ser.is_monotonic + ser = Series(np.arange(1000)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + ser = Series(np.arange(1000, 0, -1)) + assert ser.is_monotonic_decreasing is True + + def test_is_monotonic_dt64(self): + + ser = Series(date_range("20130101", periods=10)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + + ser = Series(list(reversed(ser))) + assert ser.is_monotonic is False + assert ser.is_monotonic_decreasing is True diff --git a/pandas/tests/series/methods/test_view.py b/pandas/tests/series/methods/test_view.py new file mode 100644 index 0000000000000..ccf3aa0d90e6f --- /dev/null +++ b/pandas/tests/series/methods/test_view.py @@ -0,0 +1,18 @@ +from pandas import Series, date_range 
+import pandas._testing as tm + + +class TestView: + def test_view_tz(self): + # GH#24024 + ser = Series(date_range("2000", periods=4, tz="US/Central")) + result = ser.view("i8") + expected = Series( + [ + 946706400000000000, + 946792800000000000, + 946879200000000000, + 946965600000000000, + ] + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py deleted file mode 100644 index ebb75adde5b13..0000000000000 --- a/pandas/tests/series/test_analytics.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np - -import pandas as pd -from pandas import Series - - -class TestSeriesAnalytics: - def test_is_monotonic(self): - - s = Series(np.random.randint(0, 10, size=1000)) - assert not s.is_monotonic - s = Series(np.arange(1000)) - assert s.is_monotonic is True - assert s.is_monotonic_increasing is True - s = Series(np.arange(1000, 0, -1)) - assert s.is_monotonic_decreasing is True - - s = Series(pd.date_range("20130101", periods=10)) - assert s.is_monotonic is True - assert s.is_monotonic_increasing is True - s = Series(list(reversed(s.tolist()))) - assert s.is_monotonic is False - assert s.is_monotonic_decreasing is True diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 4bb4d3eeda112..9154c566a3dae 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -15,6 +15,7 @@ Index, IntervalIndex, Series, + Timedelta, bdate_range, date_range, isna, @@ -277,6 +278,25 @@ def test_alignment_doesnt_change_tz(self): assert ser.index is dti assert ser_utc.index is dti_utc + def test_arithmetic_with_duplicate_index(self): + + # GH#8363 + # integer ops with a non-unique index + index = [2, 2, 3, 3, 4] + ser = Series(np.arange(1, 6, dtype="int64"), index=index) + other = Series(np.arange(5, dtype="int64"), index=index) + result = ser - other + expected = Series(1, index=[2, 2, 3, 3, 4]) + 
tm.assert_series_equal(result, expected) + + # GH#8363 + # datetime ops with a non-unique index + ser = Series(date_range("20130101 09:00:00", periods=5), index=index) + other = Series(date_range("20130101", periods=5), index=index) + result = ser - other + expected = Series(Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Comparisons diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5c4118bc40f4d..c8fbbcf9aed20 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1045,6 +1045,16 @@ def test_constructor_infer_period(self, data_constructor): tm.assert_series_equal(result, expected) assert result.dtype == "Period[D]" + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") + def test_construct_from_ints_including_iNaT_scalar_period_dtype(self): + series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") + + val = series[3] + assert isna(val) + + series[2] = val + assert isna(series[2]) + def test_constructor_period_incompatible_frequency(self): data = [pd.Period("2000", "D"), pd.Period("2001", "A")] result = Series(data) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index d079111aa12d6..17dbfa9cf379a 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -1,36 +1,24 @@ import numpy as np -import pytest -import pandas as pd from pandas import DataFrame, Series, period_range class TestSeriesPeriod: - def setup_method(self, method): - self.series = Series(period_range("2000-01-01", periods=10, freq="D")) # --------------------------------------------------------------------- # NaT support - @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") - - val = 
series[3] - assert pd.isna(val) - - series[2] = val - assert pd.isna(series[2]) - def test_intercept_astype_object(self): - expected = self.series.astype("object") + series = Series(period_range("2000-01-01", periods=10, freq="D")) + + expected = series.astype("object") - df = DataFrame({"a": self.series, "b": np.random.randn(len(self.series))}) + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() - df = DataFrame({"a": self.series, "b": ["foo"] * len(self.series)}) + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 8b32be45e8d57..0769606d18d57 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,26 +1,10 @@ import numpy as np -import pandas as pd from pandas import DataFrame, Series, date_range, timedelta_range import pandas._testing as tm class TestTimeSeries: - def test_contiguous_boolean_preserve_freq(self): - rng = date_range("1/1/2000", "3/1/2000", freq="B") - - mask = np.zeros(len(rng), dtype=bool) - mask[10:20] = True - - masked = rng[mask] - expected = rng[10:20] - assert expected.freq == rng.freq - tm.assert_index_equal(masked, expected) - - mask[22] = True - masked = rng[mask] - assert masked.freq is None - def test_promote_datetime_date(self): rng = date_range("1/1/2000", periods=20) ts = Series(np.random.randn(20), index=rng) @@ -55,17 +39,3 @@ def f(x): s.map(f) s.apply(f) DataFrame(s).applymap(f) - - def test_view_tz(self): - # GH#24024 - ser = Series(pd.date_range("2000", periods=4, tz="US/Central")) - result = ser.view("i8") - expected = Series( - [ - 946706400000000000, - 946792800000000000, - 946879200000000000, - 946965600000000000, - ] - ) - tm.assert_series_equal(result, expected) From 4f4fd9e0bdfecf26aecb6e94d1e1b76440f440c5 
Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 14:39:26 -0800 Subject: [PATCH 020/147] REF: Categorical.is_dtype_equal -> categories_match_up_to_permutation (#37545) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/arrays/categorical.py | 20 ++++++--- pandas/core/dtypes/concat.py | 2 +- pandas/core/indexes/category.py | 2 +- pandas/core/reshape/merge.py | 2 +- .../tests/arrays/categorical/test_dtypes.py | 45 ++++++++++++------- pandas/tests/reshape/merge/test_merge.py | 6 +-- 7 files changed, 50 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f137302d4994..8a092cb6e36db 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -340,6 +340,7 @@ Deprecations - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`) - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) - :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`) +- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, will be removed in a future version (:issue:`37545`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 263512e427c69..b1f913e9ea641 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -78,7 +78,7 @@ def func(self, other): # the same (maybe up to ordering, depending on ordered) msg = "Categoricals can only be compared if 'categories' are the same." 
- if not self.is_dtype_equal(other): + if not self._categories_match_up_to_permutation(other): raise TypeError(msg) if not self.ordered and not self.categories.equals(other.categories): @@ -1869,11 +1869,12 @@ def _validate_setitem_value(self, value): # require identical categories set if isinstance(value, Categorical): - if not is_dtype_equal(self, value): + if not is_dtype_equal(self.dtype, value.dtype): raise ValueError( "Cannot set a Categorical with another, " "without identical categories" ) + # is_dtype_equal implies categories_match_up_to_permutation new_codes = self._validate_listlike(value) value = Categorical.from_codes(new_codes, dtype=self.dtype) @@ -2107,7 +2108,7 @@ def equals(self, other: object) -> bool: """ if not isinstance(other, Categorical): return False - elif self.is_dtype_equal(other): + elif self._categories_match_up_to_permutation(other): other_codes = self._validate_listlike(other) return np.array_equal(self._codes, other_codes) return False @@ -2120,7 +2121,7 @@ def _concat_same_type(self, to_concat): # ------------------------------------------------------------------ - def is_dtype_equal(self, other): + def _categories_match_up_to_permutation(self, other: "Categorical") -> bool: """ Returns True if categoricals are the same dtype same categories, and same ordered @@ -2133,8 +2134,17 @@ def is_dtype_equal(self, other): ------- bool """ + return hash(self.dtype) == hash(other.dtype) + + def is_dtype_equal(self, other) -> bool: + warn( + "Categorical.is_dtype_equal is deprecated and will be removed " + "in a future version", + FutureWarning, + stacklevel=2, + ) try: - return hash(self.dtype) == hash(other.dtype) + return self._categories_match_up_to_permutation(other) except (AttributeError, TypeError): return False diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 60fd959701821..99dc01ef421d1 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -296,7 +296,7 @@ def 
_maybe_unwrap(x): raise TypeError("dtype of categories must be the same") ordered = False - if all(first.is_dtype_equal(other) for other in to_union[1:]): + if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]): # identical categories - fastpath categories = first.categories ordered = first.ordered diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2f2836519d847..8cbd0d83c78d7 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -255,7 +255,7 @@ def _is_dtype_compat(self, other) -> Categorical: """ if is_categorical_dtype(other): other = extract_array(other) - if not other.is_dtype_equal(self): + if not other._categories_match_up_to_permutation(self): raise TypeError( "categories must match existing categories when appending" ) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5012be593820e..d82b1474ff3e0 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1083,7 +1083,7 @@ def _maybe_coerce_merge_keys(self): # if either left or right is a categorical # then the must match exactly in categories & ordered if lk_is_cat and rk_is_cat: - if lk.is_dtype_equal(rk): + if lk._categories_match_up_to_permutation(rk): continue elif lk_is_cat or rk_is_cat: diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 47ce9cb4089f9..deafa22a6e8eb 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -8,34 +8,45 @@ class TestCategoricalDtypes: - def test_is_equal_dtype(self): + def test_is_dtype_equal_deprecated(self): + # GH#37545 + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + + with tm.assert_produces_warning(FutureWarning): + c1.is_dtype_equal(c1) + + def test_categories_match_up_to_permutation(self): # test dtype comparisons between cats c1 = Categorical(list("aabca"), 
categories=list("abc"), ordered=False) c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) - assert c1.is_dtype_equal(c1) - assert c2.is_dtype_equal(c2) - assert c3.is_dtype_equal(c3) - assert c1.is_dtype_equal(c2) - assert not c1.is_dtype_equal(c3) - assert not c1.is_dtype_equal(Index(list("aabca"))) - assert not c1.is_dtype_equal(c1.astype(object)) - assert c1.is_dtype_equal(CategoricalIndex(c1)) - assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab"))) - assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True)) + assert c1._categories_match_up_to_permutation(c1) + assert c2._categories_match_up_to_permutation(c2) + assert c3._categories_match_up_to_permutation(c3) + assert c1._categories_match_up_to_permutation(c2) + assert not c1._categories_match_up_to_permutation(c3) + assert not c1._categories_match_up_to_permutation(Index(list("aabca"))) + assert not c1._categories_match_up_to_permutation(c1.astype(object)) + assert c1._categories_match_up_to_permutation(CategoricalIndex(c1)) + assert c1._categories_match_up_to_permutation( + CategoricalIndex(c1, categories=list("cab")) + ) + assert not c1._categories_match_up_to_permutation( + CategoricalIndex(c1, ordered=True) + ) # GH 16659 s1 = Series(c1) s2 = Series(c2) s3 = Series(c3) - assert c1.is_dtype_equal(s1) - assert c2.is_dtype_equal(s2) - assert c3.is_dtype_equal(s3) - assert c1.is_dtype_equal(s2) - assert not c1.is_dtype_equal(s3) - assert not c1.is_dtype_equal(s1.astype(object)) + assert c1._categories_match_up_to_permutation(s1) + assert c2._categories_match_up_to_permutation(s2) + assert c3._categories_match_up_to_permutation(s3) + assert c1._categories_match_up_to_permutation(s2) + assert not c1._categories_match_up_to_permutation(s3) + assert not c1._categories_match_up_to_permutation(s1.astype(object)) def test_set_dtype_same(self): c = Categorical(["a", "b", "c"]) diff --git 
a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index bb2860b88b288..a58372040c7f3 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1707,8 +1707,8 @@ def test_other_columns(self, left, right): tm.assert_series_equal(result, expected) # categories are preserved - assert left.X.values.is_dtype_equal(merged.X.values) - assert right.Z.values.is_dtype_equal(merged.Z.values) + assert left.X.values._categories_match_up_to_permutation(merged.X.values) + assert right.Z.values._categories_match_up_to_permutation(merged.Z.values) @pytest.mark.parametrize( "change", @@ -1725,7 +1725,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right): X = change(right.X.astype("object")) right = right.assign(X=X) assert is_categorical_dtype(left.X.values.dtype) - # assert not left.X.values.is_dtype_equal(right.X.values) + # assert not left.X.values._categories_match_up_to_permutation(right.X.values) merged = pd.merge(left, right, on="X", how=join_type) From aad064eb4c3eee9f5163ccbb174162fd7c4b9cd5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 22:42:16 +0000 Subject: [PATCH 021/147] CLN refactor non-core (#37580) --- pandas/_config/config.py | 4 ++-- pandas/_config/localization.py | 3 +-- pandas/_testing.py | 18 ++++++++--------- pandas/_version.py | 36 ++++++++++++++-------------------- pandas/compat/__init__.py | 2 +- pandas/conftest.py | 3 +-- 6 files changed, 29 insertions(+), 37 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 0b802f2cc9e69..512b638fc4877 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -392,7 +392,7 @@ class option_context(ContextDecorator): """ def __init__(self, *args): - if not (len(args) % 2 == 0 and len(args) >= 2): + if len(args) % 2 != 0 or len(args) < 2: raise ValueError( "Need to invoke as option_context(pat, val, [(pat, val), ...])." 
) @@ -648,7 +648,7 @@ def _build_option_description(k: str) -> str: s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" if d: - rkey = d.rkey if d.rkey else "" + rkey = d.rkey or "" s += "\n (Deprecated" s += f", use `{rkey}` instead." s += ")" diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 3933c8f3d519c..bc76aca93da2a 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -99,8 +99,7 @@ def _valid_locales(locales, normalize): def _default_locale_getter(): - raw_locales = subprocess.check_output(["locale -a"], shell=True) - return raw_locales + return subprocess.check_output(["locale -a"], shell=True) def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter): diff --git a/pandas/_testing.py b/pandas/_testing.py index 427585704ba58..ded2ed3141b47 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -736,8 +736,7 @@ def _get_ilevel_values(index, level): unique = index.levels[level] level_codes = index.codes[level] filled = take_1d(unique._values, level_codes, fill_value=unique._na_value) - values = unique._shallow_copy(filled, name=index.names[level]) - return values + return unique._shallow_copy(filled, name=index.names[level]) if check_less_precise is not no_default: warnings.warn( @@ -1885,8 +1884,7 @@ def makeTimedeltaIndex(k=10, freq="D", name=None, **kwargs): def makePeriodIndex(k=10, name=None, **kwargs): dt = datetime(2000, 1, 1) - dr = pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) - return dr + return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) def makeMultiIndex(k=10, names=None, **kwargs): @@ -2525,9 +2523,12 @@ def network( @wraps(t) def wrapper(*args, **kwargs): - if check_before_test and not raise_on_error: - if not can_connect(url, error_classes): - skip() + if ( + check_before_test + and not raise_on_error + and not can_connect(url, error_classes) + ): + skip() try: return t(*args, **kwargs) 
except Exception as err: @@ -2942,8 +2943,7 @@ def convert_rows_list_to_csv_str(rows_list: List[str]): Expected output of to_csv() in current OS. """ sep = os.linesep - expected = sep.join(rows_list) + sep - return expected + return sep.join(rows_list) + sep def external_error_raised(expected_exception: Type[Exception]) -> ContextManager: diff --git a/pandas/_version.py b/pandas/_version.py index b3fa8530d09eb..d2df063ff3acf 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -22,8 +22,7 @@ def get_keywords(): # get_keywords(). git_refnames = "$Format:%d$" git_full = "$Format:%H$" - keywords = {"refnames": git_refnames, "full": git_full} - return keywords + return {"refnames": git_refnames, "full": git_full} class VersioneerConfig: @@ -121,17 +120,16 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - f = open(versionfile_abs) - for line in f.readlines(): - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - f.close() + with open(versionfile_abs) as fd: + for line in fd.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) except OSError: pass return keywords @@ -286,13 +284,11 @@ def render_pep440(pieces): if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += f"{pieces['distance']:d}.g{pieces['short']}" - if pieces["dirty"]: - rendered += ".dirty" else: # exception #1 rendered = f"0+untagged.{pieces['distance']:d}.g{pieces['short']}" - if pieces["dirty"]: - rendered += ".dirty" + if pieces["dirty"]: + rendered += ".dirty" return rendered @@ -348,13 +344,11 @@ def 
render_pep440_old(pieces): rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += f".post{pieces['distance']:d}" - if pieces["dirty"]: - rendered += ".dev0" else: # exception #1 rendered = f"0.post{pieces['distance']:d}" - if pieces["dirty"]: - rendered += ".dev0" + if pieces["dirty"]: + rendered += ".dev0" return rendered diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 57e378758cc78..2ac9b9e2c875c 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -50,7 +50,7 @@ def is_platform_windows() -> bool: bool True if the running platform is windows. """ - return sys.platform == "win32" or sys.platform == "cygwin" + return sys.platform in ["win32", "cygwin"] def is_platform_linux() -> bool: diff --git a/pandas/conftest.py b/pandas/conftest.py index 515d20e8c5781..207be8a86bb8b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -386,13 +386,12 @@ def _create_multiindex(): major_codes = np.array([0, 0, 1, 2, 3, 3]) minor_codes = np.array([0, 1, 0, 1, 0, 1]) index_names = ["first", "second"] - mi = MultiIndex( + return MultiIndex( levels=[major_axis, minor_axis], codes=[major_codes, minor_codes], names=index_names, verify_integrity=False, ) - return mi def _create_mi_with_dt64tz_level(): From 3c307b0f380f6a337e9c4e227dc9e1046cbdce5c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 23:13:17 +0000 Subject: [PATCH 022/147] refactor core/computation (#37585) --- pandas/core/computation/align.py | 12 +++++++----- pandas/core/computation/eval.py | 11 +++++------ pandas/core/computation/expr.py | 17 ++++++++--------- pandas/core/computation/pytables.py | 16 ++++++++-------- pandas/core/computation/scope.py | 3 +-- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 82867cf9dcd29..8a8b0d564ea49 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -38,8 +38,7 
@@ def _align_core_single_unary_op( def _zip_axes_from_type( typ: Type[FrameOrSeries], new_axes: Sequence[int] ) -> Dict[str, int]: - axes = {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} - return axes + return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)} def _any_pandas_objects(terms) -> bool: @@ -186,8 +185,11 @@ def reconstruct_object(typ, obj, axes, dtype): # The condition is to distinguish 0-dim array (returned in case of # scalar) and 1 element array # e.g. np.array(0) and np.array([0]) - if len(obj.shape) == 1 and len(obj) == 1: - if not isinstance(ret_value, np.ndarray): - ret_value = np.array([ret_value]).astype(res_t) + if ( + len(obj.shape) == 1 + and len(obj) == 1 + and not isinstance(ret_value, np.ndarray) + ): + ret_value = np.array([ret_value]).astype(res_t) return ret_value diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index b77204861f0a4..12f16343362e2 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -52,12 +52,11 @@ def _check_engine(engine: Optional[str]) -> str: # TODO: validate this in a more general way (thinking of future engines # that won't necessarily be import-able) # Could potentially be done on engine instantiation - if engine == "numexpr": - if not NUMEXPR_INSTALLED: - raise ImportError( - "'numexpr' is not installed or an unsupported version. Cannot use " - "engine='numexpr' for query/eval if 'numexpr' is not installed" - ) + if engine == "numexpr" and not NUMEXPR_INSTALLED: + raise ImportError( + "'numexpr' is not installed or an unsupported version. 
Cannot use " + "engine='numexpr' for query/eval if 'numexpr' is not installed" + ) return engine diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 8c56f02c8d3cc..c971551a7f400 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -496,15 +496,14 @@ def _maybe_evaluate_binop( f"'{lhs.type}' and '{rhs.type}'" ) - if self.engine != "pytables": - if ( - res.op in CMP_OPS_SYMS - and getattr(lhs, "is_datetime", False) - or getattr(rhs, "is_datetime", False) - ): - # all date ops must be done in python bc numexpr doesn't work - # well with NaT - return self._maybe_eval(res, self.binary_ops) + if self.engine != "pytables" and ( + res.op in CMP_OPS_SYMS + and getattr(lhs, "is_datetime", False) + or getattr(rhs, "is_datetime", False) + ): + # all date ops must be done in python bc numexpr doesn't work + # well with NaT + return self._maybe_eval(res, self.binary_ops) if res.op in eval_in_python: # "in"/"not in" ops are always evaluated in python diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index dd622ed724e8f..6ec637a8b4845 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -378,14 +378,14 @@ def prune(self, klass): operand = self.operand operand = operand.prune(klass) - if operand is not None: - if issubclass(klass, ConditionBinOp): - if operand.condition is not None: - return operand.invert() - elif issubclass(klass, FilterBinOp): - if operand.filter is not None: - return operand.invert() - + if operand is not None and ( + issubclass(klass, ConditionBinOp) + and operand.condition is not None + or not issubclass(klass, ConditionBinOp) + and issubclass(klass, FilterBinOp) + and operand.filter is not None + ): + return operand.invert() return None diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 2925f583bfc56..7a9b8caa985e3 100644 --- a/pandas/core/computation/scope.py +++ 
b/pandas/core/computation/scope.py @@ -144,8 +144,7 @@ def __init__( def __repr__(self) -> str: scope_keys = _get_pretty_string(list(self.scope.keys())) res_keys = _get_pretty_string(list(self.resolvers.keys())) - unicode_str = f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})" - return unicode_str + return f"{type(self).__name__}(scope={scope_keys}, resolvers={res_keys})" @property def has_resolvers(self) -> bool: From bcce6a6ca17b3896a3fa41eb3d1a9620e6109bf2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 18:43:39 -0800 Subject: [PATCH 023/147] TST/REF: share method tests between DataFrame and Series (#37596) --- pandas/conftest.py | 10 ++ pandas/core/frame.py | 14 +- pandas/tests/frame/methods/test_at_time.py | 45 ++++-- .../tests/frame/methods/test_between_time.py | 94 +++++++++++- pandas/tests/frame/methods/test_to_period.py | 77 ++++++++-- .../tests/frame/methods/test_to_timestamp.py | 89 ++++++++--- .../tests/indexes/datetimes/test_indexing.py | 9 ++ pandas/tests/indexing/test_loc.py | 22 +++ pandas/tests/series/indexing/test_getitem.py | 12 +- pandas/tests/series/methods/test_at_time.py | 77 ---------- .../tests/series/methods/test_between_time.py | 144 ------------------ pandas/tests/series/methods/test_to_period.py | 56 ------- .../tests/series/methods/test_to_timestamp.py | 64 -------- 13 files changed, 318 insertions(+), 395 deletions(-) delete mode 100644 pandas/tests/series/methods/test_at_time.py delete mode 100644 pandas/tests/series/methods/test_between_time.py delete mode 100644 pandas/tests/series/methods/test_to_period.py delete mode 100644 pandas/tests/series/methods/test_to_timestamp.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 207be8a86bb8b..b2daa2c5bc3f7 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -290,6 +290,16 @@ def unique_nulls_fixture(request): # ---------------------------------------------------------------- # Classes # 
---------------------------------------------------------------- + + +@pytest.fixture(params=[pd.DataFrame, pd.Series]) +def frame_or_series(request): + """ + Fixture to parametrize over DataFrame and Series. + """ + return request.param + + @pytest.fixture( params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item] ) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5134529d9c21f..24b89085ac121 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -131,7 +131,13 @@ from pandas.core.construction import extract_array from pandas.core.generic import NDFrame, _shared_docs from pandas.core.indexes import base as ibase -from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences +from pandas.core.indexes.api import ( + DatetimeIndex, + Index, + PeriodIndex, + ensure_index, + ensure_index_from_sequences, +) from pandas.core.indexes.multi import MultiIndex, maybe_droplevels from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager @@ -9253,6 +9259,9 @@ def to_timestamp( axis_name = self._get_axis_name(axis) old_ax = getattr(self, axis_name) + if not isinstance(old_ax, PeriodIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + new_ax = old_ax.to_timestamp(freq=freq, how=how) setattr(new_obj, axis_name, new_ax) @@ -9282,6 +9291,9 @@ def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> DataFrame: axis_name = self._get_axis_name(axis) old_ax = getattr(self, axis_name) + if not isinstance(old_ax, DatetimeIndex): + raise TypeError(f"unsupported Type {type(old_ax).__name__}") + new_ax = old_ax.to_period(freq=freq) setattr(new_obj, axis_name, new_ax) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index ac98d632c5dcd..7ac3868e8ddf4 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -4,14 +4,32 @@ 
import pytest import pytz +from pandas._libs.tslibs import timezones + from pandas import DataFrame, date_range import pandas._testing as tm class TestAtTime: - def test_at_time(self): + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time(self, tzstr, frame_or_series): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = frame_or_series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_at_time(self, frame_or_series): rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + if frame_or_series is not DataFrame: + ts = ts[0] rs = ts.at_time(rng[1]) assert (rs.index.hour == rng[1].hour).all() assert (rs.index.minute == rng[1].minute).all() @@ -19,23 +37,24 @@ def test_at_time(self): result = ts.at_time("9:30") expected = ts.at_time(time(9, 30)) - tm.assert_frame_equal(result, expected) - - result = ts.loc[time(9, 30)] - expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] - - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) + def test_at_time_midnight(self, frame_or_series): # midnight, everything rng = date_range("1/1/2000", "1/31/2000") ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + if frame_or_series is not DataFrame: + ts = ts[0] result = ts.at_time(time(0, 0)) - tm.assert_frame_equal(result, ts) + tm.assert_equal(result, ts) + def test_at_time_nonexistent(self, frame_or_series): # time doesn't exist rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = DataFrame(np.random.randn(len(rng), 2), rng) + ts = DataFrame(np.random.randn(len(rng)), rng) + if frame_or_series is not DataFrame: + ts = ts[0] rs = ts.at_time("16:00") assert len(rs) == 0 @@ 
-62,12 +81,14 @@ def test_at_time_tz(self): expected = df.iloc[1:2] tm.assert_frame_equal(result, expected) - def test_at_time_raises(self): + def test_at_time_raises(self, frame_or_series): # GH#20725 - df = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + if frame_or_series is not DataFrame: + obj = obj[0] msg = "Index must be DatetimeIndex" with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - df.at_time("00:00") + obj.at_time("00:00") @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) def test_at_time_axis(self, axis): diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 19e802d0fa663..73722f36a0b86 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -1,16 +1,73 @@ -from datetime import time +from datetime import datetime, time import numpy as np import pytest -from pandas import DataFrame, date_range +from pandas._libs.tslibs import timezones +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series, date_range import pandas._testing as tm class TestBetweenTime: - def test_between_time(self, close_open_fixture): + @td.skip_if_has_locale + def test_between_time_formats(self, frame_or_series): + # GH#11818 rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + if frame_or_series is Series: + ts = ts[0] + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_between_time(self, tzstr, frame_or_series): + tz = 
timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + ts_local = ts.tz_localize(tzstr) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + def test_between_time_types(self, frame_or_series): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + obj = DataFrame({"A": 0}, index=rng) + if frame_or_series is Series: + obj = obj["A"] + + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + obj.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + def test_between_time(self, close_open_fixture, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + if frame_or_series is not DataFrame: + ts = ts[0] + stime = time(0, 0) etime = time(1, 0) inc_start, inc_end = close_open_fixture @@ -37,11 +94,13 @@ def test_between_time(self, close_open_fixture): result = ts.between_time("00:00", "01:00") expected = ts.between_time(stime, etime) - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) # across midnight rng = date_range("1/1/2000", "1/5/2000", freq="5min") ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + if frame_or_series is not DataFrame: + ts = ts[0] stime = time(22, 0) etime = time(9, 0) @@ -65,14 +124,33 @@ def test_between_time(self, close_open_fixture): else: assert (t < etime) or (t >= stime) - def test_between_time_raises(self): + def test_between_time_raises(self, frame_or_series): # GH#20725 - df = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + if frame_or_series is not DataFrame: + obj = obj[0] + msg = "Index 
must be DatetimeIndex" with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - df.between_time(start_time="00:00", end_time="12:00") + obj.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, frame_or_series): + # GH#8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + if frame_or_series is DataFrame: + ts = ts.to_frame() + + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = f"No axis named {ts.ndim} for object type {type(ts).__name__}" + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=ts.ndim) - def test_between_time_axis(self, axis): + def test_between_time_axis_aliases(self, axis): # GH#8839 rng = date_range("1/1/2000", periods=100, freq="10min") ts = DataFrame(np.random.randn(len(rng), len(rng))) diff --git a/pandas/tests/frame/methods/test_to_period.py b/pandas/tests/frame/methods/test_to_period.py index 051461b6c554d..e3f3fe9f697a9 100644 --- a/pandas/tests/frame/methods/test_to_period.py +++ b/pandas/tests/frame/methods/test_to_period.py @@ -1,36 +1,87 @@ import numpy as np import pytest -from pandas import DataFrame, date_range, period_range +from pandas import ( + DataFrame, + DatetimeIndex, + PeriodIndex, + Series, + date_range, + period_range, +) import pandas._testing as tm class TestToPeriod: - def test_frame_to_period(self): + def test_to_period(self, frame_or_series): K = 5 - dr = date_range("1/1/2000", "1/1/2001") - pr = period_range("1/1/2000", "1/1/2001") - df = DataFrame(np.random.randn(len(dr), K), index=dr) - df["mix"] = "a" + dr = date_range("1/1/2000", "1/1/2001", freq="D") + obj = DataFrame( + np.random.randn(len(dr), K), index=dr, columns=["A", "B", "C", "D", "E"] + ) + obj["mix"] = "a" + if frame_or_series is Series: + obj = obj["A"] - pts 
= df.to_period() - exp = df.copy() - exp.index = pr - tm.assert_frame_equal(pts, exp) + pts = obj.to_period() + exp = obj.copy() + exp.index = period_range("1/1/2000", "1/1/2001") + tm.assert_equal(pts, exp) - pts = df.to_period("M") - tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + pts = obj.to_period("M") + exp.index = exp.index.asfreq("M") + tm.assert_equal(pts, exp) + + def test_to_period_without_freq(self, frame_or_series): + # GH#7606 without freq + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) + exp_idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" + ) + + obj = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + if frame_or_series is Series: + obj = obj[idx[0]] + expected = obj.copy() + expected.index = exp_idx + tm.assert_equal(obj.to_period(), expected) + + if frame_or_series is DataFrame: + expected = obj.copy() + expected.columns = exp_idx + tm.assert_frame_equal(obj.to_period(axis=1), expected) + + def test_to_period_columns(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" df = df.T pts = df.to_period(axis=1) exp = df.copy() - exp.columns = pr + exp.columns = period_range("1/1/2000", "1/1/2001") tm.assert_frame_equal(pts, exp) pts = df.to_period("M", axis=1) tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + def test_to_period_invalid_axis(self): + dr = date_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), 5), index=dr) + df["mix"] = "a" + msg = "No axis named 2 for object type DataFrame" with pytest.raises(ValueError, match=msg): df.to_period(axis=2) + + def test_to_period_raises(self, index, frame_or_series): + # https://github.com/pandas-dev/pandas/issues/33327 + obj = Series(index=index, dtype=object) + if frame_or_series is DataFrame: + obj = obj.to_frame() + + if not isinstance(index, DatetimeIndex): + msg = f"unsupported Type {type(index).__name__}" + 
with pytest.raises(TypeError, match=msg): + obj.to_period() diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index ae7d2827e05a6..e23d12b691b4a 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -6,6 +6,8 @@ from pandas import ( DataFrame, DatetimeIndex, + PeriodIndex, + Series, Timedelta, date_range, period_range, @@ -14,48 +16,70 @@ import pandas._testing as tm +def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + class TestToTimestamp: - def test_frame_to_time_stamp(self): + def test_to_timestamp(self, frame_or_series): K = 5 index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - df = DataFrame(np.random.randn(len(index), K), index=index) - df["mix"] = "a" + obj = DataFrame( + np.random.randn(len(index), K), + index=index, + columns=["A", "B", "C", "D", "E"], + ) + obj["mix"] = "a" + if frame_or_series is Series: + obj = obj["A"] exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - result = df.to_timestamp("D", "end") + result = obj.to_timestamp("D", "end") tm.assert_index_equal(result.index, exp_index) - tm.assert_numpy_array_equal(result.values, df.values) + tm.assert_numpy_array_equal(result.values, obj.values) + if frame_or_series is Series: + assert result.name == "A" exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = df.to_timestamp("D", "start") + result = obj.to_timestamp("D", "start") tm.assert_index_equal(result.index, exp_index) - def _get_with_delta(delta, freq="A-DEC"): - return date_range( - to_datetime("1/1/2001") + delta, - to_datetime("12/31/2009") + delta, - freq=freq, - ) + result = obj.to_timestamp(how="start") + tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23) - result = 
df.to_timestamp("H", "end") + result = obj.to_timestamp("H", "end") exp_index = _get_with_delta(delta) exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23, minutes=59) - result = df.to_timestamp("T", "end") + result = obj.to_timestamp("T", "end") exp_index = _get_with_delta(delta) exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") tm.assert_index_equal(result.index, exp_index) - result = df.to_timestamp("S", "end") + result = obj.to_timestamp("S", "end") delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") tm.assert_index_equal(result.index, exp_index) + def test_to_timestamp_columns(self): + K = 5 + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + df = DataFrame( + np.random.randn(len(index), K), + index=index, + columns=["A", "B", "C", "D", "E"], + ) + df["mix"] = "a" + # columns df = df.T @@ -87,10 +111,6 @@ def _get_with_delta(delta, freq="A-DEC"): exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") tm.assert_index_equal(result.columns, exp_index) - # invalid axis - with pytest.raises(ValueError, match="axis"): - df.to_timestamp(axis=2) - result1 = df.to_timestamp("5t", axis=1) result2 = df.to_timestamp("t", axis=1) expected = date_range("2001-01-01", "2009-01-01", freq="AS") @@ -101,3 +121,34 @@ def _get_with_delta(delta, freq="A-DEC"): # PeriodIndex.to_timestamp always use 'infer' assert result1.columns.freqstr == "AS-JAN" assert result2.columns.freqstr == "AS-JAN" + + def test_to_timestamp_invalid_axis(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + obj = DataFrame(np.random.randn(len(index), 5), index=index) + + # invalid axis + with pytest.raises(ValueError, match="axis"): + obj.to_timestamp(axis=2) + + def test_to_timestamp_hourly(self, frame_or_series): + + index = period_range(freq="H", start="1/1/2001",
end="1/2/2001") + obj = Series(1, index=index, name="foo") + if frame_or_series is not Series: + obj = obj.to_frame() + + exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H") + result = obj.to_timestamp(how="end") + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + if frame_or_series is Series: + assert result.name == "foo" + + def test_to_timestamp_raises(self, index, frame_or_series): + # GH#33327 + obj = frame_or_series(index=index, dtype=object) + + if not isinstance(index, PeriodIndex): + msg = f"unsupported Type {type(index).__name__}" + with pytest.raises(TypeError, match=msg): + obj.to_timestamp() diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 4e46eb126894b..330092b08c1b2 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -719,3 +719,12 @@ def test_slice_datetime_locs(self, box, kind, tz_aware_fixture): result = index.slice_locs(key, box(2010, 1, 2)) expected = (0, 1) assert result == expected + + +class TestIndexerBetweenTime: + def test_indexer_between_time(self): + # GH#11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d1dcae5997b9d..c1a5db992d3df 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,4 +1,5 @@ """ test label based indexing with loc """ +from datetime import time from io import StringIO import re @@ -992,6 +993,27 @@ def test_loc_setitem_str_to_small_float_conversion_type(self): expected = DataFrame(col_data, columns=["A"], dtype=float) tm.assert_frame_equal(result, expected) + 
def test_loc_getitem_time_object(self, frame_or_series): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + mask = (rng.hour == 9) & (rng.minute == 30) + + obj = DataFrame(np.random.randn(len(rng), 3), index=rng) + if frame_or_series is Series: + obj = obj[0] + + result = obj.loc[time(9, 30)] + exp = obj.loc[mask] + tm.assert_equal(result, exp) + + chunk = obj.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = result[-1:] + + # Without resetting the freqs, these are 5 min and 1440 min, respectively + result.index = result.index._with_freq(None) + expected.index = expected.index._with_freq(None) + tm.assert_equal(result, expected) + class TestLocWithMultiIndex: @pytest.mark.parametrize( diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 5e87f8f6c1059..2933983a5b18b 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -1,7 +1,7 @@ """ Series.__getitem__ test classes are organized by the type of key passed. 
""" -from datetime import datetime +from datetime import datetime, time import numpy as np import pytest @@ -83,6 +83,16 @@ def test_string_index_alias_tz_aware(self, tz): result = ser["1/3/2000"] tm.assert_almost_equal(result, ser[2]) + def test_getitem_time_object(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + + mask = (rng.hour == 9) & (rng.minute == 30) + result = ts[time(9, 30)] + expected = ts[mask] + result.index = result.index._with_freq(None) + tm.assert_series_equal(result, expected) + class TestSeriesGetitemSlices: def test_getitem_slice_2d(self, datetime_series): diff --git a/pandas/tests/series/methods/test_at_time.py b/pandas/tests/series/methods/test_at_time.py deleted file mode 100644 index 810e4c1446708..0000000000000 --- a/pandas/tests/series/methods/test_at_time.py +++ /dev/null @@ -1,77 +0,0 @@ -from datetime import time - -import numpy as np -import pytest - -from pandas._libs.tslibs import timezones - -from pandas import DataFrame, Series, date_range -import pandas._testing as tm - - -class TestAtTime: - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_localized_at_time(self, tzstr): - tz = timezones.maybe_get_tz(tzstr) - - rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(tzstr) - - result = ts_local.at_time(time(10, 0)) - expected = ts.at_time(time(10, 0)).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - - def test_at_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - rs = ts.at_time(rng[1]) - assert (rs.index.hour == rng[1].hour).all() - assert (rs.index.minute == rng[1].minute).all() - assert (rs.index.second == rng[1].second).all() - - result = ts.at_time("9:30") - expected = ts.at_time(time(9, 30)) - 
tm.assert_series_equal(result, expected) - - df = DataFrame(np.random.randn(len(rng), 3), index=rng) - - result = ts[time(9, 30)] - result_df = df.loc[time(9, 30)] - expected = ts[(rng.hour == 9) & (rng.minute == 30)] - exp_df = df[(rng.hour == 9) & (rng.minute == 30)] - - result.index = result.index._with_freq(None) - tm.assert_series_equal(result, expected) - tm.assert_frame_equal(result_df, exp_df) - - chunk = df.loc["1/4/2000":] - result = chunk.loc[time(9, 30)] - expected = result_df[-1:] - - # Without resetting the freqs, these are 5 min and 1440 min, respectively - result.index = result.index._with_freq(None) - expected.index = expected.index._with_freq(None) - tm.assert_frame_equal(result, expected) - - # midnight, everything - rng = date_range("1/1/2000", "1/31/2000") - ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.at_time(time(0, 0)) - tm.assert_series_equal(result, ts) - - # time doesn't exist - rng = date_range("1/1/2012", freq="23Min", periods=384) - ts = Series(np.random.randn(len(rng)), rng) - rs = ts.at_time("16:00") - assert len(rs) == 0 - - def test_at_time_raises(self): - # GH20725 - ser = Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with pytest.raises(TypeError, match=msg): - ser.at_time("00:00") diff --git a/pandas/tests/series/methods/test_between_time.py b/pandas/tests/series/methods/test_between_time.py deleted file mode 100644 index e9d2f8e6f1637..0000000000000 --- a/pandas/tests/series/methods/test_between_time.py +++ /dev/null @@ -1,144 +0,0 @@ -from datetime import datetime, time -from itertools import product - -import numpy as np -import pytest - -from pandas._libs.tslibs import timezones -import pandas.util._test_decorators as td - -from pandas import DataFrame, Series, date_range -import pandas._testing as tm - - -class TestBetweenTime: - @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) - def test_localized_between_time(self, tzstr): - tz = timezones.maybe_get_tz(tzstr) - 
- rng = date_range("4/16/2012", "5/1/2012", freq="H") - ts = Series(np.random.randn(len(rng)), index=rng) - - ts_local = ts.tz_localize(tzstr) - - t1, t2 = time(10, 0), time(11, 0) - result = ts_local.between_time(t1, t2) - expected = ts.between_time(t1, t2).tz_localize(tzstr) - tm.assert_series_equal(result, expected) - assert timezones.tz_compare(result.index.tz, tz) - - def test_between_time(self): - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(0, 0) - etime = time(1, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = 13 * 4 + 1 - if not inc_start: - exp_len -= 5 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert t >= stime - else: - assert t > stime - - if inc_end: - assert t <= etime - else: - assert t < etime - - result = ts.between_time("00:00", "01:00") - expected = ts.between_time(stime, etime) - tm.assert_series_equal(result, expected) - - # across midnight - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime = time(22, 0) - etime = time(9, 0) - - close_open = product([True, False], [True, False]) - for inc_start, inc_end in close_open: - filtered = ts.between_time(stime, etime, inc_start, inc_end) - exp_len = (12 * 11 + 1) * 4 + 1 - if not inc_start: - exp_len -= 4 - if not inc_end: - exp_len -= 4 - - assert len(filtered) == exp_len - for rs in filtered.index: - t = rs.time() - if inc_start: - assert (t >= stime) or (t <= etime) - else: - assert (t > stime) or (t <= etime) - - if inc_end: - assert (t <= etime) or (t >= stime) - else: - assert (t < etime) or (t >= stime) - - def test_between_time_raises(self): - # GH20725 - ser = Series("a b c".split()) - msg = "Index must be DatetimeIndex" - with 
pytest.raises(TypeError, match=msg): - ser.between_time(start_time="00:00", end_time="12:00") - - def test_between_time_types(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" - with pytest.raises(ValueError, match=msg): - rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - frame = DataFrame({"A": 0}, index=rng) - with pytest.raises(ValueError, match=msg): - frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - series = Series(0, index=rng) - with pytest.raises(ValueError, match=msg): - series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) - - @td.skip_if_has_locale - def test_between_time_formats(self): - # GH11818 - rng = date_range("1/1/2000", "1/5/2000", freq="5min") - ts = DataFrame(np.random.randn(len(rng), 2), index=rng) - - strings = [ - ("2:00", "2:30"), - ("0200", "0230"), - ("2:00am", "2:30am"), - ("0200am", "0230am"), - ("2:00:00", "2:30:00"), - ("020000", "023000"), - ("2:00:00am", "2:30:00am"), - ("020000am", "023000am"), - ] - expected_length = 28 - - for time_string in strings: - assert len(ts.between_time(*time_string)) == expected_length - - def test_between_time_axis(self): - # issue 8839 - rng = date_range("1/1/2000", periods=100, freq="10min") - ts = Series(np.random.randn(len(rng)), index=rng) - stime, etime = ("08:00:00", "09:00:00") - expected_length = 7 - - assert len(ts.between_time(stime, etime)) == expected_length - assert len(ts.between_time(stime, etime, axis=0)) == expected_length - msg = "No axis named 1 for object type Series" - with pytest.raises(ValueError, match=msg): - ts.between_time(stime, etime, axis=1) diff --git a/pandas/tests/series/methods/test_to_period.py b/pandas/tests/series/methods/test_to_period.py deleted file mode 100644 index b40fc81931e20..0000000000000 --- a/pandas/tests/series/methods/test_to_period.py +++ /dev/null @@ -1,56 +0,0 @@ -import 
numpy as np -import pytest - -from pandas import ( - DataFrame, - DatetimeIndex, - PeriodIndex, - Series, - date_range, - period_range, -) -import pandas._testing as tm - - -class TestToPeriod: - def test_to_period(self): - rng = date_range("1/1/2000", "1/1/2001", freq="D") - ts = Series(np.random.randn(len(rng)), index=rng) - - pts = ts.to_period() - exp = ts.copy() - exp.index = period_range("1/1/2000", "1/1/2001") - tm.assert_series_equal(pts, exp) - - pts = ts.to_period("M") - exp.index = exp.index.asfreq("M") - tm.assert_index_equal(pts.index, exp.index.asfreq("M")) - tm.assert_series_equal(pts, exp) - - # GH#7606 without freq - idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) - exp_idx = PeriodIndex( - ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" - ) - - s = Series(np.random.randn(4), index=idx) - expected = s.copy() - expected.index = exp_idx - tm.assert_series_equal(s.to_period(), expected) - - df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) - expected = df.copy() - expected.index = exp_idx - tm.assert_frame_equal(df.to_period(), expected) - - expected = df.copy() - expected.columns = exp_idx - tm.assert_frame_equal(df.to_period(axis=1), expected) - - def test_to_period_raises(self, index): - # https://github.com/pandas-dev/pandas/issues/33327 - ser = Series(index=index, dtype=object) - if not isinstance(index, DatetimeIndex): - msg = f"unsupported Type {type(index).__name__}" - with pytest.raises(TypeError, match=msg): - ser.to_period() diff --git a/pandas/tests/series/methods/test_to_timestamp.py b/pandas/tests/series/methods/test_to_timestamp.py deleted file mode 100644 index 13a2042a2f639..0000000000000 --- a/pandas/tests/series/methods/test_to_timestamp.py +++ /dev/null @@ -1,64 +0,0 @@ -from datetime import timedelta - -import pytest - -from pandas import PeriodIndex, Series, Timedelta, date_range, period_range, to_datetime -import pandas._testing as tm - - -class TestToTimestamp: - def 
test_to_timestamp(self): - index = period_range(freq="A", start="1/1/2001", end="12/1/2009") - series = Series(1, index=index, name="foo") - - exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") - result = series.to_timestamp(how="end") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - assert result.name == "foo" - - exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") - result = series.to_timestamp(how="start") - tm.assert_index_equal(result.index, exp_index) - - def _get_with_delta(delta, freq="A-DEC"): - return date_range( - to_datetime("1/1/2001") + delta, - to_datetime("12/31/2009") + delta, - freq=freq, - ) - - delta = timedelta(hours=23) - result = series.to_timestamp("H", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - delta = timedelta(hours=23, minutes=59) - result = series.to_timestamp("T", "end") - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - result = series.to_timestamp("S", "end") - delta = timedelta(hours=23, minutes=59, seconds=59) - exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - - index = period_range(freq="H", start="1/1/2001", end="1/2/2001") - series = Series(1, index=index, name="foo") - - exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H") - result = series.to_timestamp(how="end") - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") - tm.assert_index_equal(result.index, exp_index) - assert result.name == "foo" - - def test_to_timestamp_raises(self, index): - # https://github.com/pandas-dev/pandas/issues/33327 - ser = Series(index=index, dtype=object) - if not isinstance(index, PeriodIndex): - 
msg = f"unsupported Type {type(index).__name__}" - with pytest.raises(TypeError, match=msg): - ser.to_timestamp() From 0cd2f36f156d2c66e9c8a572a5fb2edb5724ed5f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 18:47:20 -0800 Subject: [PATCH 024/147] BUG: Index.where casting ints to str (#37591) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/base.py | 18 +++++------------- pandas/core/indexes/datetimelike.py | 11 ++--------- pandas/core/indexes/numeric.py | 2 ++ pandas/tests/indexes/base_class/test_where.py | 13 +++++++++++++ pandas/tests/indexes/datetimelike.py | 3 +-- .../tests/indexes/datetimes/test_indexing.py | 16 +++++++++------- pandas/tests/indexes/period/test_indexing.py | 11 ++++++----- .../tests/indexes/timedeltas/test_indexing.py | 11 ++++++----- pandas/tests/indexing/test_coercion.py | 7 ++++--- 10 files changed, 49 insertions(+), 44 deletions(-) create mode 100644 pandas/tests/indexes/base_class/test_where.py diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8a092cb6e36db..45a95f6aeb2f6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -456,6 +456,7 @@ Indexing - Bug in :meth:`Series.loc.__getitem__` with a non-unique :class:`MultiIndex` and an empty-list indexer (:issue:`13691`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`) - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) +- Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5ee5e867567b3..b220756a24f9f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -40,7 +40,6 @@ ensure_int64, ensure_object, ensure_platform_int, - is_bool, is_bool_dtype, 
is_categorical_dtype, is_datetime64_any_dtype, @@ -4079,23 +4078,16 @@ def where(self, cond, other=None): if other is None: other = self._na_value - dtype = self.dtype values = self.values - if is_bool(other) or is_bool_dtype(other): - - # bools force casting - values = values.astype(object) - dtype = None + try: + self._validate_fill_value(other) + except (ValueError, TypeError): + return self.astype(object).where(cond, other) values = np.where(cond, values, other) - if self._is_numeric_dtype and np.any(isna(values)): - # We can't coerce to the numeric dtype of "self" (unless - # it's float) if there are NaN values in our output. - dtype = None - - return Index(values, dtype=dtype, name=self.name) + return Index(values, name=self.name) # construction helpers @final diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a92190a2bddf8..9e2ac6013cb43 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -482,16 +482,9 @@ def isin(self, values, level=None): @Appender(Index.where.__doc__) def where(self, cond, other=None): - values = self._data._ndarray + other = self._data._validate_setitem_value(other) - try: - other = self._data._validate_setitem_value(other) - except (TypeError, ValueError) as err: - # Includes tzawareness mismatch and IncompatibleFrequencyError - oth = getattr(other, "dtype", other) - raise TypeError(f"Where requires matching dtype, not {oth}") from err - - result = np.where(cond, values, other) + result = np.where(cond, self._data._ndarray, other) arr = self._data._from_backing_data(result) return type(self)._simple_new(arr, name=self.name) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index d6f571360b457..9eb8a8b719d41 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -121,6 +121,8 @@ def _validate_fill_value(self, value): # force conversion to object # so we don't lose the bools raise TypeError + if 
isinstance(value, str): + raise TypeError return value diff --git a/pandas/tests/indexes/base_class/test_where.py b/pandas/tests/indexes/base_class/test_where.py new file mode 100644 index 0000000000000..0c8969735e14e --- /dev/null +++ b/pandas/tests/indexes/base_class/test_where.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import Index +import pandas._testing as tm + + +class TestWhere: + def test_where_intlike_str_doesnt_cast_ints(self): + idx = Index(range(3)) + mask = np.array([True, False, True]) + res = idx.where(mask, "2") + expected = Index([0, "2", 2]) + tm.assert_index_equal(res, expected) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index be8ca61f1a730..6f078237e3a97 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -143,10 +143,9 @@ def test_where_cast_str(self): result = index.where(mask, [str(index[0])]) tm.assert_index_equal(result, expected) - msg = "Where requires matching dtype, not foo" + msg = "value should be a '.*', 'NaT', or array of those" with pytest.raises(TypeError, match=msg): index.where(mask, "foo") - msg = r"Where requires matching dtype, not \['foo'\]" with pytest.raises(TypeError, match=msg): index.where(mask, ["foo"]) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 330092b08c1b2..d4ebb557fd6cd 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -177,24 +177,26 @@ def test_where_invalid_dtypes(self): i2 = Index([pd.NaT, pd.NaT] + dti[2:].tolist()) - with pytest.raises(TypeError, match="Where requires matching dtype"): + msg = "value should be a 'Timestamp', 'NaT', or array of those. 
Got" + msg2 = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg2): # passing tz-naive ndarray to tzaware DTI dti.where(notna(i2), i2.values) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg2): # passing tz-aware DTI to tznaive DTI dti.tz_localize(None).where(notna(i2), i2) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): dti.where(notna(i2), i2.tz_localize(None).to_period("D")) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): dti.where(notna(i2), i2.asi8.view("timedelta64[ns]")) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): dti.where(notna(i2), i2.asi8) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): # non-matching scalar dti.where(notna(i2), pd.Timedelta(days=4)) @@ -203,7 +205,7 @@ def test_where_mismatched_nat(self, tz_aware_fixture): dti = pd.date_range("2013-01-01", periods=3, tz=tz) cond = np.array([True, False, True]) - msg = "Where requires matching dtype" + msg = "value should be a 'Timestamp', 'NaT', or array of those. 
Got" with pytest.raises(TypeError, match=msg): # wrong-dtyped NaT dti.where(cond, np.timedelta64("NaT", "ns")) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index b6d3c36f1682c..19dfa9137cc5c 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -545,16 +545,17 @@ def test_where_invalid_dtypes(self): i2 = PeriodIndex([NaT, NaT] + pi[2:].tolist(), freq="D") - with pytest.raises(TypeError, match="Where requires matching dtype"): + msg = "value should be a 'Period', 'NaT', or array of those" + with pytest.raises(TypeError, match=msg): pi.where(notna(i2), i2.asi8) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): pi.where(notna(i2), i2.asi8.view("timedelta64[ns]")) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): pi.where(notna(i2), i2.to_timestamp("S")) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): # non-matching scalar pi.where(notna(i2), Timedelta(days=4)) @@ -562,7 +563,7 @@ def test_where_mismatched_nat(self): pi = period_range("20130101", periods=5, freq="D") cond = np.array([True, False, True, True, False]) - msg = "Where requires matching dtype" + msg = "value should be a 'Period', 'NaT', or array of those" with pytest.raises(TypeError, match=msg): # wrong-dtyped NaT pi.where(cond, np.timedelta64("NaT", "ns")) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 396a676b97a1b..37aa9653550fb 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -150,16 +150,17 @@ def test_where_invalid_dtypes(self): i2 = Index([pd.NaT, pd.NaT] + tdi[2:].tolist()) - with pytest.raises(TypeError, match="Where requires matching dtype"): + 
msg = "value should be a 'Timedelta', 'NaT', or array of those" + with pytest.raises(TypeError, match=msg): tdi.where(notna(i2), i2.asi8) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): tdi.where(notna(i2), i2 + pd.Timestamp.now()) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): tdi.where(notna(i2), (i2 + pd.Timestamp.now()).to_period("D")) - with pytest.raises(TypeError, match="Where requires matching dtype"): + with pytest.raises(TypeError, match=msg): # non-matching scalar tdi.where(notna(i2), pd.Timestamp.now()) @@ -167,7 +168,7 @@ def test_where_mismatched_nat(self): tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") cond = np.array([True, False, False]) - msg = "Where requires matching dtype" + msg = "value should be a 'Timedelta', 'NaT', or array of those" with pytest.raises(TypeError, match=msg): # wrong-dtyped NaT tdi.where(cond, np.datetime64("NaT", "ns")) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 436b2aa838b08..fd6f6fbc6a4ba 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -780,7 +780,7 @@ def test_where_index_timedelta64(self, value): result = tdi.where(cond, value) tm.assert_index_equal(result, expected) - msg = "Where requires matching dtype" + msg = "value should be a 'Timedelta', 'NaT', or array of thos" with pytest.raises(TypeError, match=msg): # wrong-dtyped NaT tdi.where(cond, np.datetime64("NaT", "ns")) @@ -804,11 +804,12 @@ def test_where_index_period(self): tm.assert_index_equal(result, expected) # Passing a mismatched scalar - msg = "Where requires matching dtype" + msg = "value should be a 'Period', 'NaT', or array of those" with pytest.raises(TypeError, match=msg): pi.where(cond, pd.Timedelta(days=4)) - with pytest.raises(TypeError, match=msg): + msg = r"Input has different freq=D from 
PeriodArray\(freq=Q-DEC\)" + with pytest.raises(ValueError, match=msg): pi.where(cond, pd.Period("2020-04-21", "D")) From d021726f874b0db9fbb7a1a33e2ef4a54cd41631 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 01:57:28 -0800 Subject: [PATCH 025/147] REF: IntervalArray comparisons (#37124) --- pandas/core/arrays/interval.py | 79 +++++++++++++++---- pandas/core/indexes/interval.py | 13 --- pandas/tests/extension/base/methods.py | 2 +- .../tests/indexes/interval/test_interval.py | 8 +- 4 files changed, 68 insertions(+), 34 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 161cf3bf3a677..f8ece2a9fe7d4 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1,3 +1,4 @@ +import operator from operator import le, lt import textwrap from typing import TYPE_CHECKING, Optional, Tuple, Union, cast @@ -12,6 +13,7 @@ IntervalMixin, intervals_to_interval_bounds, ) +from pandas._libs.missing import NA from pandas._typing import ArrayLike, Dtype from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender @@ -48,7 +50,7 @@ from pandas.core.construction import array, extract_array from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index -from pandas.core.ops import unpack_zerodim_and_defer +from pandas.core.ops import invalid_comparison, unpack_zerodim_and_defer if TYPE_CHECKING: from pandas import Index @@ -520,8 +522,7 @@ def __setitem__(self, key, value): self._left[key] = value_left self._right[key] = value_right - @unpack_zerodim_and_defer("__eq__") - def __eq__(self, other): + def _cmp_method(self, other, op): # ensure pandas array for list-like and eliminate non-interval scalars if is_list_like(other): if len(self) != len(other): @@ -529,7 +530,7 @@ def __eq__(self, other): other = array(other) elif not isinstance(other, Interval): # non-interval scalar -> no matches - return np.zeros(len(self), dtype=bool) 
+ return invalid_comparison(self, other, op) # determine the dtype of the elements we want to compare if isinstance(other, Interval): @@ -543,7 +544,8 @@ def __eq__(self, other): # extract intervals if we have interval categories with matching closed if is_interval_dtype(other_dtype): if self.closed != other.categories.closed: - return np.zeros(len(self), dtype=bool) + return invalid_comparison(self, other, op) + other = other.categories.take( other.codes, allow_fill=True, fill_value=other.categories._na_value ) @@ -551,27 +553,70 @@ def __eq__(self, other): # interval-like -> need same closed and matching endpoints if is_interval_dtype(other_dtype): if self.closed != other.closed: - return np.zeros(len(self), dtype=bool) - return (self._left == other.left) & (self._right == other.right) + return invalid_comparison(self, other, op) + elif not isinstance(other, Interval): + other = type(self)(other) + + if op is operator.eq: + return (self._left == other.left) & (self._right == other.right) + elif op is operator.ne: + return (self._left != other.left) | (self._right != other.right) + elif op is operator.gt: + return (self._left > other.left) | ( + (self._left == other.left) & (self._right > other.right) + ) + elif op is operator.ge: + return (self == other) | (self > other) + elif op is operator.lt: + return (self._left < other.left) | ( + (self._left == other.left) & (self._right < other.right) + ) + else: + # operator.lt + return (self == other) | (self < other) # non-interval/non-object dtype -> no matches if not is_object_dtype(other_dtype): - return np.zeros(len(self), dtype=bool) + return invalid_comparison(self, other, op) # object dtype -> iteratively check for intervals result = np.zeros(len(self), dtype=bool) for i, obj in enumerate(other): - # need object to be an Interval with same closed and endpoints - if ( - isinstance(obj, Interval) - and self.closed == obj.closed - and self._left[i] == obj.left - and self._right[i] == obj.right - ): - result[i] = 
True - + try: + result[i] = op(self[i], obj) + except TypeError: + if obj is NA: + # comparison with np.nan returns NA + # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092 + result[i] = op is operator.ne + else: + raise return result + @unpack_zerodim_and_defer("__eq__") + def __eq__(self, other): + return self._cmp_method(other, operator.eq) + + @unpack_zerodim_and_defer("__ne__") + def __ne__(self, other): + return self._cmp_method(other, operator.ne) + + @unpack_zerodim_and_defer("__gt__") + def __gt__(self, other): + return self._cmp_method(other, operator.gt) + + @unpack_zerodim_and_defer("__ge__") + def __ge__(self, other): + return self._cmp_method(other, operator.ge) + + @unpack_zerodim_and_defer("__lt__") + def __lt__(self, other): + return self._cmp_method(other, operator.lt) + + @unpack_zerodim_and_defer("__le__") + def __le__(self, other): + return self._cmp_method(other, operator.le) + def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. 
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1bd71f00b534d..2061e652a4c01 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1074,19 +1074,6 @@ def _is_all_dates(self) -> bool: # TODO: arithmetic operations - # GH#30817 until IntervalArray implements inequalities, get them from Index - def __lt__(self, other): - return Index.__lt__(self, other) - - def __le__(self, other): - return Index.__le__(self, other) - - def __gt__(self, other): - return Index.__gt__(self, other) - - def __ge__(self, other): - return Index.__ge__(self, other) - def _is_valid_endpoint(endpoint) -> bool: """ diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e973b1247941f..29a59cdefbd83 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -447,7 +447,7 @@ def test_repeat(self, data, repeats, as_series, use_numpy): @pytest.mark.parametrize( "repeats, kwargs, error, msg", [ - (2, dict(axis=1), ValueError, "'axis"), + (2, dict(axis=1), ValueError, "axis"), (-1, dict(), ValueError, "negative"), ([1, 2], dict(), ValueError, "shape"), (2, dict(foo="bar"), TypeError, "'foo'"), diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 67e031b53e44e..157446b1fff5d 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -579,9 +579,11 @@ def test_comparison(self): actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) - msg = ( - "not supported between instances of 'int' and " - "'pandas._libs.interval.Interval'" + msg = "|".join( + [ + "not supported between instances of 'int' and '.*.Interval'", + r"Invalid comparison between dtype=interval\[int64\] and ", + ] ) with pytest.raises(TypeError, match=msg): self.index > 0 From 1461ba7e9c8c83e89f959e5a2c0c9ff9359d9129 Mon Sep 17 
00:00:00 2001 From: Philip Cerles Date: Tue, 3 Nov 2020 05:30:03 -0800 Subject: [PATCH 026/147] regression fix for merging DF with datetime index with empty DF (#36897) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/reshape/merge.py | 11 ++++-- pandas/tests/reshape/merge/test_multi.py | 49 +++++++++++++++++++++++- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 45a95f6aeb2f6..7111d54d65815 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -469,7 +469,6 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) -- I/O ^^^ @@ -536,6 +535,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns both multiindexed (:issue:`36360`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) +- Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index d82b1474ff3e0..1219fefd7ea92 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -830,12 +830,15 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): rvals = algos.take_1d(take_right, right_indexer, fill_value=rfill) # if we have an all missing left_indexer - # make sure to just use the right values - mask = left_indexer == -1 - if mask.all(): + # make sure to just use 
the right values or vice-versa + mask_left = left_indexer == -1 + mask_right = right_indexer == -1 + if mask_left.all(): key_col = rvals + elif mask_right.all(): + key_col = lvals else: - key_col = Index(lvals).where(~mask, rvals) + key_col = Index(lvals).where(~mask_left, rvals) if result._is_label_reference(name): result[name] = key_col diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index b1922241c7843..260a0e9d486b2 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge @@ -481,6 +481,53 @@ def test_merge_datetime_index(self, klass): result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("merge_type", ["left", "right"]) + def test_merge_datetime_multi_index_empty_df(self, merge_type): + # see gh-36895 + + left = DataFrame( + data={ + "data": [1.5, 1.5], + }, + index=MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ), + ) + + right = DataFrame( + index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"] + ) + + expected_index = MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) + + if merge_type == "left": + expected = DataFrame( + data={ + "data": [1.5, 1.5], + "state": [None, None], + }, + index=expected_index, + ) + results_merge = left.merge(right, how="left", on=["date", "panel"]) + results_join = left.join(right, how="left") + else: + expected = DataFrame( + data={ + "state": [None, None], + "data": [1.5, 1.5], + }, + index=expected_index, + ) + 
results_merge = right.merge(left, how="right", on=["date", "panel"]) + results_join = right.join(left, how="right") + + tm.assert_frame_equal(results_merge, expected) + tm.assert_frame_equal(results_join, expected) + def test_join_multi_levels(self): # GH 3662 From 34defeb1973da70c5fe6c2f0e89fc7f04bdf740e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Nov 2020 20:18:25 +0100 Subject: [PATCH 027/147] ERR: fix error message in Period for invalid frequency (#37602) --- pandas/_libs/tslibs/period.pyx | 2 +- pandas/tests/scalar/period/test_period.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index b1f9ff71f5faa..b817d80c64ccd 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2438,7 +2438,7 @@ cpdef int freq_to_dtype_code(BaseOffset freq) except? -1: try: return freq._period_dtype_code except AttributeError as err: - raise ValueError(INVALID_FREQ_ERR_MSG) from err + raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index f150e5e5b18b2..46bc6421c2070 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1554,3 +1554,9 @@ def test_negone_ordinals(): repr(period) period = Period(ordinal=-1, freq="W") repr(period) + + +def test_invalid_frequency_error_message(): + msg = "Invalid frequency: " + with pytest.raises(ValueError, match=msg): + Period("2012-01-02", freq="WOM-1MON") From f632fe892586f6f2687784bb91aeaaed50f0685b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 15:09:05 -0800 Subject: [PATCH 028/147] CLN: remove rebox_native (#37608) --- pandas/core/arrays/datetimelike.py | 5 +++-- pandas/core/arrays/datetimes.py | 9 +++------ pandas/core/arrays/period.py | 8 ++------ 
pandas/core/arrays/timedeltas.py | 8 ++------ pandas/tests/arrays/test_datetimelike.py | 5 +++-- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 579719d8bac3b..1955a96160a4a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -151,7 +151,9 @@ def _rebox_native(cls, value: int) -> Union[int, np.datetime64, np.timedelta64]: """ raise AbstractMethodError(cls) - def _unbox_scalar(self, value: DTScalarOrNaT, setitem: bool = False) -> int: + def _unbox_scalar( + self, value: DTScalarOrNaT, setitem: bool = False + ) -> Union[np.int64, np.datetime64, np.timedelta64]: """ Unbox the integer value of a scalar `value`. @@ -636,7 +638,6 @@ def _unbox( """ if lib.is_scalar(other): other = self._unbox_scalar(other, setitem=setitem) - other = self._rebox_native(other) else: # same type as self self._check_compatible_with(other, setitem=setitem) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b05271552f117..f655d10881011 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -454,16 +454,13 @@ def _generate_range( # ----------------------------------------------------------------- # DatetimeLike Interface - @classmethod - def _rebox_native(cls, value: int) -> np.datetime64: - return np.int64(value).view("M8[ns]") - - def _unbox_scalar(self, value, setitem: bool = False): + def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") if not isna(value): self._check_compatible_with(value, setitem=setitem) - return value.value + return value.asm8 + return np.datetime64(value.value, "ns") def _scalar_from_string(self, value): return Timestamp(value, tz=self.tz) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 
b95a7acc19b1f..d808ade53ad33 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -260,18 +260,14 @@ def _generate_range(cls, start, end, periods, freq, fields): # ----------------------------------------------------------------- # DatetimeLike Interface - @classmethod - def _rebox_native(cls, value: int) -> np.int64: - return np.int64(value) - def _unbox_scalar( self, value: Union[Period, NaTType], setitem: bool = False ) -> int: if value is NaT: - return value.value + return np.int64(value.value) elif isinstance(value, self._scalar_type): self._check_compatible_with(value, setitem=setitem) - return value.ordinal + return np.int64(value.ordinal) else: raise ValueError(f"'value' should be a Period. Got '{value}' instead.") diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e5b56ae80b578..e4a844fd4c6ef 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -301,15 +301,11 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # DatetimeLike Interface - @classmethod - def _rebox_native(cls, value: int) -> np.timedelta64: - return np.int64(value).view("m8[ns]") - - def _unbox_scalar(self, value, setitem: bool = False): + def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") self._check_compatible_with(value, setitem=setitem) - return value.value + return np.timedelta64(value.value, "ns") def _scalar_from_string(self, value): return Timedelta(value) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b9298e9dec5b5..ec20c829f1544 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -191,10 +191,11 @@ def test_unbox_scalar(self): data = np.arange(10, dtype="i8") * 
24 * 3600 * 10 ** 9 arr = self.array_cls(data, freq="D") result = arr._unbox_scalar(arr[0]) - assert isinstance(result, int) + expected = arr._data.dtype.type + assert isinstance(result, expected) result = arr._unbox_scalar(pd.NaT) - assert isinstance(result, int) + assert isinstance(result, expected) msg = f"'value' should be a {self.dtype.__name__}." with pytest.raises(ValueError, match=msg): From 58ad9b0a537e0214ccce95c488a4fd00a0430f6f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 17:50:29 -0800 Subject: [PATCH 029/147] TST/REF: tests.generic (#37618) --- pandas/tests/frame/methods/test_equals.py | 57 ++++++++- pandas/tests/frame/methods/test_head_tail.py | 24 ++++ .../generic/methods/test_first_valid_index.py | 5 +- pandas/tests/generic/methods/test_pipe.py | 15 +-- .../generic/methods/test_reorder_levels.py | 11 +- pandas/tests/generic/methods/test_sample.py | 10 +- pandas/tests/generic/test_generic.py | 111 +++--------------- 7 files changed, 112 insertions(+), 121 deletions(-) diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index c024390297fec..de2509ed91be2 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -1,4 +1,6 @@ -from pandas import DataFrame +import numpy as np + +from pandas import DataFrame, date_range import pandas._testing as tm @@ -21,3 +23,56 @@ def test_equals_different_blocks(self): tm.assert_frame_equal(df0, df1) assert df0.equals(df1) assert df1.equals(df0) + + def test_equals(self): + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + df1["text"] = "the sky is so blue. 
we could use more chocolate.".split() + df1["start"] = date_range("2000-1-1", periods=10, freq="T") + df1["end"] = date_range("2000-1-1", periods=10, freq="D") + df1["diff"] = df1["end"] - df1["start"] + df1["bool"] = np.arange(10) % 3 == 0 + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1["text"].equals(df2["text"]) + assert df1["start"].equals(df2["start"]) + assert df1["end"].equals(df2["end"]) + assert df1["diff"].equals(df2["diff"]) + assert df1["bool"].equals(df2["bool"]) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different["floats"] = different["floats"].astype("float32") + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = date_range("2000-1-1", periods=10, freq="T") + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(["text"], append=True) + df2 = df1.set_index(["text"], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(["floats"], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(["floats"], append=True) + df2 = df1.set_index(["floats"], append=True) + assert df3.equals(df2) diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index 93763bc12ce0d..fa28f7d3e16a2 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -4,6 +4,30 @@ import pandas._testing as tm +def test_head_tail_generic(index, frame_or_series): + # GH#5370 + + ndim = 2 if frame_or_series is DataFrame else 1 + shape = (len(index),) * ndim + vals = np.random.randn(*shape) + obj = frame_or_series(vals, index=index) + + tm.assert_equal(obj.head(), 
obj.iloc[:5]) + tm.assert_equal(obj.tail(), obj.iloc[-5:]) + + # 0-len + tm.assert_equal(obj.head(0), obj.iloc[0:0]) + tm.assert_equal(obj.tail(0), obj.iloc[0:0]) + + # bounded + tm.assert_equal(obj.head(len(obj) + 1), obj) + tm.assert_equal(obj.tail(len(obj) + 1), obj) + + # neg index + tm.assert_equal(obj.head(-3), obj.head(len(index) - 3)) + tm.assert_equal(obj.tail(-3), obj.tail(len(index) - 3)) + + def test_head_tail(float_frame): tm.assert_frame_equal(float_frame.head(), float_frame[:5]) tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) diff --git a/pandas/tests/generic/methods/test_first_valid_index.py b/pandas/tests/generic/methods/test_first_valid_index.py index bca3452c3c458..8d021f0e3954e 100644 --- a/pandas/tests/generic/methods/test_first_valid_index.py +++ b/pandas/tests/generic/methods/test_first_valid_index.py @@ -9,10 +9,9 @@ class TestFirstValidIndex: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_first_valid_index_single_nan(self, klass): + def test_first_valid_index_single_nan(self, frame_or_series): # GH#9752 Series/DataFrame should both return None, not raise - obj = klass([np.nan]) + obj = frame_or_series([np.nan]) assert obj.first_valid_index() is None assert obj.iloc[:0].first_valid_index() is None diff --git a/pandas/tests/generic/methods/test_pipe.py b/pandas/tests/generic/methods/test_pipe.py index 59e5edc4b8bb5..b378600634bf0 100644 --- a/pandas/tests/generic/methods/test_pipe.py +++ b/pandas/tests/generic/methods/test_pipe.py @@ -5,11 +5,10 @@ class TestPipe: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_pipe(self, klass): + def test_pipe(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) expected = DataFrame({"A": [1, 4, 9]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] expected = expected["A"] @@ -17,20 +16,18 @@ def test_pipe(self, klass): result = obj.pipe(f, 2) tm.assert_equal(result, expected) - @pytest.mark.parametrize("klass", [Series, 
DataFrame]) - def test_pipe_tuple(self, klass): + def test_pipe_tuple(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] f = lambda x, y: y result = obj.pipe((f, "y"), 0) tm.assert_equal(result, obj) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_pipe_tuple_error(self, klass): + def test_pipe_tuple_error(self, frame_or_series): obj = DataFrame({"A": [1, 2, 3]}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] f = lambda x, y: y diff --git a/pandas/tests/generic/methods/test_reorder_levels.py b/pandas/tests/generic/methods/test_reorder_levels.py index 8bb6417e56659..6bfbf089a6108 100644 --- a/pandas/tests/generic/methods/test_reorder_levels.py +++ b/pandas/tests/generic/methods/test_reorder_levels.py @@ -1,20 +1,19 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, MultiIndex import pandas._testing as tm class TestReorderLevels: - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_reorder_levels(self, klass): + def test_reorder_levels(self, frame_or_series): index = MultiIndex( levels=[["bar"], ["one", "two", "three"], [0, 1]], codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], names=["L0", "L1", "L2"], ) df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) - obj = df if klass is DataFrame else df["A"] + obj = df if frame_or_series is DataFrame else df["A"] # no change, position result = obj.reorder_levels([0, 1, 2]) @@ -32,7 +31,7 @@ def test_reorder_levels(self, klass): names=["L1", "L2", "L0"], ) expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - expected = expected if klass is DataFrame else expected["A"] + expected = expected if frame_or_series is DataFrame else expected["A"] tm.assert_equal(result, expected) result = obj.reorder_levels([0, 0, 0]) @@ -42,7 +41,7 @@ def test_reorder_levels(self, klass): 
names=["L0", "L0", "L0"], ) expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) - expected = expected if klass is DataFrame else expected["A"] + expected = expected if frame_or_series is DataFrame else expected["A"] tm.assert_equal(result, expected) result = obj.reorder_levels(["L0", "L0", "L0"]) diff --git a/pandas/tests/generic/methods/test_sample.py b/pandas/tests/generic/methods/test_sample.py index 7303dad9170ed..b26a3785f918d 100644 --- a/pandas/tests/generic/methods/test_sample.py +++ b/pandas/tests/generic/methods/test_sample.py @@ -155,22 +155,20 @@ def test_sample_none_weights(self, obj): ), ], ) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_sample_random_state(self, func_str, arg, klass): + def test_sample_random_state(self, func_str, arg, frame_or_series): # GH#32503 obj = DataFrame({"col1": range(10, 20), "col2": range(20, 30)}) - if klass is Series: + if frame_or_series is Series: obj = obj["col1"] result = obj.sample(n=3, random_state=eval(func_str)(arg)) expected = obj.sample(n=3, random_state=com.random_state(eval(func_str)(arg))) tm.assert_equal(result, expected) - @pytest.mark.parametrize("klass", [Series, DataFrame]) - def test_sample_upsampling_without_replacement(self, klass): + def test_sample_upsampling_without_replacement(self, frame_or_series): # GH#27451 obj = DataFrame({"A": list("abc")}) - if klass is Series: + if frame_or_series is Series: obj = obj["A"] msg = ( diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 45601abc95fe6..930c48cbdc214 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -5,8 +5,7 @@ from pandas.core.dtypes.common import is_scalar -import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, Series import pandas._testing as tm # ---------------------------------------------------------------------- @@ -248,31 +247,6 @@ def 
test_metadata_propagation(self): self.check_metadata(v1 & v2) self.check_metadata(v1 | v2) - def test_head_tail(self, index): - # GH5370 - - o = self._construct(shape=len(index)) - - axis = o._get_axis_name(0) - setattr(o, axis, index) - - o.head() - - self._compare(o.head(), o.iloc[:5]) - self._compare(o.tail(), o.iloc[-5:]) - - # 0-len - self._compare(o.head(0), o.iloc[0:0]) - self._compare(o.tail(0), o.iloc[0:0]) - - # bounded - self._compare(o.head(len(o) + 1), o) - self._compare(o.tail(len(o) + 1), o) - - # neg index - self._compare(o.head(-3), o.head(len(index) - 3)) - self._compare(o.tail(-3), o.tail(len(index) - 3)) - def test_size_compat(self): # GH8846 # size property should be defined @@ -460,77 +434,23 @@ def test_take_invalid_kwargs(self): obj.take(indices, mode="clip") @pytest.mark.parametrize("is_copy", [True, False]) - def test_depr_take_kwarg_is_copy(self, is_copy): + def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series): # GH 27357 - df = DataFrame({"A": [1, 2, 3]}) + obj = DataFrame({"A": [1, 2, 3]}) + if frame_or_series is Series: + obj = obj["A"] + msg = ( "is_copy is deprecated and will be removed in a future version. " "'take' always returns a copy, so there is no need to specify this." ) with tm.assert_produces_warning(FutureWarning) as w: - df.take([0, 1], is_copy=is_copy) + obj.take([0, 1], is_copy=is_copy) assert w[0].message.args[0] == msg - s = Series([1, 2, 3]) - with tm.assert_produces_warning(FutureWarning): - s.take([0, 1], is_copy=is_copy) - - def test_equals(self): - # Add object dtype column with nans - index = np.random.random(10) - df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) - df1["text"] = "the sky is so blue. 
we could use more chocolate.".split() - df1["start"] = date_range("2000-1-1", periods=10, freq="T") - df1["end"] = date_range("2000-1-1", periods=10, freq="D") - df1["diff"] = df1["end"] - df1["start"] - df1["bool"] = np.arange(10) % 3 == 0 - df1.loc[::2] = np.nan - df2 = df1.copy() - assert df1["text"].equals(df2["text"]) - assert df1["start"].equals(df2["start"]) - assert df1["end"].equals(df2["end"]) - assert df1["diff"].equals(df2["diff"]) - assert df1["bool"].equals(df2["bool"]) - assert df1.equals(df2) - assert not df1.equals(object) - - # different dtype - different = df1.copy() - different["floats"] = different["floats"].astype("float32") - assert not df1.equals(different) - - # different index - different_index = -index - different = df2.set_index(different_index) - assert not df1.equals(different) - - # different columns - different = df2.copy() - different.columns = df2.columns[::-1] - assert not df1.equals(different) - - # DatetimeIndex - index = pd.date_range("2000-1-1", periods=10, freq="T") - df1 = df1.set_index(index) - df2 = df1.copy() - assert df1.equals(df2) - - # MultiIndex - df3 = df1.set_index(["text"], append=True) - df2 = df1.set_index(["text"], append=True) - assert df3.equals(df2) - - df2 = df1.set_index(["floats"], append=True) - assert not df3.equals(df2) - - # NaN in index - df3 = df1.set_index(["floats"], append=True) - df2 = df1.set_index(["floats"], append=True) - assert df3.equals(df2) - - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_classmethods(self, box): + def test_axis_classmethods(self, frame_or_series): + box = frame_or_series obj = box(dtype=object) values = box._AXIS_TO_AXIS_NUMBER.keys() for v in values: @@ -538,24 +458,23 @@ def test_axis_classmethods(self, box): assert obj._get_axis_name(v) == box._get_axis_name(v) assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_names_deprecated(self, box): + 
def test_axis_names_deprecated(self, frame_or_series): # GH33637 + box = frame_or_series obj = box(dtype=object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): obj._AXIS_NAMES - @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) - def test_axis_numbers_deprecated(self, box): + def test_axis_numbers_deprecated(self, frame_or_series): # GH33637 + box = frame_or_series obj = box(dtype=object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): obj._AXIS_NUMBERS - @pytest.mark.parametrize("as_frame", [True, False]) - def test_flags_identity(self, as_frame): + def test_flags_identity(self, frame_or_series): s = Series([1, 2]) - if as_frame: + if frame_or_series is DataFrame: s = s.to_frame() assert s.flags is s.flags From eadfc5453222211c593cfd73f457ba084e707273 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 17:51:33 -0800 Subject: [PATCH 030/147] TST: collect tests by method (#37617) * TST/REF: collect test_timeseries tests by method * misplaced DataFrame.values tst * misplaced dataframe.values test * collect test by method --- pandas/tests/frame/methods/test_asfreq.py | 11 ++++ pandas/tests/frame/methods/test_values.py | 19 ++++++- .../tests/indexes/datetimes/test_indexing.py | 7 +++ .../tests/series/apply/test_series_apply.py | 13 ++++- pandas/tests/series/methods/test_values.py | 20 +++++++ pandas/tests/series/test_arithmetic.py | 15 ++++++ pandas/tests/series/test_dtypes.py | 52 ++++--------------- pandas/tests/series/test_period.py | 24 --------- pandas/tests/series/test_timeseries.py | 41 --------------- 9 files changed, 93 insertions(+), 109 deletions(-) create mode 100644 pandas/tests/series/methods/test_values.py delete mode 100644 pandas/tests/series/test_period.py delete mode 100644 pandas/tests/series/test_timeseries.py diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index cdcd922949bcf..368ce88abe165 100644 --- 
a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -74,3 +74,14 @@ def test_asfreq_fillvalue(self): expected_series = ts.asfreq(freq="1S").fillna(9.0) actual_series = ts.asfreq(freq="1S", fill_value=9.0) tm.assert_series_equal(expected_series, actual_series) + + def test_asfreq_with_date_object_index(self, frame_or_series): + rng = date_range("1/1/2000", periods=20) + ts = frame_or_series(np.random.randn(20), index=rng) + + ts2 = ts.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts2.asfreq("4H", method="ffill") + expected = ts.asfreq("4H", method="ffill") + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 564a659724768..fb0c5d31f692b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -1,6 +1,7 @@ import numpy as np +import pytest -from pandas import DataFrame, NaT, Timestamp, date_range +from pandas import DataFrame, NaT, Series, Timestamp, date_range, period_range import pandas._testing as tm @@ -44,6 +45,22 @@ def test_values_duplicates(self): tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("constructor", [date_range, period_range]) + def test_values_casts_datetimelike_to_object(self, constructor): + series = Series(constructor("2000-01-01", periods=10, freq="D")) + + expected = series.astype("object") + + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + def test_frame_values_with_tz(self): tz = "US/Central" df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 
d4ebb557fd6cd..59269b9b54ddc 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -544,6 +544,13 @@ def test_contains_nonunique(self, vals): class TestGetIndexer: + def test_get_indexer_date_objs(self): + rng = date_range("1/1/2000", periods=20) + + result = rng.get_indexer(rng.map(lambda x: x.date())) + expected = rng.get_indexer(rng) + tm.assert_numpy_array_equal(result, expected) + def test_get_indexer(self): idx = pd.date_range("2000-01-01", periods=3) exp = np.array([0, 1, 2], dtype=np.intp) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 9096d2a1033e5..93431a5c75091 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -5,12 +5,23 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, isna +from pandas import DataFrame, Index, MultiIndex, Series, isna, timedelta_range import pandas._testing as tm from pandas.core.base import SpecificationError class TestSeriesApply: + def test_series_map_box_timedelta(self): + # GH#11349 + ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) + + def f(x): + return x.total_seconds() + + ser.map(f) + ser.apply(f) + DataFrame(ser).applymap(f) + def test_apply(self, datetime_series): with np.errstate(all="ignore"): tm.assert_series_equal( diff --git a/pandas/tests/series/methods/test_values.py b/pandas/tests/series/methods/test_values.py new file mode 100644 index 0000000000000..e28a714ea656d --- /dev/null +++ b/pandas/tests/series/methods/test_values.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest + +from pandas import IntervalIndex, Series, period_range +import pandas._testing as tm + + +class TestValues: + @pytest.mark.parametrize( + "data", + [ + period_range("2000", periods=4), + IntervalIndex.from_breaks([1, 2, 3, 4]), + ], + ) + def test_values_object_extension_dtypes(self, 
data): + # https://github.com/pandas-dev/pandas/issues/23995 + result = Series(data).values + expected = np.array(data.astype(object)) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 9154c566a3dae..fa8f85178ba9f 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -730,6 +730,21 @@ def test_datetime_understood(self): expected = Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"])) tm.assert_series_equal(result, expected) + def test_align_date_objects_with_datetimeindex(self): + rng = date_range("1/1/2000", periods=20) + ts = Series(np.random.randn(20), index=rng) + + ts_slice = ts[5:] + ts2 = ts_slice.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts + ts2 + result2 = ts2 + ts + expected = ts + ts[5:] + expected.index = expected.index._with_freq(None) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + @pytest.mark.parametrize( "names", diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index b85a53960b0f6..2fbed92567f71 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -6,7 +6,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Series, date_range +from pandas import Categorical, DataFrame, Series import pandas._testing as tm @@ -120,18 +120,20 @@ def cmp(a, b): s.astype("object").astype(CategoricalDtype()), roundtrip_expected ) + def test_invalid_conversions(self): # invalid conversion (these are NOT a dtype) + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.randint(0, 10000, 100)).sort_values() + ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) + msg = ( "dtype '' " "not understood" ) - - for invalid in [ - lambda x: x.astype(Categorical), - 
lambda x: x.astype("object").astype(Categorical), - ]: - with pytest.raises(TypeError, match=msg): - invalid(s) + with pytest.raises(TypeError, match=msg): + ser.astype(Categorical) + with pytest.raises(TypeError, match=msg): + ser.astype("object").astype(Categorical) @pytest.mark.parametrize("dtype", np.typecodes["All"]) def test_astype_empty_constructor_equality(self, dtype): @@ -148,27 +150,6 @@ def test_astype_empty_constructor_equality(self, dtype): as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) - def test_intercept_astype_object(self): - series = Series(date_range("1/1/2000", periods=10)) - - # This test no longer makes sense, as - # Series is by default already M8[ns]. - expected = series.astype("object") - - df = DataFrame({"a": series, "b": np.random.randn(len(series))}) - exp_dtypes = Series( - [np.dtype("datetime64[ns]"), np.dtype("float64")], index=["a", "b"] - ) - tm.assert_series_equal(df.dtypes, exp_dtypes) - - result = df.values.squeeze() - assert (result[:, 0] == expected.values).all() - - df = DataFrame({"a": series, "b": ["foo"] * len(series)}) - - result = df.values.squeeze() - assert (result[:, 0] == expected.values).all() - def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical series = Series(["a", "b", "c"]) @@ -178,19 +159,6 @@ def test_series_to_categorical(self): tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "data", - [ - pd.period_range("2000", periods=4), - pd.IntervalIndex.from_breaks([1, 2, 3, 4]), - ], - ) - def test_values_compatibility(self, data): - # https://github.com/pandas-dev/pandas/issues/23995 - result = Series(data).values - expected = np.array(data.astype(object)) - tm.assert_numpy_array_equal(result, expected) - def test_reindex_astype_order_consistency(self): # GH 17444 s = Series([1, 2, 3], index=[2, 0, 1]) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py deleted file mode 
100644 index 17dbfa9cf379a..0000000000000 --- a/pandas/tests/series/test_period.py +++ /dev/null @@ -1,24 +0,0 @@ -import numpy as np - -from pandas import DataFrame, Series, period_range - - -class TestSeriesPeriod: - - # --------------------------------------------------------------------- - # NaT support - - def test_intercept_astype_object(self): - series = Series(period_range("2000-01-01", periods=10, freq="D")) - - expected = series.astype("object") - - df = DataFrame({"a": series, "b": np.random.randn(len(series))}) - - result = df.values.squeeze() - assert (result[:, 0] == expected.values).all() - - df = DataFrame({"a": series, "b": ["foo"] * len(series)}) - - result = df.values.squeeze() - assert (result[:, 0] == expected.values).all() diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py deleted file mode 100644 index 0769606d18d57..0000000000000 --- a/pandas/tests/series/test_timeseries.py +++ /dev/null @@ -1,41 +0,0 @@ -import numpy as np - -from pandas import DataFrame, Series, date_range, timedelta_range -import pandas._testing as tm - - -class TestTimeSeries: - def test_promote_datetime_date(self): - rng = date_range("1/1/2000", periods=20) - ts = Series(np.random.randn(20), index=rng) - - ts_slice = ts[5:] - ts2 = ts_slice.copy() - ts2.index = [x.date() for x in ts2.index] - - result = ts + ts2 - result2 = ts2 + ts - expected = ts + ts[5:] - expected.index = expected.index._with_freq(None) - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result2, expected) - - # test asfreq - result = ts2.asfreq("4H", method="ffill") - expected = ts[5:].asfreq("4H", method="ffill") - tm.assert_series_equal(result, expected) - - result = rng.get_indexer(ts2.index) - expected = rng.get_indexer(ts_slice.index) - tm.assert_numpy_array_equal(result, expected) - - def test_series_map_box_timedelta(self): - # GH 11349 - s = Series(timedelta_range("1 day 1 s", periods=5, freq="h")) - - def f(x): - return 
x.total_seconds() - - s.map(f) - s.apply(f) - DataFrame(s).applymap(f) From 051f0f1fa5e39221babc46a7686fbc2423134f51 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 17:53:16 -0800 Subject: [PATCH 031/147] TST/REF: share tests across Series/DataFrame (#37616) --- pandas/tests/frame/methods/test_asof.py | 10 +- pandas/tests/frame/methods/test_droplevel.py | 29 +++-- .../frame/methods/test_first_and_last.py | 44 +++++--- pandas/tests/frame/methods/test_head_tail.py | 3 + pandas/tests/frame/methods/test_truncate.py | 69 ++++++++---- pandas/tests/frame/methods/test_tz_convert.py | 9 +- .../tests/frame/methods/test_tz_localize.py | 9 +- pandas/tests/series/methods/test_asof.py | 3 - pandas/tests/series/methods/test_droplevel.py | 19 ---- .../series/methods/test_first_and_last.py | 69 ------------ .../series/{indexing => methods}/test_pop.py | 0 pandas/tests/series/methods/test_truncate.py | 106 ------------------ 12 files changed, 111 insertions(+), 259 deletions(-) delete mode 100644 pandas/tests/series/methods/test_droplevel.py delete mode 100644 pandas/tests/series/methods/test_first_and_last.py rename pandas/tests/series/{indexing => methods}/test_pop.py (100%) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 70b42976c95a7..6931dd0ea2d4c 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -96,12 +96,16 @@ def test_missing(self, date_range_frame): result = df.asof("1989-12-31") assert isinstance(result.name, Period) + def test_asof_all_nans(self, frame_or_series): + # GH 15713 + # DataFrame/Series is all nans + result = frame_or_series([np.nan]).asof([0]) + expected = frame_or_series([np.nan]) + tm.assert_equal(result, expected) + def test_all_nans(self, date_range_frame): # GH 15713 # DataFrame is all nans - result = DataFrame([np.nan]).asof([0]) - expected = DataFrame([np.nan]) - tm.assert_frame_equal(result, expected) # testing non-default 
indexes, multiple inputs N = 150 diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py index 517905cf23259..ce98704b03106 100644 --- a/pandas/tests/frame/methods/test_droplevel.py +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -1,23 +1,32 @@ +import pytest + from pandas import DataFrame, Index, MultiIndex import pandas._testing as tm class TestDropLevel: - def test_droplevel(self): + def test_droplevel(self, frame_or_series): # GH#20342 - df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) - df = df.set_index([0, 1]).rename_axis(["a", "b"]) - df.columns = MultiIndex.from_tuples( + cols = MultiIndex.from_tuples( [("c", "e"), ("d", "f")], names=["level_1", "level_2"] ) + mi = MultiIndex.from_tuples([(1, 2), (5, 6), (9, 10)], names=["a", "b"]) + df = DataFrame([[3, 4], [7, 8], [11, 12]], index=mi, columns=cols) + if frame_or_series is not DataFrame: + df = df.iloc[:, 0] # test that dropping of a level in index works expected = df.reset_index("a", drop=True) result = df.droplevel("a", axis="index") - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) - # test that dropping of a level in columns works - expected = df.copy() - expected.columns = Index(["c", "d"], name="level_1") - result = df.droplevel("level_2", axis="columns") - tm.assert_frame_equal(result, expected) + if frame_or_series is DataFrame: + # test that dropping of a level in columns works + expected = df.copy() + expected.columns = Index(["c", "d"], name="level_1") + result = df.droplevel("level_2", axis="columns") + tm.assert_equal(result, expected) + else: + # test that droplevel raises ValueError on axis != 0 + with pytest.raises(ValueError, match="No axis named columns"): + df.droplevel(1, axis="columns") diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 2b3756969acca..d21e1eee54e16 100644 --- 
a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -8,56 +8,64 @@ class TestFirst: - def test_first_subset(self): + def test_first_subset(self, frame_or_series): ts = tm.makeTimeDataFrame(freq="12h") + if frame_or_series is not DataFrame: + ts = ts["A"] result = ts.first("10d") assert len(result) == 20 ts = tm.makeTimeDataFrame(freq="D") + if frame_or_series is not DataFrame: + ts = ts["A"] result = ts.first("10d") assert len(result) == 10 result = ts.first("3M") expected = ts[:"3/31/2000"] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) result = ts.first("21D") expected = ts[:21] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) result = ts[:0].first("3M") - tm.assert_frame_equal(result, ts[:0]) + tm.assert_equal(result, ts[:0]) - def test_first_raises(self): + def test_first_last_raises(self, frame_or_series): # GH#20725 - df = DataFrame([[1, 2, 3], [4, 5, 6]]) + obj = DataFrame([[1, 2, 3], [4, 5, 6]]) + if frame_or_series is not DataFrame: + obj = obj[0] + msg = "'first' only supports a DatetimeIndex index" with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - df.first("1D") + obj.first("1D") + + msg = "'last' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex + obj.last("1D") - def test_last_subset(self): + def test_last_subset(self, frame_or_series): ts = tm.makeTimeDataFrame(freq="12h") + if frame_or_series is not DataFrame: + ts = ts["A"] result = ts.last("10d") assert len(result) == 20 ts = tm.makeTimeDataFrame(nper=30, freq="D") + if frame_or_series is not DataFrame: + ts = ts["A"] result = ts.last("10d") assert len(result) == 10 result = ts.last("21D") expected = ts["2000-01-10":] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) result = ts.last("21D") expected = ts[-21:] - tm.assert_frame_equal(result, expected) + 
tm.assert_equal(result, expected) result = ts[:0].last("3M") - tm.assert_frame_equal(result, ts[:0]) - - def test_last_raises(self): - # GH20725 - df = DataFrame([[1, 2, 3], [4, 5, 6]]) - msg = "'last' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - df.last("1D") + tm.assert_equal(result, ts[:0]) diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py index fa28f7d3e16a2..99cb7840c3eb6 100644 --- a/pandas/tests/frame/methods/test_head_tail.py +++ b/pandas/tests/frame/methods/test_head_tail.py @@ -48,6 +48,9 @@ def test_head_tail(float_frame): tm.assert_frame_equal(df.tail(0), df[0:0]) tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) + + +def test_head_tail_empty(): # test empty dataframe empty_df = DataFrame() tm.assert_frame_equal(empty_df.tail(), empty_df) diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index 674f482c478a0..c6d6637edc88c 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -2,12 +2,15 @@ import pytest import pandas as pd +from pandas import DataFrame, Series, date_range import pandas._testing as tm class TestDataFrameTruncate: - def test_truncate(self, datetime_frame): + def test_truncate(self, datetime_frame, frame_or_series): ts = datetime_frame[::3] + if frame_or_series is Series: + ts = ts.iloc[:, 0] start, end = datetime_frame.index[3], datetime_frame.index[6] @@ -16,34 +19,41 @@ def test_truncate(self, datetime_frame): # neither specified truncated = ts.truncate() - tm.assert_frame_equal(truncated, ts) + tm.assert_equal(truncated, ts) # both specified expected = ts[1:3] truncated = ts.truncate(start, end) - tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) truncated = ts.truncate(start_missing, end_missing) - 
tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) # start specified expected = ts[1:] truncated = ts.truncate(before=start) - tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) truncated = ts.truncate(before=start_missing) - tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) # end specified expected = ts[:3] truncated = ts.truncate(after=end) - tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) truncated = ts.truncate(after=end_missing) - tm.assert_frame_equal(truncated, expected) + tm.assert_equal(truncated, expected) + + # corner case, empty series/frame returned + truncated = ts.truncate(after=ts.index[0] - ts.index.freq) + assert len(truncated) == 0 + + truncated = ts.truncate(before=ts.index[-1] + ts.index.freq) + assert len(truncated) == 0 msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00" with pytest.raises(ValueError, match=msg): @@ -57,25 +67,35 @@ def test_truncate_copy(self, datetime_frame): truncated.values[:] = 5.0 assert not (datetime_frame.values[5:11] == 5).any() - def test_truncate_nonsortedindex(self): + def test_truncate_nonsortedindex(self, frame_or_series): # GH#17935 - df = pd.DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0]) + obj = DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0]) + if frame_or_series is Series: + obj = obj["A"] + msg = "truncate requires a sorted index" with pytest.raises(ValueError, match=msg): - df.truncate(before=3, after=9) + obj.truncate(before=3, after=9) + + def test_sort_values_nonsortedindex(self): + # TODO: belongs elsewhere? 
- rng = pd.date_range("2011-01-01", "2012-01-01", freq="W") - ts = pd.DataFrame( + rng = date_range("2011-01-01", "2012-01-01", freq="W") + ts = DataFrame( {"A": np.random.randn(len(rng)), "B": np.random.randn(len(rng))}, index=rng ) + msg = "truncate requires a sorted index" with pytest.raises(ValueError, match=msg): ts.sort_values("A", ascending=False).truncate( before="2011-11", after="2011-12" ) - df = pd.DataFrame( + def test_truncate_nonsortedindex_axis1(self): + # GH#17935 + + df = DataFrame( { 3: np.random.randn(5), 20: np.random.randn(5), @@ -93,27 +113,34 @@ def test_truncate_nonsortedindex(self): [(1, 2, [2, 1]), (None, 2, [2, 1, 0]), (1, None, [3, 2, 1])], ) @pytest.mark.parametrize("klass", [pd.Int64Index, pd.DatetimeIndex]) - def test_truncate_decreasing_index(self, before, after, indices, klass): + def test_truncate_decreasing_index( + self, before, after, indices, klass, frame_or_series + ): # https://github.com/pandas-dev/pandas/issues/33756 idx = klass([3, 2, 1, 0]) if klass is pd.DatetimeIndex: before = pd.Timestamp(before) if before is not None else None after = pd.Timestamp(after) if after is not None else None indices = [pd.Timestamp(i) for i in indices] - values = pd.DataFrame(range(len(idx)), index=idx) + values = frame_or_series(range(len(idx)), index=idx) result = values.truncate(before=before, after=after) expected = values.loc[indices] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) - def test_truncate_multiindex(self): + def test_truncate_multiindex(self, frame_or_series): # GH 34564 mi = pd.MultiIndex.from_product([[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"]) - s1 = pd.DataFrame(range(mi.shape[0]), index=mi, columns=["col"]) + s1 = DataFrame(range(mi.shape[0]), index=mi, columns=["col"]) + if frame_or_series is Series: + s1 = s1["col"] + result = s1.truncate(before=2, after=3) - df = pd.DataFrame.from_dict( + df = DataFrame.from_dict( {"L1": [2, 2, 3, 3], "L2": ["A", "B", "A", "B"], "col": [2, 3, 4, 5]} 
) expected = df.set_index(["L1", "L2"]) + if frame_or_series is Series: + expected = expected["col"] - tm.assert_frame_equal(result, expected) + tm.assert_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py index c70e479723644..ecb30cf11319b 100644 --- a/pandas/tests/frame/methods/test_tz_convert.py +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, date_range +from pandas import DataFrame, Index, MultiIndex, date_range import pandas._testing as tm @@ -89,17 +89,16 @@ def test_tz_convert_and_localize(self, fn): df = DataFrame(index=l0) df = getattr(df, fn)("US/Pacific", level=1) - @pytest.mark.parametrize("klass", [Series, DataFrame]) @pytest.mark.parametrize("copy", [True, False]) - def test_tz_convert_copy_inplace_mutate(self, copy, klass): + def test_tz_convert_copy_inplace_mutate(self, copy, frame_or_series): # GH#6326 - obj = klass( + obj = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz="Europe/Berlin"), ) orig = obj.copy() result = obj.tz_convert("UTC", copy=copy) - expected = klass(np.arange(0, 5), index=obj.index.tz_convert("UTC")) + expected = frame_or_series(np.arange(0, 5), index=obj.index.tz_convert("UTC")) tm.assert_equal(result, expected) tm.assert_equal(obj, orig) assert result.index is not obj.index diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py index 183b81ca5298e..aa5ab51fe3d8b 100644 --- a/pandas/tests/frame/methods/test_tz_localize.py +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, date_range import pandas._testing as tm @@ -23,16 +23,15 @@ def test_frame_tz_localize(self): assert result.columns.tz.zone == "UTC" 
tm.assert_frame_equal(result, expected.T) - @pytest.mark.parametrize("klass", [Series, DataFrame]) @pytest.mark.parametrize("copy", [True, False]) - def test_tz_localize_copy_inplace_mutate(self, copy, klass): + def test_tz_localize_copy_inplace_mutate(self, copy, frame_or_series): # GH#6326 - obj = klass( + obj = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=None) ) orig = obj.copy() result = obj.tz_localize("UTC", copy=copy) - expected = klass( + expected = frame_or_series( np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz="UTC"), ) diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 4b4ef5ea046be..43d40d53dcd21 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -161,9 +161,6 @@ def test_errors(self): def test_all_nans(self): # GH 15713 # series is all nans - result = Series([np.nan]).asof([0]) - expected = Series([np.nan]) - tm.assert_series_equal(result, expected) # testing non-default indexes N = 50 diff --git a/pandas/tests/series/methods/test_droplevel.py b/pandas/tests/series/methods/test_droplevel.py deleted file mode 100644 index 449ddd1cd0e49..0000000000000 --- a/pandas/tests/series/methods/test_droplevel.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - -from pandas import MultiIndex, Series -import pandas._testing as tm - - -class TestDropLevel: - def test_droplevel(self): - # GH#20342 - ser = Series([1, 2, 3, 4]) - ser.index = MultiIndex.from_arrays( - [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"] - ) - expected = ser.reset_index("b", drop=True) - result = ser.droplevel("b", axis="index") - tm.assert_series_equal(result, expected) - # test that droplevel raises ValueError on axis != 0 - with pytest.raises(ValueError, match="No axis named columns"): - ser.droplevel(1, axis="columns") diff --git a/pandas/tests/series/methods/test_first_and_last.py 
b/pandas/tests/series/methods/test_first_and_last.py deleted file mode 100644 index 7629dc8cda30b..0000000000000 --- a/pandas/tests/series/methods/test_first_and_last.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Note: includes tests for `last` -""" - -import numpy as np -import pytest - -from pandas import Series, date_range -import pandas._testing as tm - - -class TestFirst: - def test_first_subset(self): - rng = date_range("1/1/2000", "1/1/2010", freq="12h") - ts = Series(np.random.randn(len(rng)), index=rng) - result = ts.first("10d") - assert len(result) == 20 - - rng = date_range("1/1/2000", "1/1/2010", freq="D") - ts = Series(np.random.randn(len(rng)), index=rng) - result = ts.first("10d") - assert len(result) == 10 - - result = ts.first("3M") - expected = ts[:"3/31/2000"] - tm.assert_series_equal(result, expected) - - result = ts.first("21D") - expected = ts[:21] - tm.assert_series_equal(result, expected) - - result = ts[:0].first("3M") - tm.assert_series_equal(result, ts[:0]) - - def test_first_raises(self): - # GH#20725 - ser = Series("a b c".split()) - msg = "'first' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): - ser.first("1D") - - def test_last_subset(self): - rng = date_range("1/1/2000", "1/1/2010", freq="12h") - ts = Series(np.random.randn(len(rng)), index=rng) - result = ts.last("10d") - assert len(result) == 20 - - rng = date_range("1/1/2000", "1/1/2010", freq="D") - ts = Series(np.random.randn(len(rng)), index=rng) - result = ts.last("10d") - assert len(result) == 10 - - result = ts.last("21D") - expected = ts["12/12/2009":] - tm.assert_series_equal(result, expected) - - result = ts.last("21D") - expected = ts[-21:] - tm.assert_series_equal(result, expected) - - result = ts[:0].last("3M") - tm.assert_series_equal(result, ts[:0]) - - def test_last_raises(self): - # GH#20725 - ser = Series("a b c".split()) - msg = "'last' only supports a DatetimeIndex index" - with pytest.raises(TypeError, match=msg): - ser.last("1D") 
diff --git a/pandas/tests/series/indexing/test_pop.py b/pandas/tests/series/methods/test_pop.py similarity index 100% rename from pandas/tests/series/indexing/test_pop.py rename to pandas/tests/series/methods/test_pop.py diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py index b03f516eeffc5..21de593c0e2af 100644 --- a/pandas/tests/series/methods/test_truncate.py +++ b/pandas/tests/series/methods/test_truncate.py @@ -1,102 +1,11 @@ from datetime import datetime -import numpy as np -import pytest - import pandas as pd from pandas import Series, date_range import pandas._testing as tm -from pandas.tseries.offsets import BDay - class TestTruncate: - def test_truncate(self, datetime_series): - offset = BDay() - - ts = datetime_series[::3] - - start, end = datetime_series.index[3], datetime_series.index[6] - start_missing, end_missing = datetime_series.index[2], datetime_series.index[7] - - # neither specified - truncated = ts.truncate() - tm.assert_series_equal(truncated, ts) - - # both specified - expected = ts[1:3] - - truncated = ts.truncate(start, end) - tm.assert_series_equal(truncated, expected) - - truncated = ts.truncate(start_missing, end_missing) - tm.assert_series_equal(truncated, expected) - - # start specified - expected = ts[1:] - - truncated = ts.truncate(before=start) - tm.assert_series_equal(truncated, expected) - - truncated = ts.truncate(before=start_missing) - tm.assert_series_equal(truncated, expected) - - # end specified - expected = ts[:3] - - truncated = ts.truncate(after=end) - tm.assert_series_equal(truncated, expected) - - truncated = ts.truncate(after=end_missing) - tm.assert_series_equal(truncated, expected) - - # corner case, empty series returned - truncated = ts.truncate(after=datetime_series.index[0] - offset) - assert len(truncated) == 0 - - truncated = ts.truncate(before=datetime_series.index[-1] + offset) - assert len(truncated) == 0 - - msg = "Truncate: 1999-12-31 00:00:00 must be 
after 2000-02-14 00:00:00" - with pytest.raises(ValueError, match=msg): - ts.truncate( - before=datetime_series.index[-1] + offset, - after=datetime_series.index[0] - offset, - ) - - def test_truncate_nonsortedindex(self): - # GH#17935 - - s = Series(["a", "b", "c", "d", "e"], index=[5, 3, 2, 9, 0]) - msg = "truncate requires a sorted index" - - with pytest.raises(ValueError, match=msg): - s.truncate(before=3, after=9) - - rng = pd.date_range("2011-01-01", "2012-01-01", freq="W") - ts = Series(np.random.randn(len(rng)), index=rng) - msg = "truncate requires a sorted index" - - with pytest.raises(ValueError, match=msg): - ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12") - - @pytest.mark.parametrize( - "before, after, indices", - [(1, 2, [2, 1]), (None, 2, [2, 1, 0]), (1, None, [3, 2, 1])], - ) - @pytest.mark.parametrize("klass", [pd.Int64Index, pd.DatetimeIndex]) - def test_truncate_decreasing_index(self, before, after, indices, klass): - # https://github.com/pandas-dev/pandas/issues/33756 - idx = klass([3, 2, 1, 0]) - if klass is pd.DatetimeIndex: - before = pd.Timestamp(before) if before is not None else None - after = pd.Timestamp(after) if after is not None else None - indices = [pd.Timestamp(i) for i in indices] - values = Series(range(len(idx)), index=idx) - result = values.truncate(before=before, after=after) - expected = values.loc[indices] - tm.assert_series_equal(result, expected) - def test_truncate_datetimeindex_tz(self): # GH 9243 idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") @@ -133,21 +42,6 @@ def test_truncate_periodindex(self): expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) tm.assert_series_equal(result2, Series([2], index=expected_idx2)) - def test_truncate_multiindex(self): - # GH 34564 - mi = pd.MultiIndex.from_product([[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"]) - s1 = Series(range(mi.shape[0]), index=mi, name="col") - result = s1.truncate(before=2, after=3) - - df = 
pd.DataFrame.from_dict( - {"L1": [2, 2, 3, 3], "L2": ["A", "B", "A", "B"], "col": [2, 3, 4, 5]} - ) - return_value = df.set_index(["L1", "L2"], inplace=True) - assert return_value is None - expected = df.col - - tm.assert_series_equal(result, expected) - def test_truncate_one_element_series(self): # GH 35544 series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"])) From 7adbd77b779b0c8d4775c435cf732edeebdefc41 Mon Sep 17 00:00:00 2001 From: Sven Date: Wed, 4 Nov 2020 12:55:11 +1100 Subject: [PATCH 032/147] Gh 36562 typeerror comparison not supported between float and str (#37096) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/algorithms.py | 43 ++++++++++++++----- .../tests/frame/methods/test_combine_first.py | 31 ++++++++++++- pandas/tests/test_sorting.py | 7 +++ 4 files changed, 70 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7111d54d65815..ae6e2de1b819c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -469,6 +469,7 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) +- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`) I/O ^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e9e04ace784b6..ec88eb817b3f8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -2061,27 +2061,25 @@ def safe_sort( dtype, _ = infer_dtype_from_array(values) values = np.asarray(values, dtype=dtype) - def sort_mixed(values): - # order ints before strings, safe in py3 - str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) - nums = 
np.sort(values[~str_pos]) - strs = np.sort(values[str_pos]) - return np.concatenate([nums, np.asarray(strs, dtype=object)]) - sorter = None + if ( not is_extension_array_dtype(values) and lib.infer_dtype(values, skipna=False) == "mixed-integer" ): - # unorderable in py3 if mixed str/int - ordered = sort_mixed(values) + ordered = _sort_mixed(values) else: try: sorter = values.argsort() ordered = values.take(sorter) except TypeError: - # try this anyway - ordered = sort_mixed(values) + # Previous sorters failed or were not applicable, try `_sort_mixed` + # which would work, but which fails for special case of 1d arrays + # with tuples. + if values.size and isinstance(values[0], tuple): + ordered = _sort_tuples(values) + else: + ordered = _sort_mixed(values) # codes: @@ -2128,3 +2126,26 @@ def sort_mixed(values): np.putmask(new_codes, mask, na_sentinel) return ordered, ensure_platform_int(new_codes) + + +def _sort_mixed(values): + """ order ints before strings in 1d arrays, safe in py3 """ + str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) + nums = np.sort(values[~str_pos]) + strs = np.sort(values[str_pos]) + return np.concatenate([nums, np.asarray(strs, dtype=object)]) + + +def _sort_tuples(values: np.ndarray[tuple]): + """ + Convert array of tuples (1d) to array or array (2d). + We need to keep the columns separately as they contain different types and + nans (can't use `np.sort` as it may fail when str and nan are mixed in a + column as types cannot be compared). 
+ """ + from pandas.core.internals.construction import to_arrays + from pandas.core.sorting import lexsort_indexer + + arrays, _ = to_arrays(values, None) + indexer = lexsort_indexer(arrays, orders=True) + return values[indexer] diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 4850c6a50f8a8..08c4293323500 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series +from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm @@ -365,3 +365,32 @@ def test_combine_first_string_dtype_only_na(self): {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string" ).set_index(["a", "b"]) tm.assert_frame_equal(result, expected) + + +def test_combine_first_with_nan_multiindex(): + # gh-36562 + + mi1 = MultiIndex.from_arrays( + [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"] + ) + df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1) + mi2 = MultiIndex.from_arrays( + [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"] + ) + s = Series([1, 2, 3, 4, 5, 6], index=mi2) + res = df.combine_first(DataFrame({"d": s})) + mi_expected = MultiIndex.from_arrays( + [ + ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan], + [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6], + ], + names=["a", "b"], + ) + expected = DataFrame( + { + "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1], + "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan], + }, + index=mi_expected, + ) + tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 1c9fd46ae451f..5f85ae2ec2318 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -453,3 +453,10 @@ def test_extension_array_codes(self, verify, na_sentinel): expected_codes = 
np.array([0, 2, na_sentinel, 1], dtype=np.intp) tm.assert_extension_array_equal(result, expected_values) tm.assert_numpy_array_equal(codes, expected_codes) + + +def test_mixed_str_nan(): + values = np.array(["b", np.nan, "a", "b"], dtype=object) + result = safe_sort(values) + expected = np.array([np.nan, "a", "b", "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) From 82a11e997ab832fc4a03caf1ddf75b1f11ee9c9d Mon Sep 17 00:00:00 2001 From: Micael Jarniac Date: Tue, 3 Nov 2020 22:58:12 -0300 Subject: [PATCH 033/147] docs: fix punctuation (#37612) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c90ab9cceea8c..8050ce8b1b636 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2196,7 +2196,7 @@ def to_json( * Series: - default is 'index' - - allowed values are: {'split','records','index','table'}. + - allowed values are: {'split', 'records', 'index', 'table'}. * DataFrame: From 94e24c2e88a69b67b246e521f6811ca1ba918559 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Tue, 3 Nov 2020 20:59:21 -0500 Subject: [PATCH 034/147] REGR: pd.to_hdf(..., dropna=True) not dropping missing rows (#37564) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/pytables.py | 3 +++ pandas/tests/io/pytables/test_store.py | 25 ++++++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ae6e2de1b819c..16e6c12488b83 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -493,6 +493,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping 
timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 347ce6e853794..bf21a8fe2fc74 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -268,6 +268,7 @@ def to_hdf( data_columns=data_columns, errors=errors, encoding=encoding, + dropna=dropna, ) path_or_buf = stringify_path(path_or_buf) @@ -1051,6 +1052,7 @@ def put( encoding=None, errors: str = "strict", track_times: bool = True, + dropna: bool = False, ): """ Store object in HDFStore. @@ -1100,6 +1102,7 @@ def put( encoding=encoding, errors=errors, track_times=track_times, + dropna=dropna, ) def remove(self, key: str, where=None, start=None, stop=None): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f37b0aabd3aed..d76a5a6f64055 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1253,17 +1253,32 @@ def test_append_all_nans(self, setup_path): store.append("df2", df[10:], dropna=False) tm.assert_frame_equal(store["df2"], df) - # Test to make sure defaults are to not drop. - # Corresponding to Issue 9382 + def test_store_dropna(self, setup_path): df_with_missing = DataFrame( - {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, + index=list("abc"), ) + df_without_missing = DataFrame( + {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac") + ) + + # # Test to make sure defaults are to not drop. 
+ # # Corresponding to Issue 9382 + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df_with_missing", format="table") - reloaded = read_hdf(path, "df_with_missing") + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") tm.assert_frame_equal(df_with_missing, reloaded) + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) + def test_read_missing_key_close_store(self, setup_path): # GH 25766 with ensure_clean_path(setup_path) as path: From 727980ea76f808f13e6f6b8516bd4c51383fd5de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 18:05:28 -0800 Subject: [PATCH 035/147] parametrize set_axis tests (#37619) --- pandas/tests/frame/test_alter_axes.py | 16 ------ pandas/tests/generic/methods/test_set_axis.py | 22 ++++++++ pandas/tests/series/methods/test_set_name.py | 21 +++++++ pandas/tests/series/test_alter_axes.py | 55 ------------------- 4 files changed, 43 insertions(+), 71 deletions(-) create mode 100644 pandas/tests/series/methods/test_set_name.py delete mode 100644 pandas/tests/series/test_alter_axes.py diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 3cd35e900ee06..4bd1d5fa56468 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,7 +1,6 @@ from datetime import datetime import numpy as np -import pytest from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -24,15 +23,6 @@ class TestDataFrameAlterAxes: - def test_set_index_directly(self, float_string_frame): - df = float_string_frame - idx = Index(np.arange(len(df))[::-1]) - - df.index = idx - 
tm.assert_index_equal(df.index, idx) - with pytest.raises(ValueError, match="Length mismatch"): - df.index = idx[::2] - def test_convert_dti_to_series(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 @@ -101,12 +91,6 @@ def test_convert_dti_to_series(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_set_columns(self, float_string_frame): - cols = Index(np.arange(len(float_string_frame.columns))) - float_string_frame.columns = cols - with pytest.raises(ValueError, match="Length mismatch"): - float_string_frame.columns = cols[::2] - def test_dti_set_index_reindex(self): # GH 6631 df = DataFrame(np.random.random(6)) diff --git a/pandas/tests/generic/methods/test_set_axis.py b/pandas/tests/generic/methods/test_set_axis.py index 278d43ef93d2f..a46a91811f40e 100644 --- a/pandas/tests/generic/methods/test_set_axis.py +++ b/pandas/tests/generic/methods/test_set_axis.py @@ -57,6 +57,28 @@ def test_set_axis_invalid_axis_name(self, axis, obj): with pytest.raises(ValueError, match="No axis named"): obj.set_axis(list("abc"), axis=axis) + def test_set_axis_setattr_index_not_collection(self, obj): + # wrong type + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed" + ) + with pytest.raises(TypeError, match=msg): + obj.index = None + + def test_set_axis_setattr_index_wrong_length(self, obj): + # wrong length + msg = ( + f"Length mismatch: Expected axis has {len(obj)} elements, " + f"new values have {len(obj)-1} elements" + ) + with pytest.raises(ValueError, match=msg): + obj.index = np.arange(len(obj) - 1) + + if obj.ndim == 2: + with pytest.raises(ValueError, match="Length mismatch"): + obj.columns = obj.columns[::2] + class TestDataFrameSetAxis(SharedSetAxisTests): @pytest.fixture diff --git a/pandas/tests/series/methods/test_set_name.py b/pandas/tests/series/methods/test_set_name.py new file mode 100644 index 0000000000000..cbc8ebde7a8ab --- /dev/null +++ 
b/pandas/tests/series/methods/test_set_name.py @@ -0,0 +1,21 @@ +from datetime import datetime + +from pandas import Series + + +class TestSetName: + def test_set_name(self): + ser = Series([1, 2, 3]) + ser2 = ser._set_name("foo") + assert ser2.name == "foo" + assert ser.name is None + assert ser is not ser2 + + def test_set_name_attribute(self): + ser = Series([1, 2, 3]) + ser2 = Series([1, 2, 3], name="bar") + for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: + ser.name = name + assert ser.name == name + ser2.name = name + assert ser2.name == name diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py deleted file mode 100644 index 181d7de43d945..0000000000000 --- a/pandas/tests/series/test_alter_axes.py +++ /dev/null @@ -1,55 +0,0 @@ -from datetime import datetime - -import numpy as np -import pytest - -from pandas import Index, Series -import pandas._testing as tm - - -class TestSeriesAlterAxes: - def test_setindex(self, string_series): - # wrong type - msg = ( - r"Index\(\.\.\.\) must be called with a collection of some " - r"kind, None was passed" - ) - with pytest.raises(TypeError, match=msg): - string_series.index = None - - # wrong length - msg = ( - "Length mismatch: Expected axis has 30 elements, " - "new values have 29 elements" - ) - with pytest.raises(ValueError, match=msg): - string_series.index = np.arange(len(string_series) - 1) - - # works - string_series.index = np.arange(len(string_series)) - assert isinstance(string_series.index, Index) - - # Renaming - - def test_set_name_attribute(self): - s = Series([1, 2, 3]) - s2 = Series([1, 2, 3], name="bar") - for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: - s.name = name - assert s.name == name - s2.name = name - assert s2.name == name - - def test_set_name(self): - s = Series([1, 2, 3]) - s2 = s._set_name("foo") - assert s2.name == "foo" - assert s.name is None - assert s is not s2 - - def 
test_set_index_makes_timeseries(self): - idx = tm.makeDateIndex(10) - - s = Series(range(10)) - s.index = idx - assert s.index._is_all_dates From cf94a8fb8192f5d40ed9cd5bb3aa3dcc89712345 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Wed, 4 Nov 2020 09:18:04 +0700 Subject: [PATCH 036/147] CLN: clean color selection in _matplotlib/style (#37203) --- pandas/plotting/_matplotlib/style.py | 280 ++++++++++++++++++++++----- pandas/tests/plotting/test_style.py | 157 +++++++++++++++ 2 files changed, 384 insertions(+), 53 deletions(-) create mode 100644 pandas/tests/plotting/test_style.py diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index b919728971505..b2c7b2610845c 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -1,4 +1,14 @@ -# being a bit too dynamic +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterator, + List, + Optional, + Sequence, + Union, + cast, +) import warnings import matplotlib.cm as cm @@ -9,92 +19,256 @@ import pandas.core.common as com +if TYPE_CHECKING: + from matplotlib.colors import Colormap + + +Color = Union[str, Sequence[float]] + def get_standard_colors( - num_colors: int, colormap=None, color_type: str = "default", color=None + num_colors: int, + colormap: Optional["Colormap"] = None, + color_type: str = "default", + color: Optional[Union[Dict[str, Color], Color, Collection[Color]]] = None, ): - import matplotlib.pyplot as plt + """ + Get standard colors based on `colormap`, `color_type` or `color` inputs. + + Parameters + ---------- + num_colors : int + Minimum number of colors to be returned. + Ignored if `color` is a dictionary. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. 
Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap` are not None. + color : dict or str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can either be a dictionary, or a single color (single color string, + or sequence of floats representing a single color), + or a sequence of colors. + + Returns + ------- + dict or list + Standard colors. Can either be a mapping if `color` was a dictionary, + or a list of colors with a length of `num_colors` or more. + + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. + """ + if isinstance(color, dict): + return color + + colors = _derive_colors( + color=color, + colormap=colormap, + color_type=color_type, + num_colors=num_colors, + ) + + return _cycle_colors(colors, num_colors=num_colors) + + +def _derive_colors( + *, + color: Optional[Union[Color, Collection[Color]]], + colormap: Optional[Union[str, "Colormap"]], + color_type: str, + num_colors: int, +) -> List[Color]: + """ + Derive colors from either `colormap`, `color_type` or `color` inputs. + + Get a list of colors either from `colormap`, or from `color`, + or from `color_type` (if both `colormap` and `color` are None). + + Parameters + ---------- + color : str or sequence, optional + Color(s) to be used for deriving sequence of colors. + Can either be a single color (single color string, or sequence of floats + representing a single color), or a sequence of colors. + colormap : :py:class:`matplotlib.colors.Colormap`, optional + Matplotlib colormap. + When provided, the resulting colors will be derived from the colormap. + color_type : {"default", "random"}, optional + Type of colors to derive. Used if provided `color` and `colormap` are None. + Ignored if either `color` or `colormap` are not None. + num_colors : int + Number of colors to be extracted. + Returns + ------- + list + List of colors extracted.
+ + Warns + ----- + UserWarning + If both `colormap` and `color` are provided. + Parameter `color` will override. + """ if color is None and colormap is not None: - if isinstance(colormap, str): - cmap = colormap - colormap = cm.get_cmap(colormap) - if colormap is None: - raise ValueError(f"Colormap {cmap} is not recognized") - colors = [colormap(num) for num in np.linspace(0, 1, num=num_colors)] + return _get_colors_from_colormap(colormap, num_colors=num_colors) elif color is not None: if colormap is not None: warnings.warn( "'color' and 'colormap' cannot be used simultaneously. Using 'color'" ) - colors = ( - list(color) - if is_list_like(color) and not isinstance(color, dict) - else color - ) + return _get_colors_from_color(color) else: - if color_type == "default": - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c["color"] for c in list(plt.rcParams["axes.prop_cycle"])] - except KeyError: - colors = list(plt.rcParams.get("axes.color_cycle", list("bgrcmyk"))) - if isinstance(colors, str): - colors = list(colors) - - colors = colors[0:num_colors] - elif color_type == "random": - - def random_color(column): - """ Returns a random color represented as a list of length 3""" - # GH17525 use common._random_state to avoid resetting the seed - rs = com.random_state(column) - return rs.rand(3).tolist() - - colors = [random_color(num) for num in range(num_colors)] - else: - raise ValueError("color_type must be either 'default' or 'random'") + return _get_colors_from_color_type(color_type, num_colors=num_colors) - if isinstance(colors, str) and _is_single_color(colors): - # GH #36972 - colors = [colors] - # Append more colors by cycling if there is not enough color. - # Extra colors will be ignored by matplotlib if there are more colors - # than needed and nothing needs to be done here. 
+def _cycle_colors(colors: List[Color], num_colors: int) -> List[Color]: + """Append more colors by cycling if there is not enough color. + + Extra colors will be ignored by matplotlib if there are more colors + than needed and nothing needs to be done here. + """ if len(colors) < num_colors: - try: - multiple = num_colors // len(colors) - 1 - except ZeroDivisionError: - raise ValueError("Invalid color argument: ''") + multiple = num_colors // len(colors) - 1 mod = num_colors % len(colors) - colors += multiple * colors colors += colors[:mod] return colors -def _is_single_color(color: str) -> bool: - """Check if ``color`` is a single color. +def _get_colors_from_colormap( + colormap: Union[str, "Colormap"], + num_colors: int, +) -> List[Color]: + """Get colors from colormap.""" + colormap = _get_cmap_instance(colormap) + return [colormap(num) for num in np.linspace(0, 1, num=num_colors)] + + +def _get_cmap_instance(colormap: Union[str, "Colormap"]) -> "Colormap": + """Get instance of matplotlib colormap.""" + if isinstance(colormap, str): + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError(f"Colormap {cmap} is not recognized") + return colormap + + +def _get_colors_from_color( + color: Union[Color, Collection[Color]], +) -> List[Color]: + """Get colors from user input color.""" + if len(color) == 0: + raise ValueError(f"Invalid color argument: {color}") + + if _is_single_color(color): + color = cast(Color, color) + return [color] + + color = cast(Collection[Color], color) + return list(_gen_list_of_colors_from_iterable(color)) + + +def _is_single_color(color: Union[Color, Collection[Color]]) -> bool: + """Check if `color` is a single color, not a sequence of colors. + + Single color is of these kinds: + - Named color "red", "C0", "firebrick" + - Alias "g" + - Sequence of floats, such as (0.1, 0.2, 0.3) or (0.1, 0.2, 0.3, 0.4). 
+ + See Also + -------- + _is_single_string_color + """ + if isinstance(color, str) and _is_single_string_color(color): + # GH #36972 + return True + + if _is_floats_color(color): + return True + + return False + + +def _gen_list_of_colors_from_iterable(color: Collection[Color]) -> Iterator[Color]: + """ + Yield colors from string of several letters or from collection of colors. + """ + for x in color: + if _is_single_color(x): + yield x + else: + raise ValueError(f"Invalid color {x}") + + +def _is_floats_color(color: Union[Color, Collection[Color]]) -> bool: + """Check if color comprises a sequence of floats representing color.""" + return bool( + is_list_like(color) + and (len(color) == 3 or len(color) == 4) + and all(isinstance(x, (int, float)) for x in color) + ) + + +def _get_colors_from_color_type(color_type: str, num_colors: int) -> List[Color]: + """Get colors from user input color type.""" + if color_type == "default": + return _get_default_colors(num_colors) + elif color_type == "random": + return _get_random_colors(num_colors) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + +def _get_default_colors(num_colors: int) -> List[Color]: + """Get `num_colors` of default colors from matplotlib rc params.""" + import matplotlib.pyplot as plt + + colors = [c["color"] for c in plt.rcParams["axes.prop_cycle"]] + return colors[0:num_colors] + + +def _get_random_colors(num_colors: int) -> List[Color]: + """Get `num_colors` of random colors.""" + return [_random_color(num) for num in range(num_colors)] + + +def _random_color(column: int) -> List[float]: + """Get a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com.random_state(column) + return rs.rand(3).tolist() + + +def _is_single_string_color(color: Color) -> bool: + """Check if `color` is a single string color. 
- Examples of single colors: + Examples of single string colors: - 'r' - 'g' - 'red' - 'green' - 'C3' + - 'firebrick' Parameters ---------- - color : string - Color string. + color : Color + Color string or sequence of floats. Returns ------- bool - True if ``color`` looks like a valid color. + True if `color` looks like a valid color. False otherwise. """ conv = matplotlib.colors.ColorConverter() diff --git a/pandas/tests/plotting/test_style.py b/pandas/tests/plotting/test_style.py new file mode 100644 index 0000000000000..665bda15724fd --- /dev/null +++ b/pandas/tests/plotting/test_style.py @@ -0,0 +1,157 @@ +import pytest + +from pandas import Series + +pytest.importorskip("matplotlib") +from pandas.plotting._matplotlib.style import get_standard_colors + + +class TestGetStandardColors: + @pytest.mark.parametrize( + "num_colors, expected", + [ + (3, ["red", "green", "blue"]), + (5, ["red", "green", "blue", "red", "green"]), + (7, ["red", "green", "blue", "red", "green", "blue", "red"]), + (2, ["red", "green"]), + (1, ["red"]), + ], + ) + def test_default_colors_named_from_prop_cycle(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color=["red", "green", "blue"]), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["b"]), + (3, ["b", "g", "r"]), + (4, ["b", "g", "r", "y"]), + (5, ["b", "g", "r", "y", "b"]), + (7, ["b", "g", "r", "y", "b", "g", "r"]), + ], + ) + def test_default_colors_named_from_prop_cycle_string(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color="bgry"), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, 
expected_name", + [ + (1, ["C0"]), + (3, ["C0", "C1", "C2"]), + ( + 12, + [ + "C0", + "C1", + "C2", + "C3", + "C4", + "C5", + "C6", + "C7", + "C8", + "C9", + "C0", + "C1", + ], + ), + ], + ) + def test_default_colors_named_undefined_prop_cycle(self, num_colors, expected_name): + import matplotlib as mpl + import matplotlib.colors as mcolors + + with mpl.rc_context(rc={}): + expected = [mcolors.to_hex(x) for x in expected_name] + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["red", "green", (0.1, 0.2, 0.3)]), + (2, ["red", "green", (0.1, 0.2, 0.3)]), + (3, ["red", "green", (0.1, 0.2, 0.3)]), + (4, ["red", "green", (0.1, 0.2, 0.3), "red"]), + ], + ) + def test_user_input_color_sequence(self, num_colors, expected): + color = ["red", "green", (0.1, 0.2, 0.3)] + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["r", "g", "b", "k"]), + (2, ["r", "g", "b", "k"]), + (3, ["r", "g", "b", "k"]), + (4, ["r", "g", "b", "k"]), + (5, ["r", "g", "b", "k", "r"]), + (6, ["r", "g", "b", "k", "r", "g"]), + ], + ) + def test_user_input_color_string(self, num_colors, expected): + color = "rgbk" + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, [(0.1, 0.2, 0.3)]), + (2, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + (3, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + ], + ) + def test_user_input_color_floats(self, num_colors, expected): + color = (0.1, 0.2, 0.3) + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "color, num_colors, expected", + [ + ("Crimson", 1, ["Crimson"]), + ("DodgerBlue", 2, ["DodgerBlue", "DodgerBlue"]), + ("firebrick", 3, ["firebrick", "firebrick", "firebrick"]), + ], + ) + 
def test_user_input_named_color_string(self, color, num_colors, expected): + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize("color", ["", [], (), Series([], dtype="object")]) + def test_empty_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color argument"): + get_standard_colors(color=color, num_colors=1) + + @pytest.mark.parametrize( + "color", + [ + "bad_color", + ("red", "green", "bad_color"), + (0.1,), + (0.1, 0.2), + (0.1, 0.2, 0.3, 0.4, 0.5), # must be either 3 or 4 floats + ], + ) + def test_bad_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color"): + get_standard_colors(color=color, num_colors=5) From 71a7f6f4305e9eea6d2ad9588a17fbddfae8cb91 Mon Sep 17 00:00:00 2001 From: Erfan Nariman <34067903+erfannariman@users.noreply.github.com> Date: Wed, 4 Nov 2020 03:21:00 +0100 Subject: [PATCH 037/147] DEPR: DataFrame/Series.slice_shift (#37601) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/core/generic.py | 13 ++++++++++++- pandas/tests/generic/test_finalize.py | 2 -- pandas/tests/generic/test_generic.py | 11 +++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 16e6c12488b83..fd5451505eefe 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -341,6 +341,8 @@ Deprecations - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) - :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. 
Use the named set methods instead (:issue:`36758`) - :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, will be removed in a future version (:issue:`37545`) +- :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` are deprecated, use :meth:`Series.shift` or :meth:`DataFrame.shift` instead (:issue:`37601`) + .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8050ce8b1b636..36ce2c4776bd0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9347,10 +9347,13 @@ def shift( def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: """ Equivalent to `shift` without copying data. - The shifted data will not include the dropped periods and the shifted axis will be smaller than the original. + .. deprecated:: 1.2.0 + slice_shift is deprecated, + use DataFrame/Series.shift instead. + Parameters ---------- periods : int @@ -9365,6 +9368,14 @@ def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: While the `slice_shift` is faster than `shift`, you may pay for it later during alignment. """ + + msg = ( + "The 'slice_shift' method is deprecated " + "and will be removed in a future version. 
" + "You can use DataFrame/Series.shift instead" + ) + warnings.warn(msg, FutureWarning, stacklevel=2) + if periods == 0: return self diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index d7aadda990f53..d16e854c25ed8 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -424,8 +424,6 @@ (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), - (pd.Series, ([1, 2],), operator.methodcaller("slice_shift")), - (pd.DataFrame, frame_data, operator.methodcaller("slice_shift")), pytest.param( ( pd.Series, diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 930c48cbdc214..7fde448bb36dc 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -480,3 +480,14 @@ def test_flags_identity(self, frame_or_series): assert s.flags is s.flags s2 = s.copy() assert s2.flags is not s.flags + + def test_slice_shift_deprecated(self): + # GH 37601 + df = DataFrame({"A": [1, 2, 3, 4]}) + s = Series([1, 2, 3, 4]) + + with tm.assert_produces_warning(FutureWarning): + df["A"].slice_shift() + + with tm.assert_produces_warning(FutureWarning): + s.slice_shift() From 2cabd7d0e78750bb1efb2a54bf7d527f5c0c55ee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 18:25:34 -0800 Subject: [PATCH 038/147] REF: re-use validate_setitem_value in Categorical.fillna (#37597) --- pandas/core/arrays/categorical.py | 14 ++++---------- pandas/tests/arrays/categorical/test_missing.py | 5 ++++- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/indexes/categorical/test_fillna.py | 4 ++-- pandas/tests/series/methods/test_fillna.py | 6 +++--- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/categorical.py 
b/pandas/core/arrays/categorical.py index b1f913e9ea641..9f0414cf7a806 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1655,21 +1655,15 @@ def fillna(self, value=None, method=None, limit=None): codes = self._ndarray.copy() mask = self.isna() + new_codes = self._validate_setitem_value(value) + if isinstance(value, (np.ndarray, Categorical)): # We get ndarray or Categorical if called via Series.fillna, # where it will unwrap another aligned Series before getting here - - not_categories = ~algorithms.isin(value, self.categories) - if not isna(value[not_categories]).all(): - # All entries in `value` must either be a category or NA - raise ValueError("fill value must be in categories") - - values_codes = _get_codes_for_values(value, self.categories) - codes[mask] = values_codes[mask] + codes[mask] = new_codes[mask] else: - new_code = self._validate_fill_value(value) - codes[mask] = new_code + codes[mask] = new_codes return self._from_backing_data(codes) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 21bea9356dcf0..364c290edc46c 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -60,7 +60,10 @@ def test_set_item_nan(self): ), (dict(), "Must specify a fill 'value' or 'method'."), (dict(method="bad"), "Invalid fill method. 
Expecting .* bad"), - (dict(value=Series([1, 2, 3, 4, "a"])), "fill value must be in categories"), + ( + dict(value=Series([1, 2, 3, 4, "a"])), + "Cannot setitem on a Categorical with a new category", + ), ], ) def test_fillna_raises(self, fillna_kwargs, msg): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 9fa1aa65379c5..bbb57da39705b 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -170,7 +170,7 @@ def test_na_actions_categorical(self): res = df.fillna(value={"cats": 3, "vals": "b"}) tm.assert_frame_equal(res, df_exp_fill) - msg = "'fill_value=4' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): df.fillna(value={"cats": 4, "vals": "c"}) diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py index f6a6747166011..c8fc55c29054e 100644 --- a/pandas/tests/indexes/categorical/test_fillna.py +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -14,7 +14,7 @@ def test_fillna_categorical(self): tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - msg = "'fill_value=2.0' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): idx.fillna(2.0) @@ -36,7 +36,7 @@ def test_fillna_validates_with_no_nas(self): ci = CategoricalIndex([2, 3, 3]) cat = ci._data - msg = "'fill_value=False' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): ci.fillna(False) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index d45486b9bdb29..aaa58cdb390f7 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ 
b/pandas/tests/series/methods/test_fillna.py @@ -653,14 +653,14 @@ def test_fillna_categorical_raises(self): data = ["a", np.nan, "b", np.nan, np.nan] ser = Series(Categorical(data, categories=["a", "b"])) - msg = "'fill_value=d' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): ser.fillna("d") - with pytest.raises(ValueError, match="fill value must be in categories"): + with pytest.raises(ValueError, match=msg): ser.fillna(Series("d")) - with pytest.raises(ValueError, match="fill value must be in categories"): + with pytest.raises(ValueError, match=msg): ser.fillna({1: "d", 3: "a"}) msg = '"value" parameter must be a scalar or dict, but you passed a "list"' From 21da60d54a118221128a542768992eb5d404fc6d Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 3 Nov 2020 20:57:03 -0600 Subject: [PATCH 039/147] PERF: release gil for ewma_time (#37389) --- pandas/_libs/window/aggregations.pyx | 49 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index b2dbf7802e6f0..3556085bb300b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1,14 +1,13 @@ # cython: boundscheck=False, wraparound=False, cdivision=True import cython -from cython import Py_ssize_t from libcpp.deque cimport deque import numpy as np cimport numpy as cnp -from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t +from numpy cimport float32_t, float64_t, int64_t, ndarray cnp.import_array() @@ -1398,7 +1397,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights, # ---------------------------------------------------------------------- # Exponentially weighted moving average -def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times, +def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, 
int64_t halflife): """ Compute exponentially-weighted moving average using halflife and time @@ -1416,30 +1415,40 @@ def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times, ndarray """ cdef: - Py_ssize_t i, num_not_nan = 0, N = len(vals) + Py_ssize_t i, j, num_not_nan = 0, N = len(vals) bint is_not_nan - float64_t last_result - ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8) - ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64) + float64_t last_result, weights_dot, weights_sum, weight, halflife_float + float64_t[:] times_float + float64_t[:] observations = np.zeros(N, dtype=float) + float64_t[:] times_masked = np.zeros(N, dtype=float) + ndarray[float64_t] output = np.empty(N, dtype=float) if N == 0: return output + halflife_float = halflife + times_float = times.astype(float) last_result = vals[0] - for i in range(N): - is_not_nan = vals[i] == vals[i] - num_not_nan += is_not_nan - if is_not_nan: - mask[i] = 1 - weights = 0.5 ** ((times[i] - times[mask.view(np.bool_)]) / halflife) - observations = vals[mask.view(np.bool_)] - last_result = np.sum(weights * observations) / np.sum(weights) - - if num_not_nan >= minp: - output[i] = last_result - else: - output[i] = NaN + with nogil: + for i in range(N): + is_not_nan = vals[i] == vals[i] + num_not_nan += is_not_nan + if is_not_nan: + times_masked[num_not_nan-1] = times_float[i] + observations[num_not_nan-1] = vals[i] + + weights_sum = 0 + weights_dot = 0 + for j in range(num_not_nan): + weight = 0.5 ** ( + (times_float[i] - times_masked[j]) / halflife_float) + weights_sum += weight + weights_dot += weight * observations[j] + + last_result = weights_dot / weights_sum + + output[i] = last_result if num_not_nan >= minp else NaN return output From 08dc807da9063a9b0de53e0e5ce1e2b08dc18977 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Wed, 4 Nov 2020 03:59:02 +0100 Subject: [PATCH 040/147] BUG: Groupy dropped nan groups from result when 
grouping over single column (#36842) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/lib.pyx | 29 +++++++++++++-------- pandas/core/groupby/ops.py | 9 +++---- pandas/core/sorting.py | 11 ++++++-- pandas/tests/groupby/test_groupby.py | 7 +++++ pandas/tests/groupby/test_groupby_dropna.py | 20 +++++++++++++- pandas/tests/window/test_rolling.py | 15 +++++++++++ 7 files changed, 72 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index fd5451505eefe..e811bbc9ab7a0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -527,6 +527,7 @@ Groupby/resample/rolling - Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`) - Bug in :meth:`df.groupby(..).quantile() ` and :meth:`df.resample(..).quantile() ` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`) - Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for :class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) +- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e493e5e9d41d3..0b0334d52c1e9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -896,21 +896,28 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys, if lab != cur: if lab != -1: - tup = PyTuple_New(k) - for j in range(k): - val = keys[j][sorted_labels[j][i - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][i - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val 
= keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:i] start = i cur = lab - tup = PyTuple_New(k) - for j in range(k): - val = keys[j][sorted_labels[j][n - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][n - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:] return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bca71b5c9646b..ccf23a6f24c42 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -229,12 +229,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): @cache_readonly def indices(self): """ dict {group name -> group indices} """ - if len(self.groupings) == 1: - return self.groupings[0].indices - else: - codes_list = [ping.codes for ping in self.groupings] - keys = [ping.group_index for ping in self.groupings] - return get_indexer_dict(codes_list, keys) + codes_list = [ping.codes for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return get_indexer_dict(codes_list, keys) @property def codes(self) -> List[np.ndarray]: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2e32a7572adc7..e390229b5dcba 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -4,6 +4,7 @@ TYPE_CHECKING, Callable, DefaultDict, + Dict, Iterable, List, Optional, @@ -528,16 +529,22 @@ def get_flattened_list( return [tuple(array) for array in arrays.values()] -def get_indexer_dict(label_list, keys): +def get_indexer_dict( + label_list: List[np.ndarray], keys: List["Index"] +) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns ------- - dict + dict: Labels mapped to indexers. 
""" shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) + if np.all(group_index == -1): + # When all keys are nan and dropna=True, indices_fast can't handle this + # and the return is empty anyway + return {} ngroups = ( ((group_index.size and group_index.max()) + 1) if is_int64_overflow_possible(shape) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2563eeeb68672..a0c228200e73a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1298,6 +1298,13 @@ def test_groupby_nat_exclude(): grouped.get_group(pd.NaT) +def test_groupby_two_group_keys_all_nan(): + # GH #36842: Grouping over two group keys shouldn't raise an error + df = DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + result = df.groupby(["a", "b"]).indices + assert result == {} + + def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 29a8f883f0ff5..02ce4dcf2ae2b 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -import pandas.testing as tm +import pandas._testing as tm @pytest.mark.parametrize( @@ -335,3 +335,21 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, expected = pd.DataFrame(selected_data, index=mi) tm.assert_frame_equal(result, expected) + + +def test_groupby_nan_included(): + # GH 35646 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + dtype = "int64" + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } + for result_values, expected_values in 
zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + assert list(result.keys())[0:2] == ["g1", "g2"] diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 2c8439aae75e5..02bcfab8d3388 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1087,3 +1087,18 @@ def test_rolling_corr_timedelta_index(index, window): result = x.rolling(window).corr(y) expected = Series([np.nan, np.nan, 1, 1, 1], index=index) tm.assert_almost_equal(result, expected) + + +def test_groupby_rolling_nan_included(): + # GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = DataFrame(data) + result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() + expected = DataFrame( + {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, + index=pd.MultiIndex.from_tuples( + [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], + names=["group", None], + ), + ) + tm.assert_frame_equal(result, expected) From b7a5e87b3189613ddcefe5081d70eba17b617b5f Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Wed, 4 Nov 2020 04:00:05 +0100 Subject: [PATCH 041/147] ENH: implement timeszones support for read_json(orient='table') and astype() from 'object' (#35973) --- doc/source/whatsnew/v1.2.0.rst | 3 ++ pandas/core/arrays/datetimes.py | 8 ++- pandas/io/json/_json.py | 4 +- pandas/io/json/_table_schema.py | 4 -- pandas/tests/frame/methods/test_astype.py | 24 +++++++++ .../tests/io/json/test_json_table_schema.py | 54 ++++++++++++++++--- 6 files changed, 85 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e811bbc9ab7a0..0937ec3866e12 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -217,6 +217,7 @@ Other enhancements - ``Styler`` now allows direct CSS class name addition 
to individual data cells (:issue:`36159`) - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) +- - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) - Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`). @@ -393,6 +394,8 @@ Datetimelike - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`) - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`) - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`) +- :meth:`to_json` and :meth:`read_json` now implements timezones parsing when orient structure is 'table'. +- :meth:`astype` now attempts to convert to 'datetime64[ns, tz]' directly from 'object' with inferred timezone from string (:issue:`35973`). 
- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f655d10881011..905242bfdd8ad 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1968,7 +1968,13 @@ def sequence_to_dt64ns( data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst ) - tz = _maybe_infer_tz(tz, inferred_tz) + if tz and inferred_tz: + # two timezones: convert to intended from base UTC repr + data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = data.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + data_dtype = data.dtype # `data` may have originally been a Categorical[datetime64[ns, tz]], diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 98b9a585d890e..0cc6ca984b25d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -262,7 +262,9 @@ def __init__( # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): - raise NotImplementedError("orient='table' is not supported for MultiIndex") + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) # TODO: Do this timedelta properly in objToJSON.c See GH #15137 if ( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 2b4c86b3c4406..0499a35296490 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -323,10 +323,6 @@ def parse_table_schema(json, precise_float): for field in table["schema"]["fields"] } - # Cannot directly use as_type with timezone data on object; raise for 
now - if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()): - raise NotImplementedError('table="orient" can not yet read timezone data') - # No ISO constructor for Timedelta as of yet, so need to raise if "timedelta64" in dtypes.values(): raise NotImplementedError( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index d3f256259b15f..f05c90f37ea8a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -587,3 +587,27 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + + expected = df + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, different from construction tz. 
+ result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 6e35b224ef4c3..dba4b9214e50c 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -676,6 +676,11 @@ class TestTableOrientReader: {"floats": [1.0, 2.0, 3.0, 4.0]}, {"floats": [1.1, 2.2, 3.3, 4.4]}, {"bools": [True, False, False, True]}, + { + "timezones": pd.date_range( + "2016-01-01", freq="d", periods=4, tz="US/Central" + ) # added in # GH 35973 + }, ], ) @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") @@ -686,22 +691,59 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): tm.assert_frame_equal(df, result) @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "vals", + [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], + ) + def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): + df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + out = df.to_json(orient="table") + with pytest.raises(NotImplementedError, match="can not yet read "): + pd.read_json(out, orient="table") + + @pytest.mark.parametrize( + "idx", + [ + pd.Index(range(4)), + pd.Index( + pd.date_range( + "2020-08-30", + freq="d", + periods=4, + ), + freq=None, + ), + pd.Index( + pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), + freq=None, + ), + pd.MultiIndex.from_product( + [ + pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"), + ["x", "y"], + ], + ), + ], + ) @pytest.mark.parametrize( "vals", [ - {"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}, + {"floats": [1.1, 2.2, 3.3, 4.4]}, + {"dates": pd.date_range("2020-08-30", freq="d", periods=4)}, { "timezones": pd.date_range( - "2016-01-01", freq="d", periods=4, tz="US/Central" + "2020-08-30", freq="d", periods=4, 
tz="Europe/London" ) }, ], ) - def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): - df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") + def test_read_json_table_timezones_orient(self, idx, vals, recwarn): + # GH 35973 + df = DataFrame(vals, index=idx) out = df.to_json(orient="table") - with pytest.raises(NotImplementedError, match="can not yet read "): - pd.read_json(out, orient="table") + result = pd.read_json(out, orient="table") + tm.assert_frame_equal(df, result) def test_comprehensive(self): df = DataFrame( From 0b441f5a6cd4b1a2efcdf9bdbbaa67595ab556ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 3 Nov 2020 23:09:16 -0500 Subject: [PATCH 042/147] REF/BUG/TYP: read_csv shouldn't close user-provided file handles (#36997) * BUG/REF: read_csv shouldn't close user-provided file handles * get_handle: typing, returns is_wrapped, use dataclass, and make sure that all created handlers are returned * remove unused imports * added IOHandleArgs.close * added IOArgs.close * mostly comments * move memory_map from TextReader to CParserWrapper * moved IOArgs and IOHandles * more comments Co-authored-by: Jeff Reback --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/parsers.pyx | 122 ++------------ pandas/_typing.py | 29 +--- pandas/core/frame.py | 6 +- pandas/io/common.py | 178 ++++++++++++++++----- pandas/io/excel/_base.py | 40 +++-- pandas/io/feather_format.py | 9 +- pandas/io/formats/csvs.py | 54 ++----- pandas/io/formats/format.py | 14 +- pandas/io/json/_json.py | 88 +++++----- pandas/io/orc.py | 1 + pandas/io/parsers.py | 92 +++++------ pandas/io/pickle.py | 42 ++--- pandas/io/sas/sas7bdat.py | 21 +-- pandas/io/sas/sas_xport.py | 19 +-- pandas/io/sas/sasreader.py | 3 +- pandas/io/stata.py | 158 ++++++++---------- pandas/tests/frame/methods/test_to_csv.py | 7 +- pandas/tests/io/json/test_readlines.py | 2 +- 
pandas/tests/io/parser/test_common.py | 61 ++++++- pandas/tests/io/parser/test_encoding.py | 4 + pandas/tests/io/parser/test_textreader.py | 7 +- pandas/tests/io/test_compression.py | 26 +-- pandas/tests/series/methods/test_to_csv.py | 6 +- 24 files changed, 480 insertions(+), 510 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0937ec3866e12..33e9bd0c2732a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -498,6 +498,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`) - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) Plotting diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index b87e46f9b6648..4b7a47c5f93c2 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1,15 +1,10 @@ # Copyright (c) 2012, Lambda Foundry, Inc. 
# See LICENSE for the license -import bz2 from csv import QUOTE_MINIMAL, QUOTE_NONE, QUOTE_NONNUMERIC from errno import ENOENT -import gzip -import io -import os import sys import time import warnings -import zipfile from libc.stdlib cimport free from libc.string cimport strcasecmp, strlen, strncpy @@ -17,7 +12,7 @@ from libc.string cimport strcasecmp, strlen, strncpy import cython from cython import Py_ssize_t -from cpython.bytes cimport PyBytes_AsString, PyBytes_FromString +from cpython.bytes cimport PyBytes_AsString from cpython.exc cimport PyErr_Fetch, PyErr_Occurred from cpython.object cimport PyObject from cpython.ref cimport Py_XDECREF @@ -67,7 +62,6 @@ from pandas._libs.khash cimport ( khiter_t, ) -from pandas.compat import get_lzma_file, import_lzma from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning from pandas.core.dtypes.common import ( @@ -82,11 +76,10 @@ from pandas.core.dtypes.common import ( ) from pandas.core.dtypes.concat import union_categoricals -lzma = import_lzma() - cdef: float64_t INF = np.inf float64_t NEGINF = -INF + int64_t DEFAULT_CHUNKSIZE = 256 * 1024 cdef extern from "headers/portable.h": @@ -275,14 +268,15 @@ cdef extern from "parser/io.h": size_t *bytes_read, int *status) -DEFAULT_CHUNKSIZE = 256 * 1024 - - cdef class TextReader: """ # source: StringIO or file object + ..versionchange:: 1.2.0 + removed 'compression', 'memory_map', and 'encoding' argument. + These arguments are outsourced to CParserWrapper. + 'source' has to be a file handle. 
""" cdef: @@ -299,7 +293,7 @@ cdef class TextReader: cdef public: int64_t leading_cols, table_width, skipfooter, buffer_lines - bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory + bint allow_leading_cols, mangle_dupe_cols, low_memory bint delim_whitespace object delimiter, converters object na_values @@ -307,8 +301,6 @@ cdef class TextReader: object index_col object skiprows object dtype - object encoding - object compression object usecols list dtype_cast_order set unnamed_cols @@ -321,10 +313,8 @@ cdef class TextReader: header_end=0, index_col=None, names=None, - bint memory_map=False, tokenize_chunksize=DEFAULT_CHUNKSIZE, bint delim_whitespace=False, - compression=None, converters=None, bint skipinitialspace=False, escapechar=None, @@ -332,7 +322,6 @@ cdef class TextReader: quotechar=b'"', quoting=0, lineterminator=None, - encoding=None, comment=None, decimal=b'.', thousands=None, @@ -356,15 +345,7 @@ cdef class TextReader: bint skip_blank_lines=True): # set encoding for native Python and C library - if encoding is not None: - if not isinstance(encoding, bytes): - encoding = encoding.encode('utf-8') - encoding = encoding.lower() - self.c_encoding = encoding - else: - self.c_encoding = NULL - - self.encoding = encoding + self.c_encoding = NULL self.parser = parser_new() self.parser.chunksize = tokenize_chunksize @@ -374,9 +355,6 @@ cdef class TextReader: # For timekeeping self.clocks = [] - self.compression = compression - self.memory_map = memory_map - self.parser.usecols = (usecols is not None) self._setup_parser_source(source) @@ -562,11 +540,6 @@ cdef class TextReader: parser_del(self.parser) def close(self): - # we need to properly close an open derived - # filehandle here, e.g. 
and UTFRecoder - if self.handle is not None: - self.handle.close() - # also preemptively free all allocated memory parser_free(self.parser) if self.true_set: @@ -614,82 +587,15 @@ cdef class TextReader: cdef: void *ptr - self.parser.cb_io = NULL - self.parser.cb_cleanup = NULL - - if self.compression: - if self.compression == 'gzip': - if isinstance(source, str): - source = gzip.GzipFile(source, 'rb') - else: - source = gzip.GzipFile(fileobj=source) - elif self.compression == 'bz2': - source = bz2.BZ2File(source, 'rb') - elif self.compression == 'zip': - zip_file = zipfile.ZipFile(source) - zip_names = zip_file.namelist() - - if len(zip_names) == 1: - file_name = zip_names.pop() - source = zip_file.open(file_name) - - elif len(zip_names) == 0: - raise ValueError(f'Zero files found in compressed ' - f'zip file {source}') - else: - raise ValueError(f'Multiple files found in compressed ' - f'zip file {zip_names}') - elif self.compression == 'xz': - if isinstance(source, str): - source = get_lzma_file(lzma)(source, 'rb') - else: - source = get_lzma_file(lzma)(filename=source) - else: - raise ValueError(f'Unrecognized compression type: ' - f'{self.compression}') - - if (self.encoding and hasattr(source, "read") and - not hasattr(source, "encoding")): - source = io.TextIOWrapper( - source, self.encoding.decode('utf-8'), newline='') - - self.encoding = b'utf-8' - self.c_encoding = self.encoding - - self.handle = source - - if isinstance(source, str): - encoding = sys.getfilesystemencoding() or "utf-8" - usource = source - source = source.encode(encoding) - - if self.memory_map: - ptr = new_mmap(source) - if ptr == NULL: - # fall back - ptr = new_file_source(source, self.parser.chunksize) - self.parser.cb_io = &buffer_file_bytes - self.parser.cb_cleanup = &del_file_source - else: - self.parser.cb_io = &buffer_mmap_bytes - self.parser.cb_cleanup = &del_mmap - else: - ptr = new_file_source(source, self.parser.chunksize) - self.parser.cb_io = &buffer_file_bytes - 
self.parser.cb_cleanup = &del_file_source - self.parser.source = ptr - - elif hasattr(source, 'read'): - # e.g., StringIO - - ptr = new_rd_source(source) - self.parser.source = ptr - self.parser.cb_io = &buffer_rd_bytes - self.parser.cb_cleanup = &del_rd_source - else: + if not hasattr(source, "read"): raise IOError(f'Expected file path name or file-like object, ' f'got {type(source)} type') + ptr = new_rd_source(source) + self.parser.source = ptr + self.parser.cb_io = &buffer_rd_bytes + self.parser.cb_cleanup = &del_rd_source + cdef _get_header(self): # header is now a list of lists, so field_count should use header[0] diff --git a/pandas/_typing.py b/pandas/_typing.py index 3376559fb23ff..3e89cf24632e2 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,6 +1,6 @@ -from dataclasses import dataclass from datetime import datetime, timedelta, tzinfo -from io import IOBase +from io import BufferedIOBase, RawIOBase, TextIOBase, TextIOWrapper +from mmap import mmap from pathlib import Path from typing import ( IO, @@ -10,7 +10,6 @@ Callable, Collection, Dict, - Generic, Hashable, List, Mapping, @@ -77,8 +76,6 @@ "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]] ] DtypeObj = Union[np.dtype, "ExtensionDtype"] -FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase] -FileOrBuffer = Union[str, IO[AnyStr], IOBase] # FrameOrSeriesUnion means either a DataFrame or a Series. E.g. 
# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series @@ -133,6 +130,10 @@ "Resampler", ] +# filenames and file-like-objects +Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap] +FileOrBuffer = Union[str, Buffer[T]] +FilePathOrBuffer = Union[Path, FileOrBuffer[T]] # for arbitrary kwargs passed during reading/writing files StorageOptions = Optional[Dict[str, Any]] @@ -150,21 +151,3 @@ # type of float formatter in DataFrameFormatter FloatFormatType = Union[str, Callable, "EngFormatter"] - - -@dataclass -class IOargs(Generic[ModeVar, EncodingVar]): - """ - Return value of io/common.py:get_filepath_or_buffer. - - Note (copy&past from io/parsers): - filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] - though mypy handling of conditional imports is difficult. - See https://github.com/python/mypy/issues/1297 - """ - - filepath_or_buffer: FileOrBuffer - encoding: EncodingVar - compression: CompressionDict - should_close: bool - mode: Union[ModeVar, str] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 24b89085ac121..a3130ec27713d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import datetime from io import StringIO import itertools +import mmap from textwrap import dedent from typing import ( IO, @@ -2286,10 +2287,9 @@ def to_markdown( if buf is None: return result ioargs = get_filepath_or_buffer(buf, mode=mode, storage_options=storage_options) - assert not isinstance(ioargs.filepath_or_buffer, str) + assert not isinstance(ioargs.filepath_or_buffer, (str, mmap.mmap)) ioargs.filepath_or_buffer.writelines(result) - if ioargs.should_close: - ioargs.filepath_or_buffer.close() + ioargs.close() return None @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") diff --git a/pandas/io/common.py b/pandas/io/common.py index c147ae9fd0aa8..90a79e54015c4 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -2,8 +2,9 @@ import bz2 
from collections import abc +import dataclasses import gzip -from io import BufferedIOBase, BytesIO, RawIOBase +from io import BufferedIOBase, BytesIO, RawIOBase, TextIOWrapper import mmap import os import pathlib @@ -13,12 +14,14 @@ Any, AnyStr, Dict, + Generic, List, Mapping, Optional, Tuple, Type, Union, + cast, ) from urllib.parse import ( urljoin, @@ -31,12 +34,12 @@ import zipfile from pandas._typing import ( + Buffer, CompressionDict, CompressionOptions, EncodingVar, FileOrBuffer, FilePathOrBuffer, - IOargs, ModeVar, StorageOptions, ) @@ -56,6 +59,76 @@ from io import IOBase +@dataclasses.dataclass +class IOArgs(Generic[ModeVar, EncodingVar]): + """ + Return value of io/common.py:get_filepath_or_buffer. + + This is used to easily close created fsspec objects. + + Note (copy&past from io/parsers): + filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] + though mypy handling of conditional imports is difficult. + See https://github.com/python/mypy/issues/1297 + """ + + filepath_or_buffer: FileOrBuffer + encoding: EncodingVar + mode: Union[ModeVar, str] + compression: CompressionDict + should_close: bool = False + + def close(self) -> None: + """ + Close the buffer if it was created by get_filepath_or_buffer. + """ + if self.should_close: + assert not isinstance(self.filepath_or_buffer, str) + try: + self.filepath_or_buffer.close() + except (OSError, ValueError): + pass + self.should_close = False + + +@dataclasses.dataclass +class IOHandles: + """ + Return value of io/common.py:get_handle + + This is used to easily close created buffers and to handle corner cases when + TextIOWrapper is inserted. + + handle: The file handle to be used. + created_handles: All file handles that are created by get_handle + is_wrapped: Whether a TextIOWrapper needs to be detached. 
+ """ + + handle: Buffer + created_handles: List[Buffer] = dataclasses.field(default_factory=list) + is_wrapped: bool = False + + def close(self) -> None: + """ + Close all created buffers. + + Note: If a TextIOWrapper was inserted, it is flushed and detached to + avoid closing the potentially user-created buffer. + """ + if self.is_wrapped: + assert isinstance(self.handle, TextIOWrapper) + self.handle.flush() + self.handle.detach() + self.created_handles.remove(self.handle) + try: + for handle in self.created_handles: + handle.close() + except (OSError, ValueError): + pass + self.created_handles = [] + self.is_wrapped = False + + def is_url(url) -> bool: """ Check to see if a URL has a valid protocol. @@ -176,7 +249,7 @@ def get_filepath_or_buffer( compression: CompressionOptions = None, mode: ModeVar = None, # type: ignore[assignment] storage_options: StorageOptions = None, -) -> IOargs[ModeVar, EncodingVar]: +) -> IOArgs[ModeVar, EncodingVar]: """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. @@ -201,7 +274,7 @@ def get_filepath_or_buffer( ..versionchange:: 1.2.0 - Returns the dataclass IOargs. + Returns the dataclass IOArgs. 
""" filepath_or_buffer = stringify_path(filepath_or_buffer) @@ -225,6 +298,10 @@ def get_filepath_or_buffer( compression = dict(compression, method=compression_method) + # uniform encoding names + if encoding is not None: + encoding = encoding.replace("_", "-").lower() + # bz2 and xz do not write the byte order mark for utf-16 and utf-32 # print a warning when writing such files if ( @@ -258,7 +335,7 @@ def get_filepath_or_buffer( compression = {"method": "gzip"} reader = BytesIO(req.read()) req.close() - return IOargs( + return IOArgs( filepath_or_buffer=reader, encoding=encoding, compression=compression, @@ -310,7 +387,7 @@ def get_filepath_or_buffer( filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) ).open() - return IOargs( + return IOArgs( filepath_or_buffer=file_obj, encoding=encoding, compression=compression, @@ -323,7 +400,7 @@ def get_filepath_or_buffer( ) if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): - return IOargs( + return IOArgs( filepath_or_buffer=_expand_user(filepath_or_buffer), encoding=encoding, compression=compression, @@ -335,7 +412,7 @@ def get_filepath_or_buffer( msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" raise ValueError(msg) - return IOargs( + return IOArgs( filepath_or_buffer=filepath_or_buffer, encoding=encoding, compression=compression, @@ -455,14 +532,14 @@ def infer_compression( def get_handle( - path_or_buf, + path_or_buf: FilePathOrBuffer, mode: str, - encoding=None, + encoding: Optional[str] = None, compression: CompressionOptions = None, memory_map: bool = False, is_text: bool = True, - errors=None, -): + errors: Optional[str] = None, +) -> IOHandles: """ Get file handle for given path/buffer and mode. @@ -506,14 +583,9 @@ def get_handle( See the errors argument for :func:`open` for a full list of options. - .. versionadded:: 1.1.0 + .. versionchanged:: 1.2.0 - Returns - ------- - f : file-like - A file-like object. 
- handles : list of file-like objects - A list of file-like object that were opened in this function. + Returns the dataclass IOHandles """ need_text_wrapping: Tuple[Type["IOBase"], ...] try: @@ -532,12 +604,16 @@ def get_handle( except ImportError: pass - handles: List[Union[IO, _MMapWrapper]] = list() - f = path_or_buf + handles: List[Buffer] = list() + + # Windows does not default to utf-8. Set to utf-8 for a consistent behavior + if encoding is None: + encoding = "utf-8" # Convert pathlib.Path/py.path.local or string path_or_buf = stringify_path(path_or_buf) is_path = isinstance(path_or_buf, str) + f = path_or_buf compression, compression_args = get_compression_method(compression) if is_path: @@ -548,25 +624,29 @@ def get_handle( # GZ Compression if compression == "gzip": if is_path: + assert isinstance(path_or_buf, str) f = gzip.GzipFile(filename=path_or_buf, mode=mode, **compression_args) else: - f = gzip.GzipFile(fileobj=path_or_buf, mode=mode, **compression_args) + f = gzip.GzipFile( + fileobj=path_or_buf, # type: ignore[arg-type] + mode=mode, + **compression_args, + ) # BZ Compression elif compression == "bz2": - f = bz2.BZ2File(path_or_buf, mode=mode, **compression_args) + f = bz2.BZ2File( + path_or_buf, mode=mode, **compression_args # type: ignore[arg-type] + ) # ZIP Compression elif compression == "zip": - zf = _BytesZipFile(path_or_buf, mode, **compression_args) - # Ensure the container is closed as well. 
- handles.append(zf) - if zf.mode == "w": - f = zf - elif zf.mode == "r": - zip_names = zf.namelist() + f = _BytesZipFile(path_or_buf, mode, **compression_args) + if f.mode == "r": + handles.append(f) + zip_names = f.namelist() if len(zip_names) == 1: - f = zf.open(zip_names.pop()) + f = f.open(zip_names.pop()) elif len(zip_names) == 0: raise ValueError(f"Zero files found in ZIP file {path_or_buf}") else: @@ -584,36 +664,40 @@ def get_handle( msg = f"Unrecognized compression type: {compression}" raise ValueError(msg) + assert not isinstance(f, str) handles.append(f) elif is_path: # Check whether the filename is to be opened in binary mode. # Binary mode does not support 'encoding' and 'newline'. is_binary_mode = "b" in mode - + assert isinstance(path_or_buf, str) if encoding and not is_binary_mode: # Encoding f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="") - elif is_text and not is_binary_mode: - # No explicit encoding - f = open(path_or_buf, mode, errors="replace", newline="") else: # Binary mode f = open(path_or_buf, mode) handles.append(f) # Convert BytesIO or file objects passed with an encoding - if is_text and (compression or isinstance(f, need_text_wrapping)): - from io import TextIOWrapper - - g = TextIOWrapper(f, encoding=encoding, errors=errors, newline="") - if not isinstance(f, (BufferedIOBase, RawIOBase)): - handles.append(g) - f = g + is_wrapped = False + if is_text and ( + compression + or isinstance(f, need_text_wrapping) + or "b" in getattr(f, "mode", "") + ): + f = TextIOWrapper( + f, encoding=encoding, errors=errors, newline="" # type: ignore[arg-type] + ) + handles.append(f) + # do not mark as wrapped when the user provided a string + is_wrapped = not is_path if memory_map and hasattr(f, "fileno"): + assert not isinstance(f, str) try: - wrapped = _MMapWrapper(f) + wrapped = cast(mmap.mmap, _MMapWrapper(f)) # type: ignore[arg-type] f.close() handles.remove(f) handles.append(wrapped) @@ -625,7 +709,13 @@ def get_handle( # 
leave the file handler as is then pass - return f, handles + handles.reverse() # close the most recently added buffer first + assert not isinstance(f, str) + return IOHandles( + handle=f, + created_handles=handles, + is_wrapped=is_wrapped, + ) # error: Definition of "__exit__" in base class "ZipFile" is incompatible with diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3461652f4ea24..03c61c3ed8376 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -17,6 +17,7 @@ from pandas.core.frame import DataFrame from pandas.io.common import ( + IOArgs, get_filepath_or_buffer, is_url, stringify_path, @@ -349,24 +350,37 @@ def read_excel( class BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): + self.ioargs = IOArgs( + filepath_or_buffer=filepath_or_buffer, + encoding=None, + mode=None, + compression={"method": None}, + ) # If filepath_or_buffer is a url, load the data into a BytesIO if is_url(filepath_or_buffer): - filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) + self.ioargs = IOArgs( + filepath_or_buffer=BytesIO(urlopen(filepath_or_buffer).read()), + should_close=True, + encoding=None, + mode=None, + compression={"method": None}, + ) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): - filepath_or_buffer = get_filepath_or_buffer( + self.ioargs = get_filepath_or_buffer( filepath_or_buffer, storage_options=storage_options - ).filepath_or_buffer + ) - if isinstance(filepath_or_buffer, self._workbook_class): - self.book = filepath_or_buffer - elif hasattr(filepath_or_buffer, "read"): + if isinstance(self.ioargs.filepath_or_buffer, self._workbook_class): + self.book = self.ioargs.filepath_or_buffer + elif hasattr(self.ioargs.filepath_or_buffer, "read"): # N.B. 
xlrd.Book has a read attribute too - filepath_or_buffer.seek(0) - self.book = self.load_workbook(filepath_or_buffer) - elif isinstance(filepath_or_buffer, str): - self.book = self.load_workbook(filepath_or_buffer) - elif isinstance(filepath_or_buffer, bytes): - self.book = self.load_workbook(BytesIO(filepath_or_buffer)) + assert not isinstance(self.ioargs.filepath_or_buffer, str) + self.ioargs.filepath_or_buffer.seek(0) + self.book = self.load_workbook(self.ioargs.filepath_or_buffer) + elif isinstance(self.ioargs.filepath_or_buffer, str): + self.book = self.load_workbook(self.ioargs.filepath_or_buffer) + elif isinstance(self.ioargs.filepath_or_buffer, bytes): + self.book = self.load_workbook(BytesIO(self.ioargs.filepath_or_buffer)) else: raise ValueError( "Must explicitly set engine if not passing in buffer or path for io." @@ -382,7 +396,7 @@ def load_workbook(self, filepath_or_buffer): pass def close(self): - pass + self.ioargs.close() @property @abc.abstractmethod diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 9a42b8289ab47..198acd5862d45 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -81,9 +81,7 @@ def to_feather( feather.write_feather(df, ioargs.filepath_or_buffer, **kwargs) - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() def read_feather( @@ -137,9 +135,6 @@ def read_feather( ioargs.filepath_or_buffer, columns=columns, use_threads=bool(use_threads) ) - # s3fs only validates the credentials when the file is closed. 
- if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() return df diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 6c62d6825bc84..20226dbb3c9d4 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -3,7 +3,6 @@ """ import csv as csvlib -from io import StringIO, TextIOWrapper import os from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Union @@ -39,7 +38,7 @@ class CSVFormatter: def __init__( self, formatter: "DataFrameFormatter", - path_or_buf: Optional[FilePathOrBuffer[str]] = None, + path_or_buf: FilePathOrBuffer[str] = "", sep: str = ",", cols: Optional[Sequence[Label]] = None, index_label: Optional[IndexLabel] = None, @@ -60,25 +59,14 @@ def __init__( self.obj = self.fmt.frame - self.encoding = encoding or "utf-8" - - if path_or_buf is None: - path_or_buf = StringIO() - - ioargs = get_filepath_or_buffer( + self.ioargs = get_filepath_or_buffer( path_or_buf, - encoding=self.encoding, + encoding=encoding, compression=compression, mode=mode, storage_options=storage_options, ) - self.compression = ioargs.compression.pop("method") - self.compression_args = ioargs.compression - self.path_or_buf = ioargs.filepath_or_buffer - self.should_close = ioargs.should_close - self.mode = ioargs.mode - self.sep = sep self.index_label = self._initialize_index_label(index_label) self.errors = errors @@ -238,20 +226,19 @@ def save(self) -> None: """ Create the writer & save. 
""" - # get a handle or wrap an existing handle to take care of 1) compression and - # 2) text -> byte conversion - f, handles = get_handle( - self.path_or_buf, - self.mode, - encoding=self.encoding, + # apply compression and byte/text conversion + handles = get_handle( + self.ioargs.filepath_or_buffer, + self.ioargs.mode, + encoding=self.ioargs.encoding, errors=self.errors, - compression=dict(self.compression_args, method=self.compression), + compression=self.ioargs.compression, ) try: # Note: self.encoding is irrelevant here self.writer = csvlib.writer( - f, + handles.handle, # type: ignore[arg-type] lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -263,23 +250,10 @@ def save(self) -> None: self._save() finally: - if self.should_close: - f.close() - elif ( - isinstance(f, TextIOWrapper) - and not f.closed - and f != self.path_or_buf - and hasattr(self.path_or_buf, "write") - ): - # get_handle uses TextIOWrapper for non-binary handles. TextIOWrapper - # closes the wrapped handle if it is not detached. 
- f.flush() # make sure everything is written - f.detach() # makes f unusable - del f - elif f != self.path_or_buf: - f.close() - for _fh in handles: - _fh.close() + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + self.ioargs.close() def _save(self) -> None: if self._need_to_save_header: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3c759f477899b..43e76d0aef490 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1046,8 +1046,12 @@ def to_csv( """ from pandas.io.formats.csvs import CSVFormatter + created_buffer = path_or_buf is None + if created_buffer: + path_or_buf = StringIO() + csv_formatter = CSVFormatter( - path_or_buf=path_or_buf, + path_or_buf=path_or_buf, # type: ignore[arg-type] line_terminator=line_terminator, sep=sep, encoding=encoding, @@ -1067,9 +1071,11 @@ def to_csv( ) csv_formatter.save() - if path_or_buf is None: - assert isinstance(csv_formatter.path_or_buf, StringIO) - return csv_formatter.path_or_buf.getvalue() + if created_buffer: + assert isinstance(path_or_buf, StringIO) + content = path_or_buf.getvalue() + path_or_buf.close() + return content return None diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 0cc6ca984b25d..040279b9f3e67 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,10 +1,10 @@ from abc import ABC, abstractmethod from collections import abc import functools -from io import BytesIO, StringIO +from io import StringIO from itertools import islice import os -from typing import IO, Any, Callable, List, Mapping, Optional, Tuple, Type, Union +from typing import Any, Callable, Mapping, Optional, Tuple, Type, Union import numpy as np @@ -26,7 +26,12 @@ from pandas.core.generic import NDFrame from pandas.core.reshape.concat import concat -from pandas.io.common import get_compression_method, get_filepath_or_buffer, get_handle +from pandas.io.common import ( + IOHandles, + 
get_compression_method, + get_filepath_or_buffer, + get_handle, +) from pandas.io.json._normalize import convert_to_line_delimits from pandas.io.json._table_schema import build_table_schema, parse_table_schema from pandas.io.parsers import validate_integer @@ -59,17 +64,6 @@ def to_json( "'index=False' is only valid when 'orient' is 'split' or 'table'" ) - if path_or_buf is not None: - ioargs = get_filepath_or_buffer( - path_or_buf, - compression=compression, - mode="wt", - storage_options=storage_options, - ) - path_or_buf = ioargs.filepath_or_buffer - should_close = ioargs.should_close - compression = ioargs.compression - if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -101,20 +95,27 @@ def to_json( if lines: s = convert_to_line_delimits(s) - if isinstance(path_or_buf, str): - fh, handles = get_handle(path_or_buf, "w", compression=compression) + if path_or_buf is not None: + # open fsspec URLs + ioargs = get_filepath_or_buffer( + path_or_buf, + compression=compression, + mode="wt", + storage_options=storage_options, + ) + # apply compression and byte/text conversion + handles = get_handle( + ioargs.filepath_or_buffer, "w", compression=ioargs.compression + ) try: - fh.write(s) + handles.handle.write(s) finally: - fh.close() - for handle in handles: - handle.close() - elif path_or_buf is None: - return s + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + ioargs.close() else: - path_or_buf.write(s) - if should_close: - path_or_buf.close() + return s class Writer(ABC): @@ -547,12 +548,10 @@ def read_json( dtype = True if convert_axes is None and orient != "table": convert_axes = True - if encoding is None: - encoding = "utf-8" ioargs = get_filepath_or_buffer( path_or_buf, - encoding=encoding, + encoding=encoding or "utf-8", compression=compression, storage_options=storage_options, ) @@ -579,9 +578,7 @@ def read_json( return json_reader result = 
json_reader.read() - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() return result @@ -631,9 +628,8 @@ def __init__( self.lines = lines self.chunksize = chunksize self.nrows_seen = 0 - self.should_close = False self.nrows = nrows - self.file_handles: List[IO] = [] + self.handles: Optional[IOHandles] = None if self.chunksize is not None: self.chunksize = validate_integer("chunksize", self.chunksize, 1) @@ -672,30 +668,25 @@ def _get_data_from_filepath(self, filepath_or_buffer): This method turns (1) into (2) to simplify the rest of the processing. It returns input types (2) and (3) unchanged. """ - data = filepath_or_buffer - + # if it is a string but the file does not exist, it might be a JSON string exists = False - if isinstance(data, str): + if isinstance(filepath_or_buffer, str): try: exists = os.path.exists(filepath_or_buffer) # gh-5874: if the filepath is too long will raise here except (TypeError, ValueError): pass - if exists or self.compression["method"] is not None: - data, self.file_handles = get_handle( + if exists or not isinstance(filepath_or_buffer, str): + self.handles = get_handle( filepath_or_buffer, "r", encoding=self.encoding, compression=self.compression, ) - self.should_close = True - self.open_stream = data - - if isinstance(data, BytesIO): - data = data.getvalue().decode() + filepath_or_buffer = self.handles.handle - return data + return filepath_or_buffer def _combine_lines(self, lines) -> str: """ @@ -759,13 +750,8 @@ def close(self): If an open stream or file was passed, we leave it open. 
""" - if self.should_close: - try: - self.open_stream.close() - except (OSError, AttributeError): - pass - for file_handle in self.file_handles: - file_handle.close() + if self.handles is not None: + self.handles.close() def __next__(self): if self.nrows: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 829ff6408d86d..5a734f0878a0c 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -53,4 +53,5 @@ def read_orc( ioargs = get_filepath_or_buffer(path) orc_file = pyarrow.orc.ORCFile(ioargs.filepath_or_buffer) result = orc_file.read(columns=columns, **kwargs).to_pandas() + ioargs.close() return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2110a2d400be8..3b72869188344 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import StringIO, TextIOWrapper +from io import StringIO import itertools import re import sys @@ -63,7 +63,13 @@ from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import get_filepath_or_buffer, get_handle, validate_header_arg +from pandas.io.common import ( + get_compression_method, + get_filepath_or_buffer, + get_handle, + stringify_path, + validate_header_arg, +) from pandas.io.date_converters import generic_parser # BOM character (byte order mark) @@ -428,17 +434,16 @@ def _validate_names(names): def _read(filepath_or_buffer: FilePathOrBuffer, kwds): """Generic reader of line files.""" - encoding = kwds.get("encoding", None) storage_options = kwds.get("storage_options", None) - if encoding is not None: - encoding = re.sub("_", "-", encoding).lower() - kwds["encoding"] = encoding - compression = kwds.get("compression", "infer") ioargs = get_filepath_or_buffer( - filepath_or_buffer, encoding, compression, storage_options=storage_options + filepath_or_buffer, + kwds.get("encoding", None), + kwds.get("compression", "infer"), + storage_options=storage_options, ) 
kwds["compression"] = ioargs.compression + kwds["encoding"] = ioargs.encoding if kwds.get("date_parser", None) is not None: if isinstance(kwds["parse_dates"], bool): @@ -461,14 +466,10 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): try: data = parser.read(nrows) finally: + # close compression and byte/text wrapper parser.close() - - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - try: - ioargs.filepath_or_buffer.close() - except ValueError: - pass + # close any fsspec-like objects + ioargs.close() return data @@ -1350,10 +1351,6 @@ def __init__(self, kwds): self._first_chunk = True - # GH 13932 - # keep references to file handles opened by the parser itself - self.handles = [] - def _validate_parse_dates_presence(self, columns: List[str]) -> None: """ Check if parse_dates are in columns. @@ -1403,8 +1400,7 @@ def _validate_parse_dates_presence(self, columns: List[str]) -> None: ) def close(self): - for f in self.handles: - f.close() + self.handles.close() @property def _has_complex_date_col(self): @@ -1838,23 +1834,29 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) - encoding = kwds.get("encoding") + if kwds.get("memory_map", False): + # memory-mapped files are directly handled by the TextReader. + src = stringify_path(src) - # parsers.TextReader doesn't support compression dicts - if isinstance(kwds.get("compression"), dict): - kwds["compression"] = kwds["compression"]["method"] - - if kwds.get("compression") is None and encoding: - if isinstance(src, str): - src = open(src, "rb") - self.handles.append(src) - - # Handle the file object with universal line mode enabled. - # We will handle the newline character ourselves later on. - if hasattr(src, "read") and not hasattr(src, "encoding"): - src = TextIOWrapper(src, encoding=encoding, newline="") + if get_compression_method(kwds.get("compression", None))[0] is not None: + raise ValueError( + "read_csv does not support compression with memory_map=True. 
" + + "Please use memory_map=False instead." + ) - kwds["encoding"] = "utf-8" + self.handles = get_handle( + src, + mode="r", + encoding=kwds.get("encoding", None), + compression=kwds.get("compression", None), + memory_map=kwds.get("memory_map", False), + is_text=True, + ) + kwds.pop("encoding", None) + kwds.pop("memory_map", None) + kwds.pop("compression", None) + if kwds.get("memory_map", False) and hasattr(self.handles.handle, "mmap"): + self.handles.handle = self.handles.handle.mmap # #2442 kwds["allow_leading_cols"] = self.index_col is not False @@ -1863,7 +1865,7 @@ def __init__(self, src, **kwds): self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"]) kwds["usecols"] = self.usecols - self._reader = parsers.TextReader(src, **kwds) + self._reader = parsers.TextReader(self.handles.handle, **kwds) self.unnamed_cols = self._reader.unnamed_cols passed_names = self.names is None @@ -1942,11 +1944,10 @@ def __init__(self, src, **kwds): self._implicit_index = self._reader.leading_cols > 0 - def close(self): - for f in self.handles: - f.close() + def close(self) -> None: + super().close() - # close additional handles opened by C parser (for compression) + # close additional handles opened by C parser try: self._reader.close() except ValueError: @@ -2237,20 +2238,19 @@ def __init__(self, f, **kwds): self.comment = kwds["comment"] self._comment_lines = [] - f, handles = get_handle( + self.handles = get_handle( f, "r", encoding=self.encoding, compression=self.compression, memory_map=self.memory_map, ) - self.handles.extend(handles) # Set self.data to something that can read lines. - if hasattr(f, "readline"): - self._make_reader(f) + if hasattr(self.handles.handle, "readline"): + self._make_reader(self.handles.handle) else: - self.data = f + self.data = self.handles.handle # Get columns in two steps: infer from data, then # infer column indices from self.usecols if it is specified. 
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 426a40a65b522..6fa044b4651a5 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -92,25 +92,18 @@ def to_pickle( mode="wb", storage_options=storage_options, ) - f, fh = get_handle( + handles = get_handle( ioargs.filepath_or_buffer, "wb", compression=ioargs.compression, is_text=False ) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL try: - pickle.dump(obj, f, protocol=protocol) + pickle.dump(obj, handles.handle, protocol=protocol) # type: ignore[arg-type] finally: - if f != filepath_or_buffer: - # do not close user-provided file objects GH 35679 - f.close() - for _f in fh: - _f.close() - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - try: - ioargs.filepath_or_buffer.close() - except ValueError: - pass + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + ioargs.close() def read_pickle( @@ -193,7 +186,7 @@ def read_pickle( ioargs = get_filepath_or_buffer( filepath_or_buffer, compression=compression, storage_options=storage_options ) - f, fh = get_handle( + handles = get_handle( ioargs.filepath_or_buffer, "rb", compression=ioargs.compression, is_text=False ) @@ -208,24 +201,17 @@ def read_pickle( with warnings.catch_warnings(record=True): # We want to silence any warnings about, e.g. moved modules. warnings.simplefilter("ignore", Warning) - return pickle.load(f) + return pickle.load(handles.handle) # type: ignore[arg-type] except excs_to_catch: # e.g. 
# "No module named 'pandas.core.sparse.series'" # "Can't get attribute '__nat_unpickle' on None: def close(self) -> None: """ close the handle if its open """ - try: - self.path_or_buf.close() - except OSError: - pass + self.ioargs.close() def _set_encoding(self) -> None: """ @@ -1938,7 +1936,7 @@ def _open_file_binary_write( fname: FilePathOrBuffer, compression: CompressionOptions, storage_options: StorageOptions = None, -) -> Tuple[BinaryIO, bool, CompressionOptions]: +) -> Tuple[IOHandles, CompressionOptions]: """ Open a binary file or no-op if file-like. @@ -1958,34 +1956,22 @@ def _open_file_binary_write( docs for the set of allowed keys and values .. versionadded:: 1.2.0 - - Returns - ------- - file : file-like object - File object supporting write - own : bool - True if the file was created, otherwise False """ - if hasattr(fname, "write"): - # See https://github.com/python/mypy/issues/1424 for hasattr challenges - # error: Incompatible return value type (got "Tuple[Union[str, Path, - # IO[Any]], bool, None]", expected "Tuple[BinaryIO, bool, Union[str, - # Mapping[str, str], None]]") - return fname, False, None # type: ignore[return-value] - elif isinstance(fname, (str, Path)): - # Extract compression mode as given, if dict - ioargs = get_filepath_or_buffer( - fname, mode="wb", compression=compression, storage_options=storage_options - ) - f, _ = get_handle( - ioargs.filepath_or_buffer, - "wb", - compression=ioargs.compression, - is_text=False, - ) - return f, True, ioargs.compression - else: - raise TypeError("fname must be a binary file, buffer or path-like.") + ioargs = get_filepath_or_buffer( + fname, mode="wb", compression=compression, storage_options=storage_options + ) + handles = get_handle( + ioargs.filepath_or_buffer, + "wb", + compression=ioargs.compression, + is_text=False, + ) + if ioargs.filepath_or_buffer != fname and not isinstance( + ioargs.filepath_or_buffer, str + ): + # add handle created by get_filepath_or_buffer + 
handles.created_handles.append(ioargs.filepath_or_buffer) + return handles, ioargs.compression def _set_endianness(endianness: str) -> str: @@ -2236,9 +2222,8 @@ def __init__( self._time_stamp = time_stamp self._data_label = data_label self._variable_labels = variable_labels - self._own_file = True self._compression = compression - self._output_file: Optional[BinaryIO] = None + self._output_file: Optional[Buffer] = None # attach nobs, nvars, data, varlist, typlist self._prepare_pandas(data) self.storage_options = storage_options @@ -2249,21 +2234,20 @@ def __init__( self._fname = stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} self._converted_names: Dict[Label, str] = {} - self._file: Optional[BinaryIO] = None def _write(self, to_write: str) -> None: """ Helper to call encode before writing to file for Python 3 compat. """ - assert self._file is not None - self._file.write(to_write.encode(self._encoding)) + self.handles.handle.write( + to_write.encode(self._encoding) # type: ignore[arg-type] + ) def _write_bytes(self, value: bytes) -> None: """ Helper to assert file is open before writing. """ - assert self._file is not None - self._file.write(value) + self.handles.handle.write(value) # type: ignore[arg-type] def _prepare_categoricals(self, data: DataFrame) -> DataFrame: """ @@ -2527,12 +2511,14 @@ def _encode_strings(self) -> None: self.data[col] = encoded def write_file(self) -> None: - self._file, self._own_file, compression = _open_file_binary_write( + self.handles, compression = _open_file_binary_write( self._fname, self._compression, storage_options=self.storage_options ) if compression is not None: - self._output_file = self._file - self._file = BytesIO() + # ZipFile creates a file (with the same name) for each write call. + # Write it first into a buffer and then write the buffer to the ZipFile. 
+ self._output_file = self.handles.handle + self.handles.handle = BytesIO() try: self._write_header(data_label=self._data_label, time_stamp=self._time_stamp) self._write_map() @@ -2552,10 +2538,9 @@ def write_file(self) -> None: self._write_map() except Exception as exc: self._close() - if self._own_file: + if isinstance(self._fname, (str, Path)): try: - if isinstance(self._fname, (str, Path)): - os.unlink(self._fname) + os.unlink(self._fname) except OSError: warnings.warn( f"This save was not successful but {self._fname} could not " @@ -2571,24 +2556,18 @@ def _close(self) -> None: Close the file if it was created by the writer. If a buffer or file-like object was passed in, for example a GzipFile, - then leave this file open for the caller to close. In either case, - attempt to flush the file contents to ensure they are written to disk - (if supported) + then leave this file open for the caller to close. """ - # Some file-like objects might not support flush - assert self._file is not None + # write compression if self._output_file is not None: - assert isinstance(self._file, BytesIO) - bio = self._file + assert isinstance(self.handles.handle, BytesIO) + bio = self.handles.handle bio.seek(0) - self._file = self._output_file - self._file.write(bio.read()) - try: - self._file.flush() - except AttributeError: - pass - if self._own_file: - self._file.close() + self.handles.handle = self._output_file + self.handles.handle.write(bio.read()) # type: ignore[arg-type] + bio.close() + # close any created handles + self.handles.close() def _write_map(self) -> None: """No-op, future compatibility""" @@ -3140,8 +3119,8 @@ def _tag(val: Union[str, bytes], tag: str) -> bytes: def _update_map(self, tag: str) -> None: """Update map location for tag with file position""" - assert self._file is not None - self._map[tag] = self._file.tell() + assert self.handles.handle is not None + self._map[tag] = self.handles.handle.tell() def _write_header( self, @@ -3208,12 +3187,11 @@ def 
_write_map(self) -> None: the map with 0s. The second call writes the final map locations when all blocks have been written. """ - assert self._file is not None if not self._map: self._map = dict( ( ("stata_data", 0), - ("map", self._file.tell()), + ("map", self.handles.handle.tell()), ("variable_types", 0), ("varnames", 0), ("sortlist", 0), @@ -3229,7 +3207,7 @@ def _write_map(self) -> None: ) ) # Move to start of map - self._file.seek(self._map["map"]) + self.handles.handle.seek(self._map["map"]) bio = BytesIO() for val in self._map.values(): bio.write(struct.pack(self._byteorder + "Q", val)) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 5bf1ce508dfc4..3103f6e1ba0b1 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1034,11 +1034,12 @@ def test_to_csv_compression(self, df, encoding, compression): tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + handles = get_handle( filename, "w", compression=compression, encoding=encoding ) - with f: - df.to_csv(f, encoding=encoding) + df.to_csv(handles.handle, encoding=encoding) + assert not handles.handle.closed + handles.close() result = pd.read_csv( filename, compression=compression, diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 933bdc462e3f8..2e68d3306c7d1 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -143,7 +143,7 @@ def test_readjson_chunks_closes(chunksize): ) reader.read() assert ( - reader.open_stream.closed + reader.handles.handle.closed ), f"didn't close stream with chunksize = {chunksize}" diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index b33289213e258..e61a5fce99c69 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -6,7 +6,7 @@ 
import csv from datetime import datetime from inspect import signature -from io import StringIO +from io import BytesIO, StringIO import os import platform from urllib.error import URLError @@ -2253,3 +2253,62 @@ def test_dict_keys_as_names(all_parsers): result = parser.read_csv(StringIO(data), names=keys) expected = DataFrame({"a": [1], "b": [2]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("io_class", [StringIO, BytesIO]) +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_read_csv_file_handle(all_parsers, io_class, encoding): + """ + Test whether read_csv does not close user-provided file handles. + + GH 36980 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + content = "a,b\n1,2" + if io_class == BytesIO: + content = content.encode("utf-8") + handle = io_class(content) + + tm.assert_frame_equal(parser.read_csv(handle, encoding=encoding), expected) + assert not handle.closed + + +def test_memory_map_compression_error(c_parser_only): + """ + c-parsers do not support memory_map=True with compression. + + GH 36997 + """ + parser = c_parser_only + df = DataFrame({"a": [1], "b": [2]}) + msg = ( + "read_csv does not support compression with memory_map=True. " + + "Please use memory_map=False instead." + ) + + with tm.ensure_clean() as path: + df.to_csv(path, compression="gzip", index=False) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, memory_map=True, compression="gzip") + + +def test_memory_map_file_handle(all_parsers): + """ + Support some buffers with memory_map=True. 
+ + GH 36997 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + handle = StringIO() + expected.to_csv(handle, index=False) + handle.seek(0) + + tm.assert_frame_equal( + parser.read_csv(handle, memory_map=True), + expected, + ) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 876696ecdad9c..e74265da3e966 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -152,14 +152,17 @@ def test_binary_mode_file_buffers( with open(fpath, mode="r", encoding=encoding) as fa: result = parser.read_csv(fa) + assert not fa.closed tm.assert_frame_equal(expected, result) with open(fpath, mode="rb") as fb: result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed tm.assert_frame_equal(expected, result) with open(fpath, mode="rb", buffering=0) as fb: result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed tm.assert_frame_equal(expected, result) @@ -199,6 +202,7 @@ def test_encoding_named_temp_file(all_parsers): result = parser.read_csv(f, encoding=encoding) tm.assert_frame_equal(result, expected) + assert not f.closed @pytest.mark.parametrize( diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 1c2518646bb29..413b78a52ad38 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -31,13 +31,10 @@ def test_file_handle(self): reader = TextReader(f) reader.read() - def test_string_filename(self): - reader = TextReader(self.csv1, header=None) - reader.read() - def test_file_handle_mmap(self): + # this was never using memory_map=True with open(self.csv1, "rb") as f: - reader = TextReader(f, memory_map=True, header=None) + reader = TextReader(f, header=None) reader.read() def test_StringIO(self): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 31e9ad4cf4416..8d7d5d85cbb48 100644 --- 
a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -47,18 +47,18 @@ def test_compression_size(obj, method, compression_only): @pytest.mark.parametrize("method", ["to_csv", "to_json"]) def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed + handles = icom.get_handle(path, "w", compression=compression_only) + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + handles.close() + assert handles.handle.closed compressed_size = os.path.getsize(path) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=None) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed + handles = icom.get_handle(path, "w", compression=None) + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + handles.close() + assert handles.handle.closed uncompressed_size = os.path.getsize(path) assert uncompressed_size > compressed_size @@ -111,10 +111,10 @@ def test_compression_warning(compression_only): columns=["X", "Y", "Z"], ) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) + handles = icom.get_handle(path, "w", compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): - with f: - df.to_csv(f, compression=compression_only) + df.to_csv(handles.handle, compression=compression_only) + handles.close() def test_compression_binary(compression_only): diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index a72e860340f25..714173158f4d6 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -143,11 +143,11 @@ def test_to_csv_compression(self, s, encoding, compression): tm.assert_series_equal(s, 
result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + handles = get_handle( filename, "w", compression=compression, encoding=encoding ) - with f: - s.to_csv(f, encoding=encoding, header=True) + s.to_csv(handles.handle, encoding=encoding, header=True) + handles.close() result = pd.read_csv( filename, compression=compression, From 1b1cedc101afede0fe428173da26ebfcfc0f75a1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 11:01:25 +0000 Subject: [PATCH 043/147] more typing checks to pre-commit (#37539) --- .pre-commit-config.yaml | 30 +++++++++++++++++++++++++++ ci/code_checks.sh | 23 -------------------- scripts/validate_unwanted_patterns.py | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0f35087dc922..0c1e4e330c903 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -119,6 +119,36 @@ repos: entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" types: [python] exclude: ^(asv_bench|pandas/tests|doc)/ + - id: FrameOrSeriesUnion + name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias + entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\] + language: pygrep + types: [python] + exclude: ^pandas/_typing\.py$ + - id: type-not-class + name: Check for use of foo.__class__ instead of type(foo) + entry: \.__class__ + language: pygrep + files: \.(py|pyx)$ + - id: unwanted-typing + name: Check for use of comment-based annotation syntax and missing error codes + entry: | + (?x) + \#\ type:\ (?!ignore)| + \#\ type:\s?ignore(?!\[) + language: pygrep + types: [python] + - id: no-os-remove + name: Check code for instances of os.remove + entry: os\.remove + language: pygrep + types: [python] + files: ^pandas/tests/ + exclude: | + (?x)^ + pandas/tests/io/excel/test_writers\.py| + pandas/tests/io/pytables/common\.py| + 
pandas/tests/io/pytables/test_store\.py$ - repo: https://github.com/asottile/yesqa rev: v1.2.2 hooks: diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7c48905135f89..b5d63e259456b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,29 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" # ------------------------------------------------------------------------- - # Type annotations - - MSG='Check for use of comment-based annotation syntax' ; echo $MSG - invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for missing error codes with # type: ignore' ; echo $MSG - invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG - invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - # ------------------------------------------------------------------------- - MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG - invgrep -R --include=*.{py,pyx} '\.__class__' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check code for instances of os.remove' ; echo $MSG - invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/ - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do check_namespace ${class} diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 7b648a589bc61..9c58a55cb907e 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ 
-474,7 +474,7 @@ def main( sys.exit( main( - function=globals().get(args.validation_type), # type: ignore + function=globals().get(args.validation_type), source_path=args.paths, output_format=args.format, ) From 2884c458a5004854b733cf1aca9a5a4c22112bee Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Wed, 4 Nov 2020 18:04:32 +0700 Subject: [PATCH 044/147] TST: 32bit dtype compat test_groupby_dropna (#37623) --- pandas/tests/groupby/test_groupby_dropna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 02ce4dcf2ae2b..e38fa5e8de87e 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -343,7 +343,7 @@ def test_groupby_nan_included(): df = pd.DataFrame(data) grouped = df.groupby("group", dropna=False) result = grouped.indices - dtype = "int64" + dtype = np.intp expected = { "g1": np.array([0, 2], dtype=dtype), "g2": np.array([3], dtype=dtype), From ded6b535d64d713483bba0511586198fd68064b2 Mon Sep 17 00:00:00 2001 From: Janus Date: Wed, 4 Nov 2020 14:22:54 +0100 Subject: [PATCH 045/147] BUG: Metadata propagation for groupby iterator (#37461) --- doc/source/whatsnew/v1.1.5.rst | 2 +- pandas/core/groupby/ops.py | 15 ++++++++++++--- pandas/tests/groupby/test_groupby_subclass.py | 9 +++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index cf728d94b2a55..a122154904996 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -23,7 +23,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ccf23a6f24c42..f807b740abaf2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -140,9 +140,16 @@ def get_iterator( splitter = self._get_splitter(data, axis=axis) keys = self._get_group_keys() for key, (i, group) in zip(keys, splitter): - yield key, group + yield key, group.__finalize__(data, method="groupby") def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter": + """ + Returns + ------- + Generator yielding subsetted objects + + __finalize__ has not been called for the the subsetted objects returned. + """ comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) @@ -918,7 +925,8 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - return type(sdata)(mgr, name=sdata.name, fastpath=True) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr, name=sdata.name, fastpath=True) class FrameSplitter(DataSplitter): @@ -934,7 +942,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - return type(sdata)(mgr) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr) def get_splitter( diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index cc7a79e976513..d268d87708552 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -51,6 +51,15 @@ def test_groupby_preserves_subclass(obj, groupby_func): tm.assert_series_equal(result1, result2) +def test_groupby_preserves_metadata(): + # 
GH-37343 + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + assert "testattr" in custom_df._metadata + custom_df.testattr = "hello" + for _, group_df in custom_df.groupby("c"): + assert group_df.testattr == "hello" + + @pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) def test_groupby_resample_preserves_subclass(obj): # GH28330 -- preserve subclass through groupby.resample() From c55925518597937ecfeb9343d303d2d147b2c031 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Nov 2020 05:43:46 -0800 Subject: [PATCH 046/147] BUG: read-only values in cython funcs (#37613) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/_libs/join.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 4 ++-- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/core/arrays/datetimelike.py | 3 +-- pandas/tests/libs/test_join.py | 7 ++++++- pandas/tests/tools/test_to_datetime.py | 10 ++++++++++ pandas/tests/tools/test_to_timedelta.py | 10 ++++++++++ 8 files changed, 33 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 33e9bd0c2732a..2e976371c0ac8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -399,11 +399,13 @@ Datetimelike - Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) +- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) Timedelta ^^^^^^^^^ - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) - Bug in parsing of ISO 8601 durations in :class:`Timedelta`, 
:meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`) +- Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 13c7187923473..1b79d68c13570 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -268,7 +268,7 @@ ctypedef fused join_t: @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique(join_t[:] left, join_t[:] right): +def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t] indexer diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d2690be905a68..bc4632ad028ab 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -12,7 +12,7 @@ from _thread import allocate_lock as _thread_allocate_lock import numpy as np import pytz -from numpy cimport int64_t +from numpy cimport int64_t, ndarray from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -51,7 +51,7 @@ cdef dict _parse_code_table = {'y': 0, 'u': 22} -def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'): +def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'): """ Calculates the datetime structs represented by the passed array of strings diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 45f32d92c7a74..29e8c58055f9e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -227,7 +227,7 @@ cdef convert_to_timedelta64(object ts, str unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, str unit=None, str errors="raise"): +def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): """ Convert an ndarray to an array of timedeltas. 
If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1955a96160a4a..e845dbf39dbc9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1025,9 +1025,8 @@ def _addsub_object_array(self, other: np.ndarray, op): result : same class as self """ assert op in [operator.add, operator.sub] - if len(other) == 1: + if len(other) == 1 and self.ndim == 1: # If both 1D then broadcasting is unambiguous - # TODO(EA2D): require self.ndim == other.ndim here return op(self, other[0]) warnings.warn( diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py index 95d6dcbaf3baf..f3f09d7a42204 100644 --- a/pandas/tests/libs/test_join.py +++ b/pandas/tests/libs/test_join.py @@ -135,9 +135,14 @@ def test_cython_inner_join(self): tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) -def test_left_join_indexer_unique(): +@pytest.mark.parametrize("readonly", [True, False]) +def test_left_join_indexer_unique(readonly): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) + if readonly: + # GH#37312, GH#37264 + a.setflags(write=False) + b.setflags(write=False) result = libjoin.left_join_indexer_unique(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ebe118252c8cf..10bda16655586 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -36,6 +36,16 @@ class TestTimeConversionFormats: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_datetime_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_datetime(arr) + expected = to_datetime([]) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("cache", [True, False]) def 
test_to_datetime_format(self, cache): values = ["1/1/2000", "1/2/2000", "1/3/2000"] diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 8e48295c533cc..5be7e81df53f2 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -9,6 +9,16 @@ class TestTimedeltas: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_timedelta_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_timedelta(arr) + expected = to_timedelta([]) + tm.assert_index_equal(result, expected) + def test_to_timedelta(self): result = to_timedelta(["", ""]) From b1cdb16432d75d3975d08758f1e0469494fe955a Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 13:47:36 +0000 Subject: [PATCH 047/147] CLN refactor core/arrays (#37581) --- pandas/core/arrays/base.py | 9 ++++----- pandas/core/arrays/boolean.py | 20 +++++++++---------- pandas/core/arrays/categorical.py | 8 +++----- pandas/core/arrays/datetimelike.py | 6 ++---- pandas/core/arrays/masked.py | 7 ++++--- pandas/core/arrays/numpy_.py | 6 ++---- pandas/core/arrays/period.py | 6 ++---- pandas/core/arrays/sparse/array.py | 32 ++++++++++-------------------- pandas/core/arrays/timedeltas.py | 6 ++---- 9 files changed, 40 insertions(+), 60 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 57f8f11d4d04c..82d79cc47a4ae 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -460,7 +460,7 @@ def astype(self, dtype, copy=True): if is_dtype_equal(dtype, self.dtype): if not copy: return self - elif copy: + else: return self.copy() if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) @@ -544,14 +544,13 @@ def argsort( ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) values = self._values_for_argsort() - 
result = nargsort( + return nargsort( values, kind=kind, ascending=ascending, na_position=na_position, mask=np.asarray(self.isna()), ) - return result def argmin(self): """ @@ -780,12 +779,12 @@ def equals(self, other: object) -> bool: boolean Whether the arrays are equivalent. """ - if not type(self) == type(other): + if type(self) != type(other): return False other = cast(ExtensionArray, other) if not is_dtype_equal(self.dtype, other.dtype): return False - elif not len(self) == len(other): + elif len(self) != len(other): return False else: equal_values = self == other diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 73aa97c832848..21306455573b8 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -170,12 +170,13 @@ def coerce_to_array( values[~mask_values] = values_object[~mask_values].astype(bool) # if the values were integer-like, validate it were actually 0/1's - if inferred_dtype in integer_like: - if not np.all( + if (inferred_dtype in integer_like) and not ( + np.all( values[~mask_values].astype(float) == values_object[~mask_values].astype(float) - ): - raise TypeError("Need to pass bool-like values") + ) + ): + raise TypeError("Need to pass bool-like values") if mask is None and mask_values is None: mask = np.zeros(len(values), dtype=bool) @@ -193,9 +194,9 @@ def coerce_to_array( if mask_values is not None: mask = mask | mask_values - if not values.ndim == 1: + if values.ndim != 1: raise ValueError("values must be a 1D list-like") - if not mask.ndim == 1: + if mask.ndim != 1: raise ValueError("mask must be a 1D list-like") return values, mask @@ -395,9 +396,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False ) # for integer, error if there are missing values - if is_integer_dtype(dtype): - if self._hasna: - raise ValueError("cannot convert NA to integer") + if is_integer_dtype(dtype) and self._hasna: + raise 
ValueError("cannot convert NA to integer") # for float dtype, ensure we use np.nan before casting (numpy cannot # deal with pd.NA) na_value = self._na_value @@ -576,7 +576,7 @@ def _logical_method(self, other, op): elif isinstance(other, np.bool_): other = other.item() - if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)): + if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other): raise TypeError( "'other' should be pandas.NA or a bool. " f"Got {type(other).__name__} instead." diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9f0414cf7a806..626fb495dec03 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1314,8 +1314,7 @@ def isna(self): Categorical.notna : Boolean inverse of Categorical.isna. """ - ret = self._codes == -1 - return ret + return self._codes == -1 isnull = isna @@ -1363,7 +1362,7 @@ def value_counts(self, dropna=True): from pandas import CategoricalIndex, Series code, cat = self._codes, self.categories - ncat, mask = len(cat), 0 <= code + ncat, mask = (len(cat), code >= 0) ix, clean = np.arange(ncat), mask.all() if dropna or clean: @@ -1920,8 +1919,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: ) counts = counts.cumsum() _result = (r[start:end] for start, end in zip(counts, counts[1:])) - result = dict(zip(categories, _result)) - return result + return dict(zip(categories, _result)) # ------------------------------------------------------------------ # Reductions diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e845dbf39dbc9..404511895ddf0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1062,8 +1062,7 @@ def _time_shift(self, periods, freq=None): if isinstance(freq, str): freq = to_offset(freq) offset = periods * freq - result = self + offset - return result + return self + offset if periods == 0 or len(self) == 0: # GH#14811 
empty case @@ -1533,10 +1532,9 @@ def _round(self, freq, mode, ambiguous, nonexistent): self = cast("DatetimeArray", self) naive = self.tz_localize(None) result = naive._round(freq, mode, ambiguous, nonexistent) - aware = result.tz_localize( + return result.tz_localize( self.tz, ambiguous=ambiguous, nonexistent=nonexistent ) - return aware values = self.view("i8") result = round_nsint64(values, mode, freq) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9febba0f544ac..b633f268049e5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -84,9 +84,9 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): "mask should be boolean numpy array. Use " "the 'pd.array' function instead" ) - if not values.ndim == 1: + if values.ndim != 1: raise ValueError("values must be a 1D array") - if not mask.ndim == 1: + if mask.ndim != 1: raise ValueError("mask must be a 1D array") if copy: @@ -209,7 +209,8 @@ def to_numpy( dtype = object if self._hasna: if ( - not (is_object_dtype(dtype) or is_string_dtype(dtype)) + not is_object_dtype(dtype) + and not is_string_dtype(dtype) and na_value is libmissing.NA ): raise ValueError( diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index cd48f6cbc8170..e1a424b719a4a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -281,17 +281,15 @@ def all(self, *, axis=None, out=None, keepdims=False, skipna=True): def min(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) - result = masked_reductions.min( + return masked_reductions.min( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) - return result def max(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_max((), kwargs) - result = masked_reductions.max( + return masked_reductions.max( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) - return result def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) 
-> Scalar: nv.validate_sum((), kwargs) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d808ade53ad33..8de84a0187e95 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -589,7 +589,7 @@ def astype(self, dtype, copy: bool = True): if is_dtype_equal(dtype, self._dtype): if not copy: return self - elif copy: + else: return self.copy() if is_period_dtype(dtype): return self.asfreq(dtype.freq) @@ -1080,11 +1080,9 @@ def _make_field_arrays(*fields): elif length is None: length = len(x) - arrays = [ + return [ np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries)) else np.repeat(x, length) for x in fields ] - - return arrays diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4346e02069667..5f4cd4b269a2a 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -316,9 +316,8 @@ def __init__( raise Exception("must only pass scalars with an index") if is_scalar(data): - if index is not None: - if data is None: - data = np.nan + if index is not None and data is None: + data = np.nan if index is not None: npoints = len(index) @@ -575,8 +574,7 @@ def density(self): >>> s.density 0.6 """ - r = float(self.sp_index.npoints) / float(self.sp_index.length) - return r + return float(self.sp_index.npoints) / float(self.sp_index.length) @property def npoints(self) -> int: @@ -736,25 +734,17 @@ def value_counts(self, dropna=True): keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) fcounts = self.sp_index.ngaps - if fcounts > 0: - if self._null_fill_value and dropna: - pass + if fcounts > 0 and (not self._null_fill_value or not dropna): + mask = isna(keys) if self._null_fill_value else keys == self.fill_value + if mask.any(): + counts[mask] += fcounts else: - if self._null_fill_value: - mask = isna(keys) - else: - mask = keys == self.fill_value - - if mask.any(): - counts[mask] += fcounts - else: - keys = np.insert(keys, 0, 
self.fill_value) - counts = np.insert(counts, 0, fcounts) + keys = np.insert(keys, 0, self.fill_value) + counts = np.insert(counts, 0, fcounts) if not isinstance(keys, ABCIndexClass): keys = Index(keys) - result = Series(counts, index=keys) - return result + return Series(counts, index=keys) # -------- # Indexing @@ -1062,7 +1052,7 @@ def astype(self, dtype=None, copy=True): if is_dtype_equal(dtype, self._dtype): if not copy: return self - elif copy: + else: return self.copy() dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e4a844fd4c6ef..8a87df18b6adb 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -227,8 +227,7 @@ def _from_sequence( data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) - result = cls._simple_new(data, freq=freq) - return result + return cls._simple_new(data, freq=freq) @classmethod def _from_sequence_not_strict( @@ -334,10 +333,9 @@ def astype(self, dtype, copy: bool = True): if self._hasnans: # avoid double-copying result = self._data.astype(dtype, copy=False) - values = self._maybe_mask_results( + return self._maybe_mask_results( result, fill_value=None, convert="float64" ) - return values result = self._data.astype(dtype, copy=copy) return result.astype("i8") elif is_timedelta64_ns_dtype(dtype): From 41ee33f4fe4e25570442965cc94f0c69139262f8 Mon Sep 17 00:00:00 2001 From: Joel Whittier Date: Wed, 4 Nov 2020 10:52:45 -0600 Subject: [PATCH 048/147] Fixed Metadata Propogation in DataFrame (#37381) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/frame.py | 18 ++++++++++-------- pandas/core/reshape/merge.py | 4 ++-- pandas/tests/generic/test_finalize.py | 11 ++--------- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 
2e976371c0ac8..9ac3585aa9002 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -569,7 +569,7 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - Fixed bug in metadata propagation incorrectly copying DataFrame columns as metadata when the column name overlaps with the metadata name (:issue:`37037`) -- Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors and :class:`DataFrame.duplicated` and :class:`DataFrame.stack` and :class:`DataFrame.unstack` and :class:`DataFrame.pivot` methods (:issue:`28283`) +- Fixed metadata propagation in the :class:`Series.dt`, :class:`Series.str` accessors, :class:`DataFrame.duplicated`, :class:`DataFrame.stack`, :class:`DataFrame.unstack`, :class:`DataFrame.pivot`, :class:`DataFrame.append`, :class:`DataFrame.diff`, :class:`DataFrame.applymap` and :class:`DataFrame.update` methods (:issue:`28283`) (:issue:`37381`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a3130ec27713d..ae35ba36ebac9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7401,7 +7401,7 @@ def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: return self - self.shift(periods, axis=axis) new_data = self._mgr.diff(n=periods, axis=bm_axis) - return self._constructor(new_data) + return self._constructor(new_data).__finalize__(self, "diff") # ---------------------------------------------------------------------- # Function application @@ -7780,7 +7780,7 @@ def infer(x): return lib.map_infer(x, func, ignore_na=ignore_na) return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na) - return self.apply(infer) + return self.apply(infer).__finalize__(self, "applymap") # ---------------------------------------------------------------------- # Merging / joining methods @@ -7917,12 +7917,14 @@ def append( to_concat = [self, *other] else: to_concat = [self, other] - return concat( - to_concat, - ignore_index=ignore_index, - verify_integrity=verify_integrity, - sort=sort, - ) + return ( + concat( + to_concat, + ignore_index=ignore_index, + verify_integrity=verify_integrity, + sort=sort, + ) + ).__finalize__(self, method="append") def join( self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 1219fefd7ea92..978597e3c7686 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -689,7 +689,7 @@ def get_result(self): self._maybe_restore_index_levels(result) - return result + return result.__finalize__(self, method="merge") def _indicator_pre_merge( self, left: "DataFrame", right: "DataFrame" @@ -1505,7 +1505,7 @@ def get_result(self): ) typ = self.left._constructor - result = typ(result_data).__finalize__(self, method=self._merge_type) + result = typ(result_data) self._maybe_add_join_keys(result, left_indexer, right_indexer) diff --git 
a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index d16e854c25ed8..e38936baca758 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -178,20 +178,14 @@ marks=not_implemented_mark, ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("diff")), - marks=not_implemented_mark, - ), - pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)), - marks=not_implemented_mark, + (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x)) ), pytest.param( ( pd.DataFrame, frame_data, operator.methodcaller("append", pd.DataFrame({"A": [1]})), - ), - marks=not_implemented_mark, + ) ), pytest.param( ( @@ -199,7 +193,6 @@ frame_data, operator.methodcaller("append", pd.DataFrame({"B": [1]})), ), - marks=not_implemented_mark, ), pytest.param( ( From 6e5640e6b1f89b564043e5db2f04133afa8ba5ae Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Wed, 4 Nov 2020 11:53:06 -0500 Subject: [PATCH 049/147] TYP: add Shape alias to pandas._typing (#37128) --- pandas/_typing.py | 2 ++ pandas/core/arrays/_mixins.py | 5 +++-- pandas/core/arrays/base.py | 4 ++-- pandas/core/dtypes/cast.py | 4 ++-- pandas/core/groupby/ops.py | 4 ++-- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/multi.py | 4 ++-- pandas/core/internals/blocks.py | 4 ++-- pandas/core/internals/concat.py | 4 ++-- pandas/core/internals/managers.py | 6 +++--- pandas/core/ops/array_ops.py | 6 +++--- pandas/io/pytables.py | 4 ++-- 12 files changed, 27 insertions(+), 24 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 3e89cf24632e2..55a1c17b0aa53 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -15,6 +15,7 @@ Mapping, Optional, Sequence, + Tuple, Type, TypeVar, Union, @@ -93,6 +94,7 @@ Label = Optional[Hashable] IndexLabel = Union[Label, Sequence[Label]] Level = Union[Label, int] +Shape = Tuple[int, ...] 
Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] Axes = Collection diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index a2371a39a0efa..67ac2a3688214 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,8 +1,9 @@ -from typing import Any, Sequence, Tuple, TypeVar +from typing import Any, Sequence, TypeVar import numpy as np from pandas._libs import lib +from pandas._typing import Shape from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc @@ -93,7 +94,7 @@ def _validate_fill_value(self, fill_value): # TODO: make this a cache_readonly; for that to work we need to remove # the _index_data kludge in libreduction @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> Shape: return self._ndarray.shape def __len__(self) -> int: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 82d79cc47a4ae..be105fd1f2a46 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -12,7 +12,7 @@ import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike +from pandas._typing import ArrayLike, Shape from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -403,7 +403,7 @@ def dtype(self) -> ExtensionDtype: raise AbstractMethodError(self) @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> Shape: """ Return a tuple of the array dimensions. 
""" diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 692da8f8e021e..aded0af6aca0e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -32,7 +32,7 @@ ints_to_pytimedelta, ) from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar +from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar, Shape from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -1591,7 +1591,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: def cast_scalar_to_array( - shape: Tuple, value: Scalar, dtype: Optional[DtypeObj] = None + shape: Shape, value: Scalar, dtype: Optional[DtypeObj] = None ) -> np.ndarray: """ Create np.ndarray of specified shape and dtype, filled with values. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index f807b740abaf2..15725230d850a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -24,7 +24,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import F, FrameOrSeries, Label +from pandas._typing import F, FrameOrSeries, Label, Shape from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -116,7 +116,7 @@ def groupings(self) -> List["grouper.Grouping"]: return self._groupings @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> Shape: return tuple(ping.ngroups for ping in self.groupings) def __iter__(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b220756a24f9f..98ec3b55e65d9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -27,7 +27,7 @@ from pandas._libs.lib import is_datetime_array, no_default from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime, Timestamp from 
pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import AnyArrayLike, Dtype, DtypeObj, Label, final +from pandas._typing import AnyArrayLike, Dtype, DtypeObj, Label, Shape, final from pandas.compat.numpy import function as nv from pandas.errors import DuplicateLabelError, InvalidIndexError from pandas.util._decorators import Appender, cache_readonly, doc @@ -5644,7 +5644,7 @@ def _maybe_disable_logical_methods(self, opname: str_t): make_invalid_op(opname)(self) @property - def shape(self): + def shape(self) -> Shape: """ Return a tuple of the shape of the underlying data. """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index dc5e6877a6bf5..65e71a6109a5a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -20,7 +20,7 @@ from pandas._libs import algos as libalgos, index as libindex, lib from pandas._libs.hashtable import duplicated_int64 -from pandas._typing import AnyArrayLike, Label, Scalar +from pandas._typing import AnyArrayLike, Label, Scalar, Shape from pandas.compat.numpy import function as nv from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, doc @@ -702,7 +702,7 @@ def array(self): ) @property - def shape(self): + def shape(self) -> Shape: """ Return a tuple of the shape of the underlying data. 
""" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 24b00199611bf..ee630909cb990 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,7 +10,7 @@ from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import ArrayLike, Scalar +from pandas._typing import ArrayLike, Scalar, Shape from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -2762,7 +2762,7 @@ def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: return values -def safe_reshape(arr, new_shape): +def safe_reshape(arr, new_shape: Shape): """ If possible, reshape `arr` to have shape `new_shape`, with a couple of exceptions (see gh-13012): diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 8559fe72972b8..8efba87b14ce5 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import NaT, internals as libinternals -from pandas._typing import DtypeObj +from pandas._typing import DtypeObj, Shape from pandas.util._decorators import cache_readonly from pandas.core.dtypes.cast import maybe_promote @@ -175,7 +175,7 @@ def _get_mgr_concatenation_plan(mgr, indexers): class JoinUnit: - def __init__(self, block, shape, indexers=None): + def __init__(self, block, shape: Shape, indexers=None): # Passing shape explicitly is required for cases when block is None. 
if indexers is None: indexers = {} diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 49ca8f9ad55e9..a06d57e268fe2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -17,7 +17,7 @@ import numpy as np from pandas._libs import internals as libinternals, lib -from pandas._typing import ArrayLike, DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj, Label, Shape from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -204,7 +204,7 @@ def __nonzero__(self) -> bool: __bool__ = __nonzero__ @property - def shape(self) -> Tuple[int, ...]: + def shape(self) -> Shape: return tuple(len(ax) for ax in self.axes) @property @@ -1825,7 +1825,7 @@ def _asarray_compat(x): else: return np.asarray(x) - def _shape_compat(x): + def _shape_compat(x) -> Shape: if isinstance(x, ABCSeries): return (len(x),) else: diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 97fa7988c1774..8142fc3e695a3 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -5,13 +5,13 @@ from datetime import timedelta from functools import partial import operator -from typing import Any, Tuple +from typing import Any import warnings import numpy as np from pandas._libs import Timedelta, Timestamp, lib, ops as libops -from pandas._typing import ArrayLike +from pandas._typing import ArrayLike, Shape from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, @@ -427,7 +427,7 @@ def maybe_upcast_datetimelike_array(obj: ArrayLike) -> ArrayLike: return obj -def _maybe_upcast_for_op(obj, shape: Tuple[int, ...]): +def _maybe_upcast_for_op(obj, shape: Shape): """ Cast non-pandas objects to pandas types to unify behavior of arithmetic and comparison operations. 
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bf21a8fe2fc74..890195688b1cb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -28,7 +28,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion, Label +from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion, Label, Shape from pandas.compat._optional import import_optional_dependency from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning @@ -3091,7 +3091,7 @@ class BlockManagerFixed(GenericFixed): nblocks: int @property - def shape(self): + def shape(self) -> Optional[Shape]: try: ndim = self.ndim From ff48c89a1c7b906242b8b727486c07b0a28cf184 Mon Sep 17 00:00:00 2001 From: Micael Jarniac Date: Wed, 4 Nov 2020 13:53:38 -0300 Subject: [PATCH 050/147] DOC: Fix typo (#37630) --- pandas/core/shared_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 38e36c8ff8d01..c9940c78b8d7d 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -91,7 +91,7 @@ index. If a dict or Series is passed, the Series or dict VALUES will be used to determine the groups (the Series' values are first aligned; see ``.align()`` method). If an ndarray is passed, the - values are used as-is determine the groups. A label or list of + values are used as-is to determine the groups. A label or list of labels may be passed to group by the columns in ``self``. Notice that a tuple is interpreted as a (single) key. 
axis : {0 or 'index', 1 or 'columns'}, default 0 From 64b7278c45889442cec32557a763925d88eb0428 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Thu, 5 Nov 2020 00:25:27 +0700 Subject: [PATCH 051/147] CLN: parametrize test_nat_comparisons (#37195) --- pandas/tests/arithmetic/test_datetime64.py | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index cefd2ae7a9ddb..b0b8f1345e4d3 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -173,7 +173,26 @@ class TestDatetime64SeriesComparison: ) @pytest.mark.parametrize("reverse", [True, False]) @pytest.mark.parametrize("dtype", [None, object]) - def test_nat_comparisons(self, dtype, index_or_series, reverse, pair): + @pytest.mark.parametrize( + "op, expected", + [ + (operator.eq, Series([False, False, True])), + (operator.ne, Series([True, True, False])), + (operator.lt, Series([False, False, False])), + (operator.gt, Series([False, False, False])), + (operator.ge, Series([False, False, True])), + (operator.le, Series([False, False, True])), + ], + ) + def test_nat_comparisons( + self, + dtype, + index_or_series, + reverse, + pair, + op, + expected, + ): box = index_or_series l, r = pair if reverse: @@ -182,25 +201,10 @@ def test_nat_comparisons(self, dtype, index_or_series, reverse, pair): left = Series(l, dtype=dtype) right = box(r, dtype=dtype) - # Series, Index - expected = Series([False, False, True]) - tm.assert_series_equal(left == right, expected) + result = op(left, right) - expected = Series([True, True, False]) - tm.assert_series_equal(left != right, expected) - - expected = Series([False, False, False]) - tm.assert_series_equal(left < right, expected) - - expected = Series([False, False, False]) - tm.assert_series_equal(left > right, expected) - - expected = Series([False, False, True]) 
- tm.assert_series_equal(left >= right, expected) - - expected = Series([False, False, True]) - tm.assert_series_equal(left <= right, expected) + tm.assert_series_equal(result, expected) def test_comparison_invalid(self, tz_naive_fixture, box_with_array): # GH#4968 From e19d42c05ed516e32d83a4caa23800909a23efe7 Mon Sep 17 00:00:00 2001 From: taytzehao Date: Thu, 5 Nov 2020 01:26:22 +0800 Subject: [PATCH 052/147] dataframe dataclass docstring updated (#37632) --- pandas/core/frame.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ae35ba36ebac9..3ec575a849abe 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -360,7 +360,7 @@ class DataFrame(NDFrame, OpsMixin): Parameters ---------- data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame - Dict can contain Series, arrays, constants, or list-like objects. If + Dict can contain Series, arrays, constants, dataclass or list-like objects. If data is a dict, column order follows insertion-order. .. 
versionchanged:: 0.25.0 @@ -420,6 +420,16 @@ class DataFrame(NDFrame, OpsMixin): 0 1 2 3 1 4 5 6 2 7 8 9 + + Constructing DataFrame from dataclass: + + >>> from dataclasses import make_dataclass + >>> Point = make_dataclass("Point", [("x", int), ("y", int)]) + >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)]) + x y + 0 0 0 + 1 0 3 + 2 2 3 """ _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set From 663012c7e9e4f1cf0dda5642e48a40839d46eecc Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 22:57:59 +0000 Subject: [PATCH 053/147] refactor core/groupby (#37583) --- pandas/core/groupby/base.py | 5 ++--- pandas/core/groupby/generic.py | 22 ++++++++-------------- pandas/core/groupby/groupby.py | 13 ++++++------- pandas/core/groupby/grouper.py | 34 ++++++++++++++++++---------------- pandas/core/groupby/ops.py | 3 +-- 5 files changed, 35 insertions(+), 42 deletions(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 2387427d15670..8e278dc81a8cc 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -63,9 +63,8 @@ def _gotitem(self, key, ndim, subset=None): self = type(self)(subset, groupby=groupby, parent=self, **kwargs) self._reset_cache() - if subset.ndim == 2: - if is_scalar(key) and key in subset or is_list_like(key): - self._selection = key + if subset.ndim == 2 and (is_scalar(key) and key in subset or is_list_like(key)): + self._selection = key return self diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 457aed3a72799..d0b58e8abc4ee 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -512,12 +512,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): elif func not in base.transform_kernel_allowlist: msg = f"'{func}' is not a valid function name for transform(name)" raise ValueError(msg) - elif func in base.cythonized_kernels: + elif func in base.cythonized_kernels or func in 
base.transformation_kernels: # cythonized transform or canned "agg+broadcast" return getattr(self, func)(*args, **kwargs) - elif func in base.transformation_kernels: - return getattr(self, func)(*args, **kwargs) - # If func is a reduction, we need to broadcast the # result to the whole group. Compute func result # and deal with possible broadcasting below. @@ -1111,8 +1108,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike: # unwrap DataFrame to get array result = result._mgr.blocks[0].values - res_values = cast_agg_result(result, bvalues, how) - return res_values + return cast_agg_result(result, bvalues, how) # TypeError -> we may have an exception in trying to aggregate # continue and exclude the block @@ -1368,12 +1364,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): elif func not in base.transform_kernel_allowlist: msg = f"'{func}' is not a valid function name for transform(name)" raise ValueError(msg) - elif func in base.cythonized_kernels: + elif func in base.cythonized_kernels or func in base.transformation_kernels: # cythonized transformation or canned "reduction+broadcast" return getattr(self, func)(*args, **kwargs) - elif func in base.transformation_kernels: - return getattr(self, func)(*args, **kwargs) - # GH 30918 # Use _transform_fast only when we know func is an aggregation if func in base.reduction_kernels: @@ -1401,9 +1394,10 @@ def _transform_fast(self, result: DataFrame) -> DataFrame: # by take operation ids, _, ngroup = self.grouper.group_info result = result.reindex(self.grouper.result_index, copy=False) - output = [] - for i, _ in enumerate(result.columns): - output.append(algorithms.take_1d(result.iloc[:, i].values, ids)) + output = [ + algorithms.take_1d(result.iloc[:, i].values, ids) + for i, _ in enumerate(result.columns) + ] return self.obj._constructor._from_arrays( output, columns=result.columns, index=obj.index @@ -1462,7 +1456,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: 
else: inds.append(i) - if len(output) == 0: + if not output: raise TypeError("Transform function invalid for data types") columns = obj.columns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c5bc9b563ea5e..32023576b0a91 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1001,7 +1001,7 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs): key = base.OutputKey(label=name, position=idx) output[key] = result - if len(output) == 0: + if not output: raise DataError("No numeric types to aggregate") return self._wrap_transformed_output(output) @@ -1084,7 +1084,7 @@ def _cython_agg_general( output[key] = maybe_cast_result(result, obj, how=how) idx += 1 - if len(output) == 0: + if not output: raise DataError("No numeric types to aggregate") return self._wrap_aggregated_output(output, index=self.grouper.result_index) @@ -1182,7 +1182,7 @@ def _python_agg_general(self, func, *args, **kwargs): key = base.OutputKey(label=name, position=idx) output[key] = maybe_cast_result(result, obj, numeric_only=True) - if len(output) == 0: + if not output: return self._python_apply_general(f, self._selected_obj) if self.grouper._filter_empty_groups: @@ -2550,9 +2550,8 @@ def _get_cythonized_result( """ if result_is_index and aggregate: raise ValueError("'result_is_index' and 'aggregate' cannot both be True!") - if post_processing: - if not callable(post_processing): - raise ValueError("'post_processing' must be a callable!") + if post_processing and not callable(post_processing): + raise ValueError("'post_processing' must be a callable!") if pre_processing: if not callable(pre_processing): raise ValueError("'pre_processing' must be a callable!") @@ -2631,7 +2630,7 @@ def _get_cythonized_result( output[key] = result # error_msg is "" on an frame/series with no rows or columns - if len(output) == 0 and error_msg != "": + if not output and error_msg != "": raise TypeError(error_msg) if aggregate: diff 
--git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 9f0d953a2cc71..ff5379567f090 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -593,23 +593,25 @@ def group_index(self) -> Index: return self._group_index def _make_codes(self) -> None: - if self._codes is None or self._group_index is None: - # we have a list of groupers - if isinstance(self.grouper, ops.BaseGrouper): - codes = self.grouper.codes_info - uniques = self.grouper.result_index + if self._codes is not None and self._group_index is not None: + return + + # we have a list of groupers + if isinstance(self.grouper, ops.BaseGrouper): + codes = self.grouper.codes_info + uniques = self.grouper.result_index + else: + # GH35667, replace dropna=False with na_sentinel=None + if not self.dropna: + na_sentinel = None else: - # GH35667, replace dropna=False with na_sentinel=None - if not self.dropna: - na_sentinel = None - else: - na_sentinel = -1 - codes, uniques = algorithms.factorize( - self.grouper, sort=self.sort, na_sentinel=na_sentinel - ) - uniques = Index(uniques, name=self.name) - self._codes = codes - self._group_index = uniques + na_sentinel = -1 + codes, uniques = algorithms.factorize( + self.grouper, sort=self.sort, na_sentinel=na_sentinel + ) + uniques = Index(uniques, name=self.name) + self._codes = codes + self._group_index = uniques @cache_readonly def groups(self) -> Dict[Hashable, np.ndarray]: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 15725230d850a..438030008bb4d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -322,10 +322,9 @@ def result_index(self) -> Index: codes = self.reconstructed_codes levels = [ping.result_index for ping in self.groupings] - result = MultiIndex( + return MultiIndex( levels=levels, codes=codes, verify_integrity=False, names=self.names ) - return result def get_group_levels(self) -> List[Index]: if not self.compressed and len(self.groupings) == 1: From 
56348e5854d7506e8d211c4c1a1546ddd9b712fe Mon Sep 17 00:00:00 2001 From: ma3da <34522496+ma3da@users.noreply.github.com> Date: Thu, 5 Nov 2020 00:47:58 +0100 Subject: [PATCH 054/147] BUG: set index of DataFrame.apply(f) when f returns dict (#37544) (#37606) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/apply.py | 6 ++++-- pandas/tests/frame/apply/test_frame_apply.py | 17 +++++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9ac3585aa9002..23b84bfbd69e6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -547,6 +547,7 @@ Reshaping - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) - Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) +- Bug in :meth:`DataFrame.apply` not setting index of return value when ``func`` return type is ``dict`` (:issue:`37544`) Sparse ^^^^^^ diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 002e260742dc5..a14debce6eea7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -362,8 +362,10 @@ def wrap_results_for_axis( isinstance(x, dict) for x in results.values() ): # Our operation was a to_dict op e.g. 
- # test_apply_dict GH#8735, test_apply_reduce_rows_to_dict GH#25196 - return self.obj._constructor_sliced(results) + # test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544 + res = self.obj._constructor_sliced(results) + res.index = res_index + return res try: result = self.obj._constructor(data=results) diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 03498b278f890..162035b53d68d 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -356,12 +356,17 @@ def test_apply_reduce_Series(self, float_frame): result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) - def test_apply_reduce_rows_to_dict(self): - # GH 25196 - data = DataFrame([[1, 2], [3, 4]]) - expected = Series([{0: 1, 1: 3}, {0: 2, 1: 4}]) - result = data.apply(dict) - tm.assert_series_equal(result, expected) + def test_apply_reduce_to_dict(self): + # GH 25196 37544 + data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"]) + + result0 = data.apply(dict, axis=0) + expected0 = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns) + tm.assert_series_equal(result0, expected0) + + result1 = data.apply(dict, axis=1) + expected1 = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index) + tm.assert_series_equal(result1, expected1) def test_apply_differently_indexed(self): df = DataFrame(np.random.randn(20, 10)) From 2cf6cf2ff137fec2dd8f97adb6b0c9dfe7e387ca Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Wed, 4 Nov 2020 18:50:33 -0500 Subject: [PATCH 055/147] BUG: to_dict should return a native datetime object for NumPy backed dataframes (#37571) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/arrays/sparse/array.py | 3 +- pandas/core/common.py | 35 +--------------- pandas/core/dtypes/cast.py | 43 +++++++++++++++++++- pandas/core/frame.py | 7 ++-- 
pandas/core/indexes/interval.py | 5 ++- pandas/core/internals/blocks.py | 3 +- pandas/core/internals/construction.py | 3 +- pandas/tests/dtypes/cast/test_dict_compat.py | 14 +++++++ pandas/tests/frame/methods/test_to_dict.py | 37 +++++++++++------ pandas/tests/test_common.py | 11 +---- 11 files changed, 97 insertions(+), 66 deletions(-) create mode 100644 pandas/tests/dtypes/cast/test_dict_compat.py diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 23b84bfbd69e6..5cceb2a9bce8c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -435,7 +435,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :meth:`DataFrame.to_dict` with ``orient='records'`` now returns python native datetime objects for datetimelike columns (:issue:`21256`) - Strings diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 5f4cd4b269a2a..9152ce72d75aa 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -22,6 +22,7 @@ construct_1d_arraylike_from_scalar, find_common_type, infer_dtype_from_scalar, + maybe_box_datetimelike, ) from pandas.core.dtypes.common import ( is_array_like, @@ -805,7 +806,7 @@ def _get_val_at(self, loc): return self.fill_value else: val = self.sp_values[sp_loc] - val = com.maybe_box_datetimelike(val, self.sp_values.dtype) + val = maybe_box_datetimelike(val, self.sp_values.dtype) return val def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray": diff --git a/pandas/core/common.py b/pandas/core/common.py index b860c83f89cbc..9b6133d2f7627 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -6,7 +6,6 @@ from collections import abc, defaultdict import contextlib -from datetime import datetime, timedelta from functools import partial import inspect from typing import Any, Collection, Iterable, Iterator, List, Union, cast @@ -14,7 +13,7 @@ import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs 
import lib from pandas._typing import AnyArrayLike, Scalar, T from pandas.compat.numpy import np_version_under1p18 @@ -78,21 +77,6 @@ def consensus_name_attr(objs): return name -def maybe_box_datetimelike(value, dtype=None): - # turn a datetime like into a Timestamp/timedelta as needed - if dtype == object: - # If we dont have datetime64/timedelta64 dtype, we dont want to - # box datetimelike scalars - return value - - if isinstance(value, (np.datetime64, datetime)): - value = tslibs.Timestamp(value) - elif isinstance(value, (np.timedelta64, timedelta)): - value = tslibs.Timedelta(value) - - return value - - def is_bool_indexer(key: Any) -> bool: """ Check whether `key` is a valid boolean indexer. @@ -347,23 +331,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -def dict_compat(d): - """ - Helper function to convert datetimelike-keyed dicts - to Timestamp-keyed dict. - - Parameters - ---------- - d: dict like object - - Returns - ------- - dict - - """ - return {maybe_box_datetimelike(key): value for key, value in d.items()} - - def standardize_mapping(into): """ Helper function to standardize a supplied mapping. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index aded0af6aca0e..9758eae60c262 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, Any, + Dict, List, Optional, Sequence, @@ -19,7 +20,7 @@ import numpy as np -from pandas._libs import lib, tslib +from pandas._libs import lib, tslib, tslibs from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -134,6 +135,30 @@ def is_nested_object(obj) -> bool: return False +def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar: + """ + Cast scalar to Timestamp or Timedelta if scalar is datetime-like + and dtype is not object. 
+ + Parameters + ---------- + value : scalar + dtype : Dtype, optional + + Returns + ------- + scalar + """ + if dtype == object: + pass + elif isinstance(value, (np.datetime64, datetime)): + value = tslibs.Timestamp(value) + elif isinstance(value, (np.timedelta64, timedelta)): + value = tslibs.Timedelta(value) + + return value + + def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): """ try to cast to the specified dtype (e.g. convert back to bool/int @@ -791,6 +816,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, return dtype, val +def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: + """ + Convert datetimelike-keyed dicts to a Timestamp-keyed dict. + + Parameters + ---------- + d: dict-like object + + Returns + ------- + dict + + """ + return {maybe_box_datetimelike(key): value for key, value in d.items()} + + def infer_dtype_from_array( arr, pandas_dtype: bool = False ) -> Tuple[DtypeObj, ArrayLike]: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ec575a849abe..9d223ba2bab0c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -84,6 +84,7 @@ find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, + maybe_box_datetimelike, maybe_cast_to_datetime, maybe_casted_values, maybe_convert_platform, @@ -1538,7 +1539,7 @@ def to_dict(self, orient="dict", into=dict): ( "data", [ - list(map(com.maybe_box_datetimelike, t)) + list(map(maybe_box_datetimelike, t)) for t in self.itertuples(index=False, name=None) ], ), @@ -1546,7 +1547,7 @@ def to_dict(self, orient="dict", into=dict): ) elif orient == "series": - return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items()) + return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() @@ -1555,7 +1556,7 @@ def to_dict(self, orient="dict", into=dict): for row in self.itertuples(index=False, name=None) ) return [ - into_c((k, 
com.maybe_box_datetimelike(v)) for k, v in row.items()) + into_c((k, maybe_box_datetimelike(v)) for k, v in row.items()) for row in rows ] diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 2061e652a4c01..c700acc24f411 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.cast import ( find_common_type, infer_dtype_from_scalar, + maybe_box_datetimelike, maybe_downcast_to_dtype, ) from pandas.core.dtypes.common import ( @@ -1193,8 +1194,8 @@ def interval_range( IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], closed='both', dtype='interval[int64]') """ - start = com.maybe_box_datetimelike(start) - end = com.maybe_box_datetimelike(end) + start = maybe_box_datetimelike(start) + end = maybe_box_datetimelike(end) endpoint = start if start is not None else end if freq is None and com.any_none(periods, start, end): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ee630909cb990..1f34e91d71077 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -19,6 +19,7 @@ find_common_type, infer_dtype_from, infer_dtype_from_scalar, + maybe_box_datetimelike, maybe_downcast_numeric, maybe_downcast_to_dtype, maybe_infer_dtype_type, @@ -843,7 +844,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: if isna(s): return ~mask - s = com.maybe_box_datetimelike(s) + s = maybe_box_datetimelike(s) return compare_or_regex_search(self.values, s, regex, mask) # Calculate the mask once, prior to the call of comp diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index bb8283604abb0..bcafa2c2fdca7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -14,6 +14,7 @@ from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, + dict_compat, maybe_cast_to_datetime, 
maybe_convert_platform, maybe_infer_to_datetimelike, @@ -346,7 +347,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]): oindex = index.astype("O") if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)): - val = com.dict_compat(val) + val = dict_compat(val) else: val = dict(val) val = lib.fast_multiget(val, oindex._values, default=np.nan) diff --git a/pandas/tests/dtypes/cast/test_dict_compat.py b/pandas/tests/dtypes/cast/test_dict_compat.py new file mode 100644 index 0000000000000..13dc82d779f95 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_dict_compat.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas.core.dtypes.cast import dict_compat + +from pandas import Timestamp + + +def test_dict_compat(): + data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} + assert dict_compat(data_datetime64) == expected + assert dict_compat(expected) == expected + assert dict_compat(data_unchanged) == data_unchanged diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index f1656b46cf356..f8feef7a95eab 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -257,17 +257,30 @@ def test_to_dict_wide(self): assert result == expected def test_to_dict_orient_dtype(self): - # GH#22620 - # Input Data - input_data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["X", "Y", "Z"]} - df = DataFrame(input_data) - # Expected Dtypes - expected = {"a": int, "b": float, "c": str} - # Extracting dtypes out of to_dict operation - for df_dict in df.to_dict("records"): - result = { - "a": type(df_dict["a"]), - "b": type(df_dict["b"]), - "c": type(df_dict["c"]), + # GH22620 & GH21256 + + df = DataFrame( + { + "bool": [True, True, False], + "datetime": [ + datetime(2018, 1, 1), + datetime(2019, 2, 2), + datetime(2020, 3, 3), + ], + "float": [1.0, 2.0, 
3.0], + "int": [1, 2, 3], + "str": ["X", "Y", "Z"], } + ) + + expected = { + "int": int, + "float": float, + "str": str, + "datetime": Timestamp, + "bool": bool, + } + + for df_dict in df.to_dict("records"): + result = {col: type(df_dict[col]) for col in list(df.columns)} assert result == expected diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 366a1970f6f64..81d866ba63bc0 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -9,7 +9,7 @@ from pandas.compat.numpy import np_version_under1p17 import pandas as pd -from pandas import Series, Timestamp +from pandas import Series import pandas._testing as tm from pandas.core import ops import pandas.core.common as com @@ -109,15 +109,6 @@ def test_maybe_match_name(left, right, expected): assert ops.common._maybe_match_name(left, right) == expected -def test_dict_compat(): - data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} - data_unchanged = {1: 2, 3: 4, 5: 6} - expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} - assert com.dict_compat(data_datetime64) == expected - assert com.dict_compat(expected) == expected - assert com.dict_compat(data_unchanged) == data_unchanged - - def test_standardize_mapping(): # No uninitialized defaultdicts msg = r"to_dict\(\) only accepts initialized defaultdicts" From b3aa7208c5133fe885efd6fb94783ba7f58bb561 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 4 Nov 2020 19:40:03 -0500 Subject: [PATCH 056/147] ENH: memory_map for compressed files (#37621) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/common.py | 135 ++++++++++++++++---------- pandas/io/parsers.py | 20 +--- pandas/tests/io/parser/test_common.py | 44 ++++----- 4 files changed, 109 insertions(+), 91 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5cceb2a9bce8c..690e6b8f725ad 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ 
b/doc/source/whatsnew/v1.2.0.rst @@ -230,6 +230,7 @@ Other enhancements - :class:`DatetimeIndex` and :class:`Series` with ``datetime64`` or ``datetime64tz`` dtypes now support ``std`` (:issue:`37436`) - :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) - :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) +- :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/io/common.py b/pandas/io/common.py index 90a79e54015c4..910eb23d9a2d0 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -107,6 +107,7 @@ class IOHandles: handle: Buffer created_handles: List[Buffer] = dataclasses.field(default_factory=list) is_wrapped: bool = False + is_mmap: bool = False def close(self) -> None: """ @@ -604,49 +605,49 @@ def get_handle( except ImportError: pass - handles: List[Buffer] = list() - # Windows does not default to utf-8. 
Set to utf-8 for a consistent behavior if encoding is None: encoding = "utf-8" # Convert pathlib.Path/py.path.local or string - path_or_buf = stringify_path(path_or_buf) - is_path = isinstance(path_or_buf, str) - f = path_or_buf + handle = stringify_path(path_or_buf) compression, compression_args = get_compression_method(compression) - if is_path: - compression = infer_compression(path_or_buf, compression) + compression = infer_compression(handle, compression) - if compression: + # memory mapping needs to be the first step + handle, memory_map, handles = _maybe_memory_map( + handle, memory_map, encoding, mode, errors + ) + is_path = isinstance(handle, str) + if compression: # GZ Compression if compression == "gzip": if is_path: - assert isinstance(path_or_buf, str) - f = gzip.GzipFile(filename=path_or_buf, mode=mode, **compression_args) + assert isinstance(handle, str) + handle = gzip.GzipFile(filename=handle, mode=mode, **compression_args) else: - f = gzip.GzipFile( - fileobj=path_or_buf, # type: ignore[arg-type] + handle = gzip.GzipFile( + fileobj=handle, # type: ignore[arg-type] mode=mode, **compression_args, ) # BZ Compression elif compression == "bz2": - f = bz2.BZ2File( - path_or_buf, mode=mode, **compression_args # type: ignore[arg-type] + handle = bz2.BZ2File( + handle, mode=mode, **compression_args # type: ignore[arg-type] ) # ZIP Compression elif compression == "zip": - f = _BytesZipFile(path_or_buf, mode, **compression_args) - if f.mode == "r": - handles.append(f) - zip_names = f.namelist() + handle = _BytesZipFile(handle, mode, **compression_args) + if handle.mode == "r": + handles.append(handle) + zip_names = handle.namelist() if len(zip_names) == 1: - f = f.open(zip_names.pop()) + handle = handle.open(zip_names.pop()) elif len(zip_names) == 0: raise ValueError(f"Zero files found in ZIP file {path_or_buf}") else: @@ -657,64 +658,52 @@ def get_handle( # XZ Compression elif compression == "xz": - f = get_lzma_file(lzma)(path_or_buf, mode) + handle = 
get_lzma_file(lzma)(handle, mode) # Unrecognized Compression else: msg = f"Unrecognized compression type: {compression}" raise ValueError(msg) - assert not isinstance(f, str) - handles.append(f) + assert not isinstance(handle, str) + handles.append(handle) elif is_path: # Check whether the filename is to be opened in binary mode. # Binary mode does not support 'encoding' and 'newline'. - is_binary_mode = "b" in mode - assert isinstance(path_or_buf, str) - if encoding and not is_binary_mode: + assert isinstance(handle, str) + if encoding and "b" not in mode: # Encoding - f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="") + handle = open(handle, mode, encoding=encoding, errors=errors, newline="") else: # Binary mode - f = open(path_or_buf, mode) - handles.append(f) + handle = open(handle, mode) + handles.append(handle) # Convert BytesIO or file objects passed with an encoding is_wrapped = False if is_text and ( compression - or isinstance(f, need_text_wrapping) - or "b" in getattr(f, "mode", "") + or isinstance(handle, need_text_wrapping) + or "b" in getattr(handle, "mode", "") ): - f = TextIOWrapper( - f, encoding=encoding, errors=errors, newline="" # type: ignore[arg-type] + handle = TextIOWrapper( + handle, # type: ignore[arg-type] + encoding=encoding, + errors=errors, + newline="", ) - handles.append(f) + handles.append(handle) # do not mark as wrapped when the user provided a string is_wrapped = not is_path - if memory_map and hasattr(f, "fileno"): - assert not isinstance(f, str) - try: - wrapped = cast(mmap.mmap, _MMapWrapper(f)) # type: ignore[arg-type] - f.close() - handles.remove(f) - handles.append(wrapped) - f = wrapped - except Exception: - # we catch any errors that may have occurred - # because that is consistent with the lower-level - # functionality of the C engine (pd.read_csv), so - # leave the file handler as is then - pass - handles.reverse() # close the most recently added buffer first - assert not isinstance(f, str) + 
assert not isinstance(handle, str) return IOHandles( - handle=f, + handle=handle, created_handles=handles, is_wrapped=is_wrapped, + is_mmap=memory_map, ) @@ -778,9 +767,16 @@ class _MMapWrapper(abc.Iterator): """ def __init__(self, f: IO): + self.attributes = {} + for attribute in ("seekable", "readable", "writeable"): + if not hasattr(f, attribute): + continue + self.attributes[attribute] = getattr(f, attribute)() self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) def __getattr__(self, name: str): + if name in self.attributes: + return lambda: self.attributes[name] return getattr(self.mmap, name) def __iter__(self) -> "_MMapWrapper": @@ -799,3 +795,42 @@ def __next__(self) -> str: if newline == "": raise StopIteration return newline + + +def _maybe_memory_map( + handle: FileOrBuffer, + memory_map: bool, + encoding: str, + mode: str, + errors: Optional[str], +) -> Tuple[FileOrBuffer, bool, List[Buffer]]: + """Try to use memory map file/buffer.""" + handles: List[Buffer] = [] + memory_map &= hasattr(handle, "fileno") or isinstance(handle, str) + if not memory_map: + return handle, memory_map, handles + + # need to open the file first + if isinstance(handle, str): + if encoding and "b" not in mode: + # Encoding + handle = open(handle, mode, encoding=encoding, errors=errors, newline="") + else: + # Binary mode + handle = open(handle, mode) + handles.append(handle) + + try: + wrapped = cast(mmap.mmap, _MMapWrapper(handle)) # type: ignore[arg-type] + handle.close() + handles.remove(handle) + handles.append(wrapped) + handle = wrapped + except Exception: + # we catch any errors that may have occurred + # because that is consistent with the lower-level + # functionality of the C engine (pd.read_csv), so + # leave the file handler as is then + memory_map = False + + return handle, memory_map, handles diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3b72869188344..e4895d280c241 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -63,13 
+63,7 @@ from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import ( - get_compression_method, - get_filepath_or_buffer, - get_handle, - stringify_path, - validate_header_arg, -) +from pandas.io.common import get_filepath_or_buffer, get_handle, validate_header_arg from pandas.io.date_converters import generic_parser # BOM character (byte order mark) @@ -1834,16 +1828,6 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) - if kwds.get("memory_map", False): - # memory-mapped files are directly handled by the TextReader. - src = stringify_path(src) - - if get_compression_method(kwds.get("compression", None))[0] is not None: - raise ValueError( - "read_csv does not support compression with memory_map=True. " - + "Please use memory_map=False instead." - ) - self.handles = get_handle( src, mode="r", @@ -1855,7 +1839,7 @@ def __init__(self, src, **kwds): kwds.pop("encoding", None) kwds.pop("memory_map", None) kwds.pop("compression", None) - if kwds.get("memory_map", False) and hasattr(self.handles.handle, "mmap"): + if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"): self.handles.handle = self.handles.handle.mmap # #2442 diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index e61a5fce99c69..8f63d06859f62 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2275,40 +2275,38 @@ def test_read_csv_file_handle(all_parsers, io_class, encoding): assert not handle.closed -def test_memory_map_compression_error(c_parser_only): +def test_memory_map_file_handle_silent_fallback(all_parsers, compression): """ - c-parsers do not support memory_map=True with compression. + Do not fail for buffers with memory_map=True (cannot memory map BytesIO). - GH 36997 + GH 37621 """ - parser = c_parser_only - df = DataFrame({"a": [1], "b": [2]}) - msg = ( - "read_csv does not support compression with memory_map=True. 
" - + "Please use memory_map=False instead." - ) + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) - with tm.ensure_clean() as path: - df.to_csv(path, compression="gzip", index=False) + handle = BytesIO() + expected.to_csv(handle, index=False, compression=compression, mode="wb") + handle.seek(0) - with pytest.raises(ValueError, match=msg): - parser.read_csv(path, memory_map=True, compression="gzip") + tm.assert_frame_equal( + parser.read_csv(handle, memory_map=True, compression=compression), + expected, + ) -def test_memory_map_file_handle(all_parsers): +def test_memory_map_compression(all_parsers, compression): """ - Support some buffers with memory_map=True. + Support memory map for compressed files. - GH 36997 + GH 37621 """ parser = all_parsers expected = DataFrame({"a": [1], "b": [2]}) - handle = StringIO() - expected.to_csv(handle, index=False) - handle.seek(0) + with tm.ensure_clean() as path: + expected.to_csv(path, index=False, compression=compression) - tm.assert_frame_equal( - parser.read_csv(handle, memory_map=True), - expected, - ) + tm.assert_frame_equal( + parser.read_csv(path, memory_map=True, compression=compression), + expected, + ) From 3f74c341d36f8166d6abfb920debc972c60b02e5 Mon Sep 17 00:00:00 2001 From: junk Date: Thu, 5 Nov 2020 10:22:23 +0900 Subject: [PATCH 057/147] DOC: add example & prose of slicing with labels when index has duplicate labels (#36814) * DOC: add example & prose of slicing with labels when index has duplicate labels #36251 * DOC: proofread the sentence. Co-authored-by: Jun Kudo --- doc/source/user_guide/indexing.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 4493ddd0b2822..98c981539d207 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -422,6 +422,17 @@ above example, ``s.loc[1:6]`` would raise ``KeyError``. 
For the rationale behind this behavior, see :ref:`Endpoints are inclusive `. +.. ipython:: python + + s = pd.Series(list('abcdef'), index=[0, 3, 2, 5, 4, 2]) + s.loc[3:5] + +Also, if the index has duplicate labels *and* either the start or the stop label is duplicated, +an error will be raised. For instance, in the above example, ``s.loc[2:5]`` would raise a ``KeyError``. + +For more information about duplicate labels, see +:ref:`Duplicate Labels `. + .. _indexing.integer: Selection by position From 44bfe10bfac179cd52ebaa5b433e3c53109581cd Mon Sep 17 00:00:00 2001 From: Micael Jarniac Date: Thu, 5 Nov 2020 09:35:46 -0300 Subject: [PATCH 058/147] DOC: Fix typo (#37636) "columns(s)" sounded odd, I believe it was supposed to be just "column(s)". --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d223ba2bab0c..049d2c4888a69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6449,7 +6449,7 @@ def update( See Also -------- dict.update : Similar method for dictionaries. - DataFrame.merge : For column(s)-on-columns(s) operations. + DataFrame.merge : For column(s)-on-column(s) operations. Examples -------- @@ -7985,7 +7985,7 @@ def join( See Also -------- - DataFrame.merge : For column(s)-on-columns(s) operations. + DataFrame.merge : For column(s)-on-column(s) operations.
Notes ----- From 561b9af68c3ba80e35568e343c325430cace6a0f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Nov 2020 16:09:02 -0800 Subject: [PATCH 059/147] CI: troubleshoot win py38 builds (#37652) --- pandas/_libs/tslibs/tzconversion.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4c62b16d430bd..4a3fac1954ab7 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -500,9 +500,11 @@ cdef int64_t[:] _tz_convert_from_utc(const int64_t[:] vals, tzinfo tz): return converted +# OSError may be thrown by tzlocal on windows at or close to 1970-01-01 +# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, bint to_utc, - bint *fold=NULL): + bint *fold=NULL) except? -1: """ Calculate offset in nanoseconds needed to convert the i8 representation of a datetime from a tzlocal timezone to UTC, or vice-versa. 
From 58a257fc265bad416674721e8e23c172f6f26953 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Nov 2020 16:26:13 -0800 Subject: [PATCH 060/147] TST/REF: collect indexing tests by method (#37638) --- pandas/tests/frame/indexing/test_indexing.py | 109 ++------- pandas/tests/frame/indexing/test_setitem.py | 54 +++++ pandas/tests/frame/indexing/test_sparse.py | 37 --- pandas/tests/indexing/test_at.py | 33 ++- pandas/tests/indexing/test_categorical.py | 10 - pandas/tests/indexing/test_datetime.py | 92 +------- pandas/tests/indexing/test_loc.py | 221 +++++++++++++++++- pandas/tests/indexing/test_partial.py | 68 +----- pandas/tests/indexing/test_scalar.py | 22 -- pandas/tests/indexing/test_timedelta.py | 16 -- pandas/tests/series/indexing/test_datetime.py | 134 ++++------- pandas/tests/series/indexing/test_getitem.py | 71 +++++- pandas/tests/series/indexing/test_numeric.py | 11 - pandas/tests/series/indexing/test_setitem.py | 10 + 14 files changed, 455 insertions(+), 433 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9eaa0d0ae6876..4214ac14cba49 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -66,21 +66,6 @@ def test_getitem_dupe_cols(self): with pytest.raises(KeyError, match=re.escape(msg)): df[["baf"]] - @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) - def test_loc_iterable(self, float_frame, key_type): - idx = key_type(["A", "B", "C"]) - result = float_frame.loc[:, idx] - expected = float_frame.loc[:, ["A", "B", "C"]] - tm.assert_frame_equal(result, expected) - - def test_loc_timedelta_0seconds(self): - # GH#10583 - df = DataFrame(np.random.normal(size=(10, 4))) - df.index = pd.timedelta_range(start="0s", periods=10, freq="s") - expected = df.loc[pd.Timedelta("0s") :, :] - result = df.loc["0s":, :] - tm.assert_frame_equal(expected, result) - @pytest.mark.parametrize( "idx_type", [ @@ -125,28 +110,20 
@@ def test_getitem_listlike(self, idx_type, levels, float_frame): with pytest.raises(KeyError, match="not in index"): frame[idx] - @pytest.mark.parametrize( - "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] - ) - def test_loc_uint64(self, val, expected): - # see gh-19399 - df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) - result = df.loc[val] - - expected.name = val - tm.assert_series_equal(result, expected) - def test_getitem_callable(self, float_frame): # GH 12533 result = float_frame[lambda x: "A"] - tm.assert_series_equal(result, float_frame.loc[:, "A"]) + expected = float_frame.loc[:, "A"] + tm.assert_series_equal(result, expected) result = float_frame[lambda x: ["A", "B"]] + expected = float_frame.loc[:, ["A", "B"]] tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) df = float_frame[:3] result = df[lambda x: [True, False, True]] - tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :]) + expected = float_frame.iloc[[0, 2], :] + tm.assert_frame_equal(result, expected) def test_setitem_list(self, float_frame): @@ -181,11 +158,6 @@ def test_setitem_list(self, float_frame): expected = Series(["1", "2"], df.columns, name=1) tm.assert_series_equal(result, expected) - def test_setitem_list_not_dataframe(self, float_frame): - data = np.random.randn(len(float_frame), 2) - float_frame[["A", "B"]] = data - tm.assert_almost_equal(float_frame[["A", "B"]].values, data) - def test_setitem_list_of_tuples(self, float_frame): tuples = list(zip(float_frame["A"], float_frame["B"])) float_frame["tuples"] = tuples @@ -273,14 +245,6 @@ def test_setitem_multi_index(self): df[("joe", "last")] = df[("jolie", "first")].loc[i, j] tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) - def test_setitem_callable(self): - # GH 12533 - df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - df[lambda x: "A"] = [11, 12, 13, 14] - - exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) - tm.assert_frame_equal(df, exp) - def 
test_setitem_other_callable(self): # GH 13299 def inc(x): @@ -518,18 +482,13 @@ def test_setitem(self, float_frame): df.loc[0] = np.nan tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) - def test_setitem_dtype(self, dtype, float_frame): - arr = np.random.randn(len(float_frame)) - - float_frame[dtype] = np.array(arr, dtype=dtype) - assert float_frame[dtype].dtype.name == dtype - def test_setitem_tuple(self, float_frame): float_frame["A", "B"] = float_frame["A"] - tm.assert_series_equal( - float_frame["A", "B"], float_frame["A"], check_names=False - ) + assert ("A", "B") in float_frame.columns + + result = float_frame["A", "B"] + expected = float_frame["A"] + tm.assert_series_equal(result, expected, check_names=False) def test_setitem_always_copy(self, float_frame): s = float_frame["A"].copy() @@ -588,25 +547,6 @@ def test_setitem_boolean(self, float_frame): np.putmask(expected.values, mask.values, df.values * 2) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize( - "mask_type", - [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], - ids=["dataframe", "array"], - ) - def test_setitem_boolean_mask(self, mask_type, float_frame): - - # Test for issue #18582 - df = float_frame.copy() - mask = mask_type(df) - - # index with boolean mask - result = df.copy() - result[mask] = np.nan - - expected = df.copy() - expected.values[np.array(mask)] = np.nan - tm.assert_frame_equal(result, expected) - def test_setitem_cast(self, float_frame): float_frame["D"] = float_frame["D"].astype("i8") assert float_frame["D"].dtype == np.int64 @@ -821,19 +761,6 @@ def test_getitem_empty_frame_with_boolean(self): df2 = df[df > 0] tm.assert_frame_equal(df, df2) - def test_slice_floats(self): - index = [52195.504153, 52196.303147, 52198.369883] - df = DataFrame(np.random.rand(3, 2), index=index) - - s1 = df.loc[52195.1:52196.5] - assert len(s1) == 2 - - s1 = df.loc[52195.1:52196.6] - assert len(s1) 
== 2 - - s1 = df.loc[52195.1:52198.9] - assert len(s1) == 3 - def test_getitem_fancy_slice_integers_step(self): df = DataFrame(np.random.randn(10, 5)) @@ -883,15 +810,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): assert (float_frame["C"] == 4).all() - def test_setitem_slice_position(self): - # GH#31469 - df = DataFrame(np.zeros((100, 1))) - df[-4:] = 1 - arr = np.zeros((100, 1)) - arr[-4:] = 1 - expected = DataFrame(arr) - tm.assert_frame_equal(df, expected) - def test_getitem_setitem_non_ix_labels(self): df = tm.makeTimeDataFrame() @@ -1000,14 +918,13 @@ def test_getitem_fancy_ints(self, float_frame): expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]] tm.assert_frame_equal(result, expected) - def test_getitem_setitem_fancy_exceptions(self, float_frame): - ix = float_frame.iloc + def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): with pytest.raises(IndexingError, match="Too many indexers"): - ix[:, :, :] + float_frame.iloc[:, :, :] with pytest.raises(IndexError, match="too many indices for array"): # GH#32257 we let numpy do validation, get their exception - ix[:, :, :] = 1 + float_frame.iloc[:, :, :] = 1 def test_getitem_setitem_boolean_misaligned(self, float_frame): # boolean index misaligned labels diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index e1ce10970f07b..cb04a61b9e1cb 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -22,6 +22,18 @@ class TestDataFrameSetItem: + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + def test_setitem_dtype(self, dtype, float_frame): + arr = np.random.randn(len(float_frame)) + + float_frame[dtype] = np.array(arr, dtype=dtype) + assert float_frame[dtype].dtype.name == dtype + + def test_setitem_list_not_dataframe(self, float_frame): + data = np.random.randn(len(float_frame), 2) + float_frame[["A", "B"]] = data + 
tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + def test_setitem_error_msmgs(self): # GH 7432 @@ -285,3 +297,45 @@ def test_iloc_setitem_bool_indexer(self, klass): df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemSlicing: + def test_setitem_slice_position(self): + # GH#31469 + df = DataFrame(np.zeros((100, 1))) + df[-4:] = 1 + arr = np.zeros((100, 1)) + arr[-4:] = 1 + expected = DataFrame(arr) + tm.assert_frame_equal(df, expected) + + +class TestDataFrameSetItemCallable: + def test_setitem_callable(self): + # GH#12533 + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df[lambda x: "A"] = [11, 12, 13, 14] + + exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + + +class TestDataFrameSetItemBooleanMask: + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], + ids=["dataframe", "array"], + ) + def test_setitem_boolean_mask(self, mask_type, float_frame): + + # Test for issue #18582 + df = float_frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py index c0cd7faafb4db..47e4ae1f9f9e1 100644 --- a/pandas/tests/frame/indexing/test_sparse.py +++ b/pandas/tests/frame/indexing/test_sparse.py @@ -1,12 +1,6 @@ -import numpy as np -import pytest - -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray -from pandas.core.arrays.sparse import SparseDtype class TestSparseDataFrameIndexing: @@ -23,34 +17,3 @@ def test_getitem_sparse_column(self): result = df.loc[:, "A"] 
tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) - @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) - @td.skip_if_no_scipy - def test_loc_getitem_from_spmatrix(self, spmatrix_t, dtype): - import scipy.sparse - - spmatrix_t = getattr(scipy.sparse, spmatrix_t) - - # The bug is triggered by a sparse matrix with purely sparse columns. So the - # recipe below generates a rectangular matrix of dimension (5, 7) where all the - # diagonal cells are ones, meaning the last two columns are purely sparse. - rows, cols = 5, 7 - spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) - df = pd.DataFrame.sparse.from_spmatrix(spmatrix) - - # regression test for #34526 - itr_idx = range(2, rows) - result = df.loc[itr_idx].values - expected = spmatrix.toarray()[itr_idx] - tm.assert_numpy_array_equal(result, expected) - - # regression test for #34540 - result = df.loc[itr_idx].dtypes.values - expected = np.full(cols, SparseDtype(dtype, fill_value=0)) - tm.assert_numpy_array_equal(result, expected) - - def test_all_sparse(self): - df = pd.DataFrame({"A": pd.array([0, 0], dtype=pd.SparseDtype("int64"))}) - result = df.loc[[0, 1]] - tm.assert_frame_equal(result, df) diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 9c2d88f1589c2..2e06d8c73d7d1 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -1,14 +1,41 @@ from datetime import datetime, timezone -import pandas as pd +import numpy as np +import pytest + +from pandas import DataFrame import pandas._testing as tm def test_at_timezone(): # https://github.com/pandas-dev/pandas/issues/33544 - result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]}) + result = DataFrame({"foo": [datetime(2000, 1, 1)]}) result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) - expected = pd.DataFrame( + expected = DataFrame( {"foo": [datetime(2000, 1, 2, 
tzinfo=timezone.utc)]}, dtype=object ) tm.assert_frame_equal(result, expected) + + +class TestAtWithDuplicates: + def test_at_with_duplicate_axes_requires_scalar_lookup(self): + # GH#33041 check that falling back to loc doesn't allow non-scalar + # args to slip in + + arr = np.random.randn(6).reshape(3, 2) + df = DataFrame(arr, columns=["A", "A"]) + + msg = "Invalid call for scalar access" + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] + + with pytest.raises(ValueError, match=msg): + df.at[[1, 2]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[1, ["A"]] = 1 + with pytest.raises(ValueError, match=msg): + df.at[:, "A"] = 1 diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 854ca176fd2f4..6cdd73d37aec8 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -73,16 +73,6 @@ def test_loc_scalar(self): with pytest.raises(KeyError, match="^1$"): df.loc[1] - def test_getitem_scalar(self): - - cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) - - s = Series([1, 2], index=cats) - - expected = s.iloc[0] - result = s[cats[0]] - assert result == expected - def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 4879f805b5a2d..fad3478499929 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -1,6 +1,3 @@ -from datetime import date, datetime, timedelta - -from dateutil import tz import numpy as np import pytest @@ -206,26 +203,6 @@ def test_partial_setting_with_datetimelike_dtype(self): df.loc[mask, "C"] = df.loc[mask].index tm.assert_frame_equal(df, expected) - def test_loc_setitem_datetime(self): - - # GH 9516 - dt1 = Timestamp("20130101 
09:00:00") - dt2 = Timestamp("20130101 10:00:00") - - for conv in [ - lambda x: x, - lambda x: x.to_datetime64(), - lambda x: x.to_pydatetime(), - lambda x: np.datetime64(x), - ]: - - df = DataFrame() - df.loc[conv(dt1), "one"] = 100 - df.loc[conv(dt2), "one"] = 200 - - expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) - tm.assert_frame_equal(df, expected) - def test_series_partial_set_datetime(self): # GH 11497 @@ -245,7 +222,8 @@ def test_series_partial_set_datetime(self): exp = Series( [0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s" ) - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) keys = [ Timestamp("2011-01-03"), @@ -273,7 +251,8 @@ def test_series_partial_set_period(self): pd.Period("2011-01-01", freq="D"), ] exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s") - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + result = ser.loc[keys] + tm.assert_series_equal(result, exp, check_index_type=True) keys = [ pd.Period("2011-01-03", freq="D"), @@ -297,33 +276,6 @@ def test_nanosecond_getitem_setitem_with_tz(self): expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) - def test_loc_getitem_across_dst(self): - # GH 21846 - idx = pd.date_range( - "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" - ) - series2 = Series([0, 1, 2, 3, 4], index=idx) - - t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") - t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") - result = series2.loc[t_1:t_2] - expected = Series([2, 3], index=idx[2:4]) - tm.assert_series_equal(result, expected) - - result = series2[t_1] - expected = 2 - assert result == expected - - def test_loc_incremental_setitem_with_dst(self): - # GH 20724 - base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific")) - idxs = 
[base + timedelta(seconds=i * 900) for i in range(16)] - result = Series([0], index=[idxs[0]]) - for ts in idxs: - result.loc[ts] = 1 - expected = Series(1, index=idxs) - tm.assert_series_equal(result, expected) - def test_loc_setitem_with_existing_dst(self): # GH 18308 start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") @@ -339,39 +291,3 @@ def test_loc_setitem_with_existing_dst(self): dtype=object, ) tm.assert_frame_equal(result, expected) - - def test_loc_str_slicing(self): - ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") - ser = ix.to_series() - result = ser.loc[:"2017-12"] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - def test_loc_label_slicing(self): - ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") - ser = ix.to_series() - result = ser.loc[: ix[-2]] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "slice_, positions", - [ - [slice(date(2018, 1, 1), None), [0, 1, 2]], - [slice(date(2019, 1, 2), None), [2]], - [slice(date(2020, 1, 1), None), []], - [slice(None, date(2020, 1, 1)), [0, 1, 2]], - [slice(None, date(2019, 1, 1)), [0]], - ], - ) - def test_getitem_slice_date(self, slice_, positions): - # https://github.com/pandas-dev/pandas/issues/31501 - s = Series( - [0, 1, 2], - pd.DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), - ) - result = s[slice_] - expected = s.take(positions) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c1a5db992d3df..fff4c0f78f38a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,15 +1,28 @@ """ test label based indexing with loc """ -from datetime import time +from datetime import datetime, time, timedelta from io import StringIO import re +from dateutil.tz import gettz import numpy as np import pytest from pandas.compat.numpy import is_numpy_dev 
+import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + SparseDtype, + Timedelta, + Timestamp, + date_range, + timedelta_range, + to_datetime, +) import pandas._testing as tm from pandas.api.types import is_scalar from pandas.tests.indexing.common import Base @@ -1014,6 +1027,73 @@ def test_loc_getitem_time_object(self, frame_or_series): expected.index = expected.index._with_freq(None) tm.assert_equal(result, expected) + @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex]) + @td.skip_if_no_scipy + def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype): + import scipy.sparse + + spmatrix_t = getattr(scipy.sparse, spmatrix_t) + + # The bug is triggered by a sparse matrix with purely sparse columns. So the + # recipe below generates a rectangular matrix of dimension (5, 7) where all the + # diagonal cells are ones, meaning the last two columns are purely sparse. 
+ rows, cols = 5, 7 + spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype) + df = DataFrame.sparse.from_spmatrix(spmatrix) + + # regression test for GH#34526 + itr_idx = range(2, rows) + result = df.loc[itr_idx].values + expected = spmatrix.toarray()[itr_idx] + tm.assert_numpy_array_equal(result, expected) + + # regression test for GH#34540 + result = df.loc[itr_idx].dtypes.values + expected = np.full(cols, SparseDtype(dtype, fill_value=0)) + tm.assert_numpy_array_equal(result, expected) + + def test_loc_getitem_listlike_all_retains_sparse(self): + df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))}) + result = df.loc[[0, 1]] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) + def test_loc_getitem_iterable(self, float_frame, key_type): + idx = key_type(["A", "B", "C"]) + result = float_frame.loc[:, idx] + expected = float_frame.loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_timedelta_0seconds(self): + # GH#10583 + df = DataFrame(np.random.normal(size=(10, 4))) + df.index = timedelta_range(start="0s", periods=10, freq="s") + expected = df.loc[Timedelta("0s") :, :] + result = df.loc["0s":, :] + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] + ) + def test_loc_getitem_uint64_scalar(self, val, expected): + # see GH#19399 + df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) + result = df.loc[val] + + expected.name = val + tm.assert_series_equal(result, expected) + + def test_loc_setitem_int_label_with_float64index(self): + # note labels are floats + ser = Series(["a", "b", "c"], index=[0, 0.5, 1]) + tmp = ser.copy() + + ser.loc[1] = "zoo" + tmp.iloc[2] = "zoo" + + tm.assert_series_equal(ser, tmp) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1103,6 +1183,11 @@ def test_loc_setitem_multiindex_slice(self): 
tm.assert_series_equal(result, expected) + def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): + times = date_range("2000-01-01", freq="10min", periods=100000) + ser = Series(range(100000), times) + ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + class TestLocSetitemWithExpansion: @pytest.mark.slow @@ -1113,6 +1198,59 @@ def test_loc_setitem_with_expansion_large_dataframe(self): expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") tm.assert_frame_equal(result, expected) + def test_loc_setitem_empty_series(self): + # GH#5226 + + # partially set with an empty object series + ser = Series(dtype=object) + ser.loc[1] = 1 + tm.assert_series_equal(ser, Series([1], index=[1])) + ser.loc[3] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc[1] = 1.0 + tm.assert_series_equal(ser, Series([1.0], index=[1])) + ser.loc[3] = 3.0 + tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3])) + + ser = Series(dtype=object) + ser.loc["foo"] = 1 + tm.assert_series_equal(ser, Series([1], index=["foo"])) + ser.loc["bar"] = 3 + tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"])) + ser.loc[3] = 4 + tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3])) + + def test_loc_setitem_incremental_with_dst(self): + # GH#20724 + base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_datetime_keys_cast(self): + # GH#9516 + dt1 = Timestamp("20130101 09:00:00") + dt2 = Timestamp("20130101 10:00:00") + + for conv in [ + lambda x: x, + lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), + lambda x: np.datetime64(x), + ]: + + df = DataFrame() + df.loc[conv(dt1), "one"] = 100 + df.loc[conv(dt2), "one"] = 200 + + expected = 
DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + class TestLocCallable: def test_frame_loc_getitem_callable(self): @@ -1280,6 +1418,85 @@ def test_frame_loc_setitem_callable(self): tm.assert_frame_equal(res, exp) +class TestPartialStringSlicing: + def test_loc_getitem_partial_string_slicing_datetimeindex(self): + # GH#35509 + df = DataFrame( + {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, + index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), + ) + expected = DataFrame( + {"col1": ["a", "c"], "col2": [1, 3]}, + index=to_datetime(["2020-08-01", "2020-08-05"]), + ) + result = df.loc["2020-08"] + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_periodindex(self): + pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = pi.to_series() + result = ser.loc[:"2017-12"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + +class TestLabelSlicing: + def test_loc_getitem_label_slice_across_dst(self): + # GH#21846 + idx = date_range( + "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" + ) + series2 = Series([0, 1, 2, 3, 4], index=idx) + + t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min") + t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min") + result = series2.loc[t_1:t_2] + expected = Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + def test_loc_getitem_label_slice_period(self): + ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected 
= ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_label_slice_timedelta64(self): + ix = timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_getitem_slice_floats_inexact(self): + index = [52195.504153, 52196.303147, 52198.369883] + df = DataFrame(np.random.rand(3, 2), index=index) + + s1 = df.loc[52195.1:52196.5] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52196.6] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52198.9] + assert len(s1) == 3 + + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 80b7947eb5239..01db937153b3a 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -351,31 +351,6 @@ def test_partial_set_invalid(self): tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" - def test_partial_set_empty_series(self): - - # GH5226 - - # partially set with an empty object series - s = Series(dtype=object) - s.loc[1] = 1 - tm.assert_series_equal(s, Series([1], index=[1])) - s.loc[3] = 3 - tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) - - s = Series(dtype=object) - s.loc[1] = 1.0 - tm.assert_series_equal(s, Series([1.0], index=[1])) - s.loc[3] = 3.0 - tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3])) - - s = Series(dtype=object) - s.loc["foo"] = 1 - tm.assert_series_equal(s, Series([1], index=["foo"])) - s.loc["bar"] = 3 - tm.assert_series_equal(s, Series([1, 3], index=["foo", "bar"])) - s.loc[3] = 4 - tm.assert_series_equal(s, Series([1, 3, 4], index=["foo", "bar", 3])) - def test_partial_set_empty_frame(self): # partially set with an empty object @@ -504,10 +479,12 @@ def test_partial_set_empty_frame_set_series(self): # GH 5756 # setting with 
empty Series df = DataFrame(Series(dtype=object)) - tm.assert_frame_equal(df, DataFrame({0: Series(dtype=object)})) + expected = DataFrame({0: Series(dtype=object)}) + tm.assert_frame_equal(df, expected) df = DataFrame(Series(name="foo", dtype=object)) - tm.assert_frame_equal(df, DataFrame({"foo": Series(dtype=object)})) + expected = DataFrame({"foo": Series(dtype=object)}) + tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_copy_assignment(self): # GH 5932 @@ -565,19 +542,17 @@ def test_partial_set_empty_frame_empty_consistencies(self): ], ) def test_loc_with_list_of_strings_representing_datetimes( - self, idx, labels, expected_idx + self, idx, labels, expected_idx, frame_or_series ): # GH 11278 - s = Series(range(20), index=idx) - df = DataFrame(range(20), index=idx) + obj = frame_or_series(range(20), index=idx) expected_value = [3, 7, 11] - expected_s = Series(expected_value, expected_idx) - expected_df = DataFrame(expected_value, expected_idx) + expected = frame_or_series(expected_value, expected_idx) - tm.assert_series_equal(expected_s, s.loc[labels]) - tm.assert_series_equal(expected_s, s[labels]) - tm.assert_frame_equal(expected_df, df.loc[labels]) + tm.assert_equal(expected, obj.loc[labels]) + if frame_or_series is Series: + tm.assert_series_equal(expected, obj[labels]) @pytest.mark.parametrize( "idx,labels", @@ -651,16 +626,6 @@ def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( with pytest.raises(KeyError, match=msg): df.loc[labels] - def test_indexing_timeseries_regression(self): - # Issue 34860 - arr = date_range("1/1/2008", "1/1/2009") - result = arr.to_series()["2008"] - - rng = date_range(start="2008-01-01", end="2008-12-31") - expected = Series(rng, index=rng) - - tm.assert_series_equal(result, expected) - def test_index_name_empty(self): # GH 31368 df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) @@ -689,16 +654,3 @@ def test_slice_irregular_datetime_index_with_nan(self): expected = 
DataFrame(range(len(index[:3])), index=index[:3]) result = df["2012-01-01":"2012-01-04"] tm.assert_frame_equal(result, expected) - - def test_slice_datetime_index(self): - # GH35509 - df = DataFrame( - {"col1": ["a", "b", "c"], "col2": [1, 2, 3]}, - index=pd.to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]), - ) - expected = DataFrame( - {"col1": ["a", "c"], "col2": [1, 3]}, - index=pd.to_datetime(["2020-08-01", "2020-08-05"]), - ) - result = df.loc["2020-08"] - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 4337f01ea33e0..72296bb222a5a 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -146,28 +146,6 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) - def test_frame_at_with_duplicate_axes_requires_scalar_lookup(self): - # GH#33041 check that falling back to loc doesn't allow non-scalar - # args to slip in - - arr = np.random.randn(6).reshape(3, 2) - df = DataFrame(arr, columns=["A", "A"]) - - msg = "Invalid call for scalar access" - with pytest.raises(ValueError, match=msg): - df.at[[1, 2]] - with pytest.raises(ValueError, match=msg): - df.at[1, ["A"]] - with pytest.raises(ValueError, match=msg): - df.at[:, "A"] - - with pytest.raises(ValueError, match=msg): - df.at[[1, 2]] = 1 - with pytest.raises(ValueError, match=msg): - df.at[1, ["A"]] = 1 - with pytest.raises(ValueError, match=msg): - df.at[:, "A"] = 1 - def test_series_at_raises_type_error(self): # at should not fallback # GH 7814 diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py index 7da368e4bb321..9461bb74b2a87 100644 --- a/pandas/tests/indexing/test_timedelta.py +++ b/pandas/tests/indexing/test_timedelta.py @@ -104,19 +104,3 @@ def test_roundtrip_thru_setitem(self): assert expected == result tm.assert_frame_equal(df, df_copy) 
- - def test_loc_str_slicing(self): - ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") - ser = ix.to_series() - result = ser.loc[:"1 days"] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) - - def test_loc_slicing(self): - ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") - ser = ix.to_series() - result = ser.loc[: ix[-2]] - expected = ser.iloc[:-1] - - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index c25b8936c1b29..b2fc2e2d0619d 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -4,14 +4,23 @@ from datetime import datetime, timedelta import re +from dateutil.tz import gettz, tzutc import numpy as np import pytest +import pytz -from pandas._libs import iNaT -import pandas._libs.index as _index +from pandas._libs import iNaT, index as libindex import pandas as pd -from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm @@ -65,13 +74,6 @@ def test_dti_reset_index_round_trip(): assert df.reset_index()["Date"][0] == stamp -@pytest.mark.slow -def test_slice_locs_indexerror(): - times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] - s = Series(range(100000), times) - s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] - - def test_slicing_datetimes(): # GH 7523 @@ -114,8 +116,6 @@ def test_slicing_datetimes(): def test_getitem_setitem_datetime_tz_pytz(): - from pytz import timezone as tz - N = 50 # testing with timezone, GH #2785 rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") @@ -134,23 +134,20 @@ def test_getitem_setitem_datetime_tz_pytz(): # repeat with datetimes result = ts.copy() - result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0 - 
result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4] + result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=pytz.timezone("UTC"))] = ts[4] tm.assert_series_equal(result, ts) result = ts.copy() # comparison dates with datetime MUST be localized! - date = tz("US/Central").localize(datetime(1990, 1, 1, 3)) + date = pytz.timezone("US/Central").localize(datetime(1990, 1, 1, 3)) result[date] = 0 result[date] = ts[4] tm.assert_series_equal(result, ts) def test_getitem_setitem_datetime_tz_dateutil(): - from dateutil.tz import tzutc - - from pandas._libs.tslibs.timezones import dateutil_gettz as gettz tz = ( lambda x: tzutc() if x == "UTC" else gettz(x) @@ -295,7 +292,6 @@ def test_getitem_setitem_datetimeindex(): def test_getitem_setitem_periodindex(): - from pandas import period_range N = 50 rng = period_range("1/1/1990", periods=N, freq="H") @@ -466,72 +462,50 @@ def test_duplicate_dates_indexing(dups): assert ts[datetime(2000, 1, 6)] == 0 -def test_range_slice(): - idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]) - - ts = Series(np.random.randn(len(idx)), index=idx) - - result = ts["1/2/2000":] - expected = ts[1:] - tm.assert_series_equal(result, expected) - - result = ts["1/2/2000":"1/3/2000"] - expected = ts[1:4] - tm.assert_series_equal(result, expected) - - def test_groupby_average_dup_values(dups): result = dups.groupby(level=0).mean() expected = dups.groupby(dups.index).mean() tm.assert_series_equal(result, expected) -def test_indexing_over_size_cutoff(): - import datetime - +def test_indexing_over_size_cutoff(monkeypatch): # #1821 - old_cutoff = _index._SIZE_CUTOFF - try: - _index._SIZE_CUTOFF = 1000 - - # create large list of non periodic datetime - dates = [] - sec = datetime.timedelta(seconds=1) - half_sec = datetime.timedelta(microseconds=500000) - d = datetime.datetime(2011, 12, 5, 20, 30) - n = 1100 - for i in range(n): - dates.append(d) - dates.append(d + sec) - 
dates.append(d + sec + half_sec) - dates.append(d + sec + sec + half_sec) - d += 3 * sec - - # duplicate some values in the list - duplicate_positions = np.random.randint(0, len(dates) - 1, 20) - for p in duplicate_positions: - dates[p + 1] = dates[p] - - df = DataFrame( - np.random.randn(len(dates), 4), index=dates, columns=list("ABCD") - ) - - pos = n * 3 - timestamp = df.index[pos] - assert timestamp in df.index - - # it works! - df.loc[timestamp] - assert len(df.loc[[timestamp]]) > 0 - finally: - _index._SIZE_CUTOFF = old_cutoff + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) + + # create large list of non periodic datetime + dates = [] + sec = timedelta(seconds=1) + half_sec = timedelta(microseconds=500000) + d = datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD")) + + pos = n * 3 + timestamp = df.index[pos] + assert timestamp in df.index + + # it works! 
+ df.loc[timestamp] + assert len(df.loc[[timestamp]]) > 0 def test_indexing_over_size_cutoff_period_index(monkeypatch): # GH 27136 - monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000) + monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000) n = 1100 idx = pd.period_range("1/1/2000", freq="T", periods=n) @@ -654,19 +628,3 @@ def test_indexing(): msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" with pytest.raises(KeyError, match=msg): df[df.index[2]] - - -""" -test NaT support -""" - - -def test_setitem_tuple_with_datetimetz(): - # GH 20441 - arr = date_range("2017", periods=4, tz="US/Eastern") - index = [(0, 1), (0, 2), (0, 3), (0, 4)] - result = Series(arr, index=index) - expected = result.copy() - result[(0, 1)] = np.nan - expected.iloc[0] = np.nan - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 2933983a5b18b..71bcce12796f5 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -1,7 +1,7 @@ """ Series.__getitem__ test classes are organized by the type of key passed. 
""" -from datetime import datetime, time +from datetime import date, datetime, time import numpy as np import pytest @@ -9,7 +9,16 @@ from pandas._libs.tslibs import conversion, timezones import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp, date_range, period_range +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -93,8 +102,46 @@ def test_getitem_time_object(self): result.index = result.index._with_freq(None) tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ + # Series with CategoricalIndex + + def test_getitem_scalar_categorical_index(self): + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + ser = Series([1, 2], index=cats) + + expected = ser.iloc[0] + result = ser[cats[0]] + assert result == expected + class TestSeriesGetitemSlices: + def test_getitem_partial_str_slice_with_datetimeindex(self): + # GH#34860 + arr = date_range("1/1/2008", "1/1/2009") + ser = arr.to_series() + result = ser["2008"] + + rng = date_range(start="2008-01-01", end="2008-12-31") + expected = Series(rng, index=rng) + + tm.assert_series_equal(result, expected) + + def test_getitem_slice_strings_with_datetimeindex(self): + idx = DatetimeIndex( + ["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"] + ) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["1/2/2000":] + expected = ts[1:] + tm.assert_series_equal(result, expected) + + result = ts["1/2/2000":"1/3/2000"] + expected = ts[1:4] + tm.assert_series_equal(result, expected) + def test_getitem_slice_2d(self, datetime_series): # GH#30588 multi-dimensional indexing deprecated @@ -119,6 +166,26 @@ def test_getitem_median_slice_bug(self): expected = s[indexer[0]] tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( + "slc, 
positions", + [ + [slice(date(2018, 1, 1), None), [0, 1, 2]], + [slice(date(2019, 1, 2), None), [2]], + [slice(date(2020, 1, 1), None), []], + [slice(None, date(2020, 1, 1)), [0, 1, 2]], + [slice(None, date(2019, 1, 1)), [0]], + ], + ) + def test_getitem_slice_date(self, slc, positions): + # https://github.com/pandas-dev/pandas/issues/31501 + ser = Series( + [0, 1, 2], + DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), + ) + result = ser[slc] + expected = ser.take(positions) + tm.assert_series_equal(result, expected) + class TestSeriesGetitemListLike: @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index f35f1375732cb..86af29eac1bae 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -71,17 +71,6 @@ def test_getitem_setitem_slice_integers(): assert not (s[4:] == 0).any() -def test_setitem_float_labels(): - # note labels are floats - s = Series(["a", "b", "c"], index=[0, 0.5, 1]) - tmp = s.copy() - - s.loc[1] = "zoo" - tmp.iloc[2] = "zoo" - - tm.assert_series_equal(s, tmp) - - def test_slice_float_get_set(datetime_series): msg = ( "cannot do slice indexing on DatetimeIndex with these indexers " diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index b4c5ac0195d26..7e25e5200d610 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -61,6 +61,16 @@ def test_setitem_with_different_tz_casts_to_object(self): ) tm.assert_series_equal(ser, expected) + def test_setitem_tuple_with_datetimetz_values(self): + # GH#20441 + arr = date_range("2017", periods=4, tz="US/Eastern") + index = [(0, 1), (0, 2), (0, 3), (0, 4)] + result = Series(arr, index=index) + expected = result.copy() + result[(0, 1)] = np.nan + expected.iloc[0] = np.nan + tm.assert_series_equal(result, 
expected) + class TestSetitemPeriodDtype: @pytest.mark.parametrize("na_val", [None, np.nan]) From d15e552b086af57406237634d9af90e72ac48fee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Nov 2020 16:26:51 -0800 Subject: [PATCH 061/147] TST/REF: collect tests for get_numeric_data (#37634) * misplaced loc test * TST/REF: collect get_numeric_data tests --- .../frame/methods/test_get_numeric_data.py | 96 +++++++++++++++++++ pandas/tests/frame/test_block_internals.py | 85 +--------------- pandas/tests/generic/test_frame.py | 8 -- pandas/tests/generic/test_series.py | 22 +---- pandas/tests/indexing/test_loc.py | 5 + pandas/tests/internals/test_internals.py | 6 -- .../series/methods/test_get_numeric_data.py | 25 +++++ 7 files changed, 128 insertions(+), 119 deletions(-) create mode 100644 pandas/tests/frame/methods/test_get_numeric_data.py create mode 100644 pandas/tests/series/methods/test_get_numeric_data.py diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py new file mode 100644 index 0000000000000..d73dbdf045be3 --- /dev/null +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -0,0 +1,96 @@ +import numpy as np + +from pandas import Categorical, DataFrame, Index, Series, Timestamp +import pandas._testing as tm +from pandas.core.arrays import IntervalArray, integer_array + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + # get the numeric data + obj = DataFrame({"A": [1, "2", 3.0]}) + result = obj._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data(self): + + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + df = DataFrame( + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [ + np.dtype("float64"), + np.dtype("int64"), + np.dtype(objectname), + 
np.dtype(datetime64name), + ], + index=["a", "b", "c", "f"], + ) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "d": np.array([1.0] * 10, dtype="float32"), + "e": np.array([1] * 10, dtype="int32"), + "f": np.array([1] * 10, dtype="int16"), + "g": Timestamp("20010102"), + }, + index=np.arange(10), + ) + + result = df._get_numeric_data() + expected = df.loc[:, ["a", "b", "d", "e", "f"]] + tm.assert_frame_equal(result, expected) + + only_obj = df.loc[:, ["c", "g"]] + result = only_obj._get_numeric_data() + expected = df.loc[:, []] + tm.assert_frame_equal(result, expected) + + df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) + result = df._get_numeric_data() + expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) + tm.assert_frame_equal(result, expected) + + df = result.copy() + result = df._get_numeric_data() + expected = df + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data_mixed_dtype(self): + # numeric and object columns + + df = DataFrame( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["foo", "bar", "baz"], + "d": [None, None, None], + "e": [3.14, 0.577, 2.773], + } + ) + result = df._get_numeric_data() + tm.assert_index_equal(result.columns, Index(["a", "b", "e"])) + + def test_get_numeric_data_extension_dtype(self): + # GH#22290 + df = DataFrame( + { + "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "B": Categorical(list("abcabc")), + "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "D": IntervalArray.from_breaks(range(7)), + } + ) + result = df._get_numeric_data() + expected = df.loc[:, ["A", "C"]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 34aa11eb76306..5513262af8100 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -16,7 +16,6 @@ 
option_context, ) import pandas._testing as tm -from pandas.core.arrays import IntervalArray, integer_array from pandas.core.internals import ObjectBlock from pandas.core.internals.blocks import IntBlock @@ -306,73 +305,6 @@ def test_is_mixed_type(self, float_frame, float_string_frame): assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type - def test_get_numeric_data(self): - - datetime64name = np.dtype("M8[ns]").name - objectname = np.dtype(np.object_).name - - df = DataFrame( - {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, - index=np.arange(10), - ) - result = df.dtypes - expected = Series( - [ - np.dtype("float64"), - np.dtype("int64"), - np.dtype(objectname), - np.dtype(datetime64name), - ], - index=["a", "b", "c", "f"], - ) - tm.assert_series_equal(result, expected) - - df = DataFrame( - { - "a": 1.0, - "b": 2, - "c": "foo", - "d": np.array([1.0] * 10, dtype="float32"), - "e": np.array([1] * 10, dtype="int32"), - "f": np.array([1] * 10, dtype="int16"), - "g": Timestamp("20010102"), - }, - index=np.arange(10), - ) - - result = df._get_numeric_data() - expected = df.loc[:, ["a", "b", "d", "e", "f"]] - tm.assert_frame_equal(result, expected) - - only_obj = df.loc[:, ["c", "g"]] - result = only_obj._get_numeric_data() - expected = df.loc[:, []] - tm.assert_frame_equal(result, expected) - - df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) - result = df._get_numeric_data() - expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) - tm.assert_frame_equal(result, expected) - - df = result.copy() - result = df._get_numeric_data() - expected = df - tm.assert_frame_equal(result, expected) - - def test_get_numeric_data_extension_dtype(self): - # GH 22290 - df = DataFrame( - { - "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), - "B": Categorical(list("abcabc")), - "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), - "D": IntervalArray.from_breaks(range(7)), - } - ) - 
result = df._get_numeric_data() - expected = df.loc[:, ["A", "C"]] - tm.assert_frame_equal(result, expected) - def test_stale_cached_series_bug_473(self): # this is chained, but ok @@ -390,21 +322,6 @@ def test_stale_cached_series_bug_473(self): exp = Y["g"].sum() # noqa assert pd.isna(Y["g"]["c"]) - def test_get_X_columns(self): - # numeric and object columns - - df = DataFrame( - { - "a": [1, 2, 3], - "b": [True, False, True], - "c": ["foo", "bar", "baz"], - "d": [None, None, None], - "e": [3.14, 0.577, 2.773], - } - ) - - tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"])) - def test_strange_column_corruption_issue(self): # FIXME: dont leave commented-out # (wesm) Unclear how exactly this is related to internal matters @@ -458,7 +375,7 @@ def test_update_inplace_sets_valid_block_values(): df["a"].fillna(1, inplace=True) # check we havent put a Series into any block.values - assert isinstance(df._mgr.blocks[0].values, pd.Categorical) + assert isinstance(df._mgr.blocks[0].values, Categorical) # smoketest for OP bug from GH#35731 assert df.isnull().sum().sum() == 0 diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index da02a82890adc..757f71730819d 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -61,14 +61,6 @@ def test_nonzero_single_element(self): with pytest.raises(ValueError, match=msg): bool(df) - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = DataFrame({"A": [1, "2", 3.0]}) - result = o._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) - self._compare(result, expected) - def test_metadata_propagation_indiv_groupby(self): # groupby df = DataFrame( diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 0a05a42f0fc39..474661e0f2e0a 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -35,31 +35,11 @@ def 
test_set_axis_name_raises(self): with pytest.raises(ValueError, match=msg): s._set_axis_name(name="a", axis=1) - def test_get_numeric_data_preserve_dtype(self): - - # get the numeric data - o = Series([1, 2, 3]) - result = o._get_numeric_data() - self._compare(result, o) - - o = Series([1, "2", 3.0]) - result = o._get_numeric_data() - expected = Series([], dtype=object, index=pd.Index([], dtype=object)) - self._compare(result, expected) - - o = Series([True, False, True]) - result = o._get_numeric_data() - self._compare(result, o) - + def test_get_bool_data_preserve_dtype(self): o = Series([True, False, True]) result = o._get_bool_data() self._compare(result, o) - o = Series(date_range("20130101", periods=3)) - result = o._get_numeric_data() - expected = Series([], dtype="M8[ns]", index=pd.Index([], dtype=object)) - self._compare(result, expected) - def test_nonzero_single_element(self): # allow single item via bool method diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index fff4c0f78f38a..3b0fae537a6e5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -132,6 +132,11 @@ def test_setitem_from_duplicate_axis(self): class TestLoc2: # TODO: better name, just separating out things that rely on base class + def test_loc_getitem_missing_unicode_key(self): + df = DataFrame({"a": [1]}) + with pytest.raises(KeyError, match="\u05d0"): + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + def test_loc_getitem_dups(self): # GH 5678 # repeated getitems on a dup index returning a ndarray diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index bddc50a3cbcc1..88b91ecc79060 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1146,12 +1146,6 @@ def test_make_block_no_pandas_array(): assert result.is_extension is False -def test_missing_unicode_key(): - df = DataFrame({"a": [1]}) - with 
pytest.raises(KeyError, match="\u05d0"): - df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError - - def test_single_block_manager_fastpath_deprecated(): # GH#33092 ser = Series(range(3)) diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py new file mode 100644 index 0000000000000..dc0becf46a24c --- /dev/null +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -0,0 +1,25 @@ +from pandas import Index, Series, date_range +import pandas._testing as tm + + +class TestGetNumericData: + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + obj = Series([1, 2, 3]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series([1, "2", 3.0]) + result = obj._get_numeric_data() + expected = Series([], dtype=object, index=Index([], dtype=object)) + tm.assert_series_equal(result, expected) + + obj = Series([True, False, True]) + result = obj._get_numeric_data() + tm.assert_series_equal(result, obj) + + obj = Series(date_range("20130101", periods=3)) + result = obj._get_numeric_data() + expected = Series([], dtype="M8[ns]", index=Index([], dtype=object)) + tm.assert_series_equal(result, expected) From 86fbacee530abf0a4f73d75ce3630c00575f400b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Nov 2020 16:31:06 -0800 Subject: [PATCH 062/147] REF: de-duplicate _validate_insert_value with _validate_scalar (#37640) --- pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/categorical.py | 5 ++-- pandas/core/arrays/datetimelike.py | 36 ++++++++++++++++++++--------- pandas/core/arrays/interval.py | 3 --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/category.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/extension.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 10 files changed, 35 insertions(+), 25 deletions(-) diff --git a/pandas/core/arrays/_mixins.py 
b/pandas/core/arrays/_mixins.py index 67ac2a3688214..63c414d96c8de 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -45,7 +45,7 @@ def _box_func(self, x): """ return x - def _validate_insert_value(self, value): + def _validate_scalar(self, value): # used by NDArrayBackedExtensionIndex.insert raise AbstractMethodError(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 626fb495dec03..edbf24ca87f5c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1177,9 +1177,6 @@ def map(self, mapper): # ------------------------------------------------------------- # Validators; ideally these can be de-duplicated - def _validate_insert_value(self, value) -> int: - return self._validate_fill_value(value) - def _validate_searchsorted_value(self, value): # searchsorted is very performance sensitive. By converting codes # to same dtype as self.codes, we get much faster performance. @@ -1219,6 +1216,8 @@ def _validate_fill_value(self, fill_value): ) return fill_value + _validate_scalar = _validate_fill_value + # ------------------------------------------------------------- def __array__(self, dtype=None) -> np.ndarray: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 404511895ddf0..7a0d88f29b9b0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -479,10 +479,12 @@ def _validate_fill_value(self, fill_value): f"Got '{str(fill_value)}'." 
) try: - fill_value = self._validate_scalar(fill_value) + return self._validate_scalar(fill_value) except TypeError as err: + if "Cannot compare tz-naive and tz-aware" in str(err): + # tzawareness-compat + raise raise ValueError(msg) from err - return self._unbox(fill_value, setitem=True) def _validate_shift_value(self, fill_value): # TODO(2.0): once this deprecation is enforced, use _validate_fill_value @@ -511,7 +513,14 @@ def _validate_shift_value(self, fill_value): return self._unbox(fill_value, setitem=True) - def _validate_scalar(self, value, allow_listlike: bool = False): + def _validate_scalar( + self, + value, + *, + allow_listlike: bool = False, + setitem: bool = True, + unbox: bool = True, + ): """ Validate that the input value can be cast to our scalar_type. @@ -521,6 +530,11 @@ def _validate_scalar(self, value, allow_listlike: bool = False): allow_listlike: bool, default False When raising an exception, whether the message should say listlike inputs are allowed. + setitem : bool, default True + Whether to check compatibility with setitem strictness. + unbox : bool, default True + Whether to unbox the result before returning. Note: unbox=False + skips the setitem compatibility check. 
Returns ------- @@ -546,7 +560,12 @@ def _validate_scalar(self, value, allow_listlike: bool = False): msg = self._validation_error_message(value, allow_listlike) raise TypeError(msg) - return value + if not unbox: + # NB: In general NDArrayBackedExtensionArray will unbox here; + # this option exists to prevent a performance hit in + # TimedeltaIndex.get_loc + return value + return self._unbox_scalar(value, setitem=setitem) def _validation_error_message(self, value, allow_listlike: bool = False) -> str: """ @@ -611,7 +630,7 @@ def _validate_listlike(self, value, allow_object: bool = False): def _validate_searchsorted_value(self, value): if not is_list_like(value): - value = self._validate_scalar(value, True) + return self._validate_scalar(value, allow_listlike=True, setitem=False) else: value = self._validate_listlike(value) @@ -621,12 +640,7 @@ def _validate_setitem_value(self, value): if is_list_like(value): value = self._validate_listlike(value) else: - value = self._validate_scalar(value, True) - - return self._unbox(value, setitem=True) - - def _validate_insert_value(self, value): - value = self._validate_scalar(value) + return self._validate_scalar(value, allow_listlike=True) return self._unbox(value, setitem=True) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f8ece2a9fe7d4..7b10334804ef9 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -889,9 +889,6 @@ def _validate_fillna_value(self, value): ) raise TypeError(msg) from err - def _validate_insert_value(self, value): - return self._validate_scalar(value) - def _validate_setitem_value(self, value): needs_float_conversion = False diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 98ec3b55e65d9..f350e18198057 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2292,7 +2292,7 @@ def fillna(self, value=None, downcast=None): DataFrame.fillna : Fill NaN values of a DataFrame. 
Series.fillna : Fill NaN Values of a Series. """ - value = self._validate_scalar(value) + value = self._require_scalar(value) if self.hasnans: result = self.putmask(self._isnan, value) if downcast is None: @@ -4140,7 +4140,7 @@ def _validate_fill_value(self, value): return value @final - def _validate_scalar(self, value): + def _require_scalar(self, value): """ Check that this is a scalar value that we can use for setitem-like operations without changing dtype. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8cbd0d83c78d7..525c41bae8b51 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -382,7 +382,7 @@ def astype(self, dtype, copy=True): @doc(Index.fillna) def fillna(self, value, downcast=None): - value = self._validate_scalar(value) + value = self._require_scalar(value) cat = self._data.fillna(value) return type(self)._simple_new(cat, name=self.name) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 9e2ac6013cb43..2cb66557b3bab 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -581,7 +581,7 @@ def _get_insert_freq(self, loc, item): """ Find the `freq` for self.insert(loc, item). """ - value = self._data._validate_insert_value(item) + value = self._data._validate_scalar(item) item = self._data._box_func(value) freq = None diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index cd1871e4687f3..921c7aac2c85b 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -335,7 +335,7 @@ def insert(self, loc: int, item): ValueError if the item is not valid for this dtype. 
""" arr = self._data - code = arr._validate_insert_value(item) + code = arr._validate_scalar(item) new_vals = np.concatenate((arr._ndarray[:loc], [code], arr._ndarray[loc:])) new_arr = arr._from_backing_data(new_vals) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c700acc24f411..2aec86c9cdfae 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -903,7 +903,7 @@ def insert(self, loc, item): ------- IntervalIndex """ - left_insert, right_insert = self._data._validate_insert_value(item) + left_insert, right_insert = self._data._validate_scalar(item) new_left = self.left.insert(loc, left_insert) new_right = self.right.insert(loc, right_insert) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 66fd6943de721..cf5fa4bbb3d75 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -215,7 +215,7 @@ def get_loc(self, key, method=None, tolerance=None): raise InvalidIndexError(key) try: - key = self._data._validate_scalar(key) + key = self._data._validate_scalar(key, unbox=False) except TypeError as err: raise KeyError(key) from err From ab422cf31935461948ca5ae40604a54a4a6c329e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 6 Nov 2020 17:24:27 -0800 Subject: [PATCH 063/147] CI: catch windows py38 OSError (#37659) --- pandas/_libs/tslibs/tzconversion.pxd | 4 +- pandas/_libs/tslibs/tzconversion.pyx | 8 +++- pandas/tests/frame/test_reductions.py | 8 ++++ pandas/tests/indexes/datetimes/test_ops.py | 40 +++++++++++--------- pandas/tests/tseries/offsets/test_offsets.py | 4 ++ 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 1990afd77a8fb..3666d00707ac8 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -2,7 +2,9 @@ from cpython.datetime cimport tzinfo from numpy cimport int64_t -cdef int64_t 
tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=*) +cdef int64_t tz_convert_utc_to_tzlocal( + int64_t utc_val, tzinfo tz, bint* fold=* +) except? -1 cpdef int64_t tz_convert_from_utc_single(int64_t val, tzinfo tz) cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 4a3fac1954ab7..f08a86b1262e6 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -355,7 +355,9 @@ cdef inline str _render_tstamp(int64_t val): # ---------------------------------------------------------------------- # Timezone Conversion -cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz, bint* fold=NULL): +cdef int64_t tz_convert_utc_to_tzlocal( + int64_t utc_val, tzinfo tz, bint* fold=NULL +) except? -1: """ Parameters ---------- @@ -549,8 +551,10 @@ cdef inline int64_t _tzlocal_get_offset_components(int64_t val, tzinfo tz, return int(td.total_seconds() * 1_000_000_000) +# OSError may be thrown by tzlocal on windows at or close to 1970-01-01 +# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True, - bint* fold=NULL): + bint* fold=NULL) except? -1: """ Convert the i8 representation of a datetime from a tzlocal timezone to UTC, or vice-versa. 
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index fbb51b70d34fd..374d185f45844 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1,9 +1,11 @@ from datetime import timedelta from decimal import Decimal +from dateutil.tz import tzlocal import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -1172,6 +1174,12 @@ def test_min_max_dt64_with_NaT(self): def test_min_max_dt64_with_NaT_skipna_false(self, tz_naive_fixture): # GH#36907 tz = tz_naive_fixture + if isinstance(tz, tzlocal) and is_platform_windows(): + pytest.xfail( + reason="GH#37659 OSError raised within tzlocal bc Windows " + "chokes in times before 1970-01-01" + ) + df = DataFrame( { "a": [ diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 1d64fde103e9e..0359ee17f87c5 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -1,8 +1,11 @@ from datetime import datetime +from dateutil.tz import tzlocal import numpy as np import pytest +from pandas.compat import IS64 + import pandas as pd from pandas import ( DateOffset, @@ -106,24 +109,27 @@ def test_repeat(self, tz_naive_fixture): with pytest.raises(ValueError, match=msg): np.repeat(rng, reps, axis=1) - def test_resolution(self, tz_naive_fixture): + @pytest.mark.parametrize( + "freq,expected", + [ + ("A", "day"), + ("Q", "day"), + ("M", "day"), + ("D", "day"), + ("H", "hour"), + ("T", "minute"), + ("S", "second"), + ("L", "millisecond"), + ("U", "microsecond"), + ], + ) + def test_resolution(self, tz_naive_fixture, freq, expected): tz = tz_naive_fixture - for freq, expected in zip( - ["A", "Q", "M", "D", "H", "T", "S", "L", "U"], - [ - "day", - "day", - "day", - "day", - "hour", - "minute", - "second", - "millisecond", - "microsecond", - ], - ): - idx = 
pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) - assert idx.resolution == expected + if freq == "A" and not IS64 and isinstance(tz, tzlocal): + pytest.xfail(reason="OverflowError inside tzlocal past 2038") + + idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) + assert idx.resolution == expected def test_value_counts_unique(self, tz_naive_fixture): tz = tz_naive_fixture diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index fba123e47feb2..fca1316493e85 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,6 +1,7 @@ from datetime import date, datetime, time as dt_time, timedelta from typing import Dict, List, Optional, Tuple, Type +from dateutil.tz import tzlocal import numpy as np import pytest @@ -14,6 +15,7 @@ import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import ApplyTypeError, _get_offset, _offset_map from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG +from pandas.compat import IS64 from pandas.compat.numpy import np_datetime64_compat from pandas.errors import PerformanceWarning @@ -129,6 +131,8 @@ def test_apply_out_of_range(self, tz_naive_fixture): tz = tz_naive_fixture if self._offset is None: return + if isinstance(tz, tzlocal) and not IS64: + pytest.xfail(reason="OverflowError inside tzlocal past 2038") # try to create an out-of-bounds result timestamp; if we can't create # the offset skip From f7579f636c4ab7ca92128cc2930515e9dfb08c87 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 6 Nov 2020 17:30:19 -0800 Subject: [PATCH 064/147] share test (#37679) --- pandas/tests/frame/test_api.py | 38 ++++++++++++++--------- pandas/tests/series/test_api.py | 53 --------------------------------- 2 files changed, 24 insertions(+), 67 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 25d3fab76ca36..157c8687808b3 100644 --- 
a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -263,12 +263,16 @@ def _check_f(base, f): @async_mark() @td.check_file_leaks - async def test_tab_complete_warning(self, ip): + async def test_tab_complete_warning(self, ip, frame_or_series): # GH 16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter - code = "from pandas import DataFrame; df = DataFrame()" + if frame_or_series is DataFrame: + code = "from pandas import DataFrame; obj = DataFrame()" + else: + code = "from pandas import Series; obj = Series(dtype=object)" + await ip.run_code(code) # TODO: remove it when Ipython updates @@ -283,7 +287,7 @@ async def test_tab_complete_warning(self, ip): ) with warning: with provisionalcompleter("ignore"): - list(ip.Completer.completions("df.", 1)) + list(ip.Completer.completions("obj.", 1)) def test_attrs(self): df = DataFrame({"A": [2, 3]}) @@ -294,9 +298,15 @@ def test_attrs(self): assert result.attrs == {"version": 1} @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) - def test_set_flags(self, allows_duplicate_labels): - df = DataFrame({"A": [1, 2]}) - result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels) + def test_set_flags(self, allows_duplicate_labels, frame_or_series): + obj = DataFrame({"A": [1, 2]}) + key = (0, 0) + if frame_or_series is Series: + obj = obj["A"] + key = 0 + + result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) + if allows_duplicate_labels is None: # We don't update when it's not provided assert result.flags.allows_duplicate_labels is True @@ -304,21 +314,21 @@ def test_set_flags(self, allows_duplicate_labels): assert result.flags.allows_duplicate_labels is allows_duplicate_labels # We made a copy - assert df is not result + assert obj is not result - # We didn't mutate df - assert df.flags.allows_duplicate_labels is True + # We didn't mutate obj + assert obj.flags.allows_duplicate_labels is True # But 
we didn't copy data - result.iloc[0, 0] = 0 - assert df.iloc[0, 0] == 0 + result.iloc[key] = 0 + assert obj.iloc[key] == 0 # Now we do copy. - result = df.set_flags( + result = obj.set_flags( copy=True, allows_duplicate_labels=allows_duplicate_labels ) - result.iloc[0, 0] = 10 - assert df.iloc[0, 0] == 0 + result.iloc[key] = 10 + assert obj.iloc[key] == 0 @skip_if_no("jinja2") def test_constructor_expanddim_lookup(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 717d8b5c90d85..beace074894a8 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -3,9 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td -from pandas.util._test_decorators import async_mark - import pandas as pd from pandas import DataFrame, Index, Series, Timedelta, Timestamp, date_range import pandas._testing as tm @@ -216,30 +213,6 @@ def test_empty_method(self): for full_series in [Series([1]), s2]: assert not full_series.empty - @async_mark() - @td.check_file_leaks - async def test_tab_complete_warning(self, ip): - # https://github.com/pandas-dev/pandas/issues/16409 - pytest.importorskip("IPython", minversion="6.0.0") - from IPython.core.completer import provisionalcompleter - - code = "import pandas as pd; s = Series(dtype=object)" - await ip.run_code(code) - - # TODO: remove it when Ipython updates - # GH 33567, jedi version raises Deprecation warning in Ipython - import jedi - - if jedi.__version__ < "0.17.0": - warning = tm.assert_produces_warning(None) - else: - warning = tm.assert_produces_warning( - DeprecationWarning, check_stacklevel=False - ) - with warning: - with provisionalcompleter("ignore"): - list(ip.Completer.completions("s.", 1)) - def test_integer_series_size(self): # GH 25580 s = Series(range(9)) @@ -253,29 +226,3 @@ def test_attrs(self): s.attrs["version"] = 1 result = s + 1 assert result.attrs == {"version": 1} - - @pytest.mark.parametrize("allows_duplicate_labels", [True, 
False, None]) - def test_set_flags(self, allows_duplicate_labels): - df = Series([1, 2]) - result = df.set_flags(allows_duplicate_labels=allows_duplicate_labels) - if allows_duplicate_labels is None: - # We don't update when it's not provided - assert result.flags.allows_duplicate_labels is True - else: - assert result.flags.allows_duplicate_labels is allows_duplicate_labels - - # We made a copy - assert df is not result - # We didn't mutate df - assert df.flags.allows_duplicate_labels is True - - # But we didn't copy data - result.iloc[0] = 0 - assert df.iloc[0] == 0 - - # Now we do copy. - result = df.set_flags( - copy=True, allows_duplicate_labels=allows_duplicate_labels - ) - result.iloc[0] = 10 - assert df.iloc[0] == 0 From 05313b3071917ce2bd212c82ba2aa5b5fc01075a Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Sat, 7 Nov 2020 21:43:52 +0700 Subject: [PATCH 065/147] TST: match matplotlib warning message (#37666) * TST: match matplotlib warning message * TST: match full message --- pandas/tests/plotting/frame/test_frame.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9aab765dca96b..f2d2203d25b6c 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1541,11 +1541,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # 
Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1622,11 +1622,11 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) From bf7083b8d4cf66555822e680db24b3402de297d4 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Sun, 8 Nov 2020 00:47:11 +0100 Subject: [PATCH 066/147] pd.Series.loc.__getitem__ promotes to float64 instead of raising KeyError (#37687) --- pandas/tests/indexing/test_loc.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3b0fae537a6e5..0faa784634fd2 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1512,6 +1512,16 @@ def test_series_loc_getitem_label_list_missing_values(): s.loc[key] +def test_series_getitem_label_list_missing_integer_values(): + # GH: 25927 + s = Series( + index=np.array([9730701000001104, 10049011000001109]), + data=np.array([999000011000001104, 999000011000001104]), + ) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[np.array([9730701000001104, 10047311000001102])] + + @pytest.mark.parametrize( "columns, column_key, expected_columns, check_column_type", [ From a558f63b28ef2aad2edd3484d763d78ceb6b6b8e 
Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 16:02:15 -0800 Subject: [PATCH 067/147] REF/TST: misplaced Categorical tests (#37678) --- pandas/tests/arrays/categorical/test_algos.py | 115 ------------------ .../arrays/categorical/test_analytics.py | 7 -- .../tests/arrays/categorical/test_indexing.py | 61 +++++++++- .../tests/arrays/categorical/test_missing.py | 7 ++ .../arrays/categorical/test_operators.py | 47 ------- .../tests/arrays/categorical/test_replace.py | 26 ++++ pandas/tests/arrays/categorical/test_take.py | 92 ++++++++++++++ 7 files changed, 185 insertions(+), 170 deletions(-) create mode 100644 pandas/tests/arrays/categorical/test_take.py diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 45e0d503f30e7..5b0004a395334 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -59,30 +59,6 @@ def test_isin_cats(): tm.assert_numpy_array_equal(expected, result) -@pytest.mark.parametrize( - "to_replace, value, result, expected_error_msg", - [ - ("b", "c", ["a", "c"], "Categorical.categories are different"), - ("c", "d", ["a", "b"], None), - # https://github.com/pandas-dev/pandas/issues/33288 - ("a", "a", ["a", "b"], None), - ("b", None, ["a", None], "Categorical.categories length are different"), - ], -) -def test_replace(to_replace, value, result, expected_error_msg): - # GH 26988 - cat = pd.Categorical(["a", "b"]) - expected = pd.Categorical(result) - result = cat.replace(to_replace, value) - tm.assert_categorical_equal(result, expected) - if to_replace == "b": # the "c" test is supposed to be unchanged - with pytest.raises(AssertionError, match=expected_error_msg): - # ensure non-inplace call does not affect original - tm.assert_categorical_equal(cat, expected) - cat.replace(to_replace, value, inplace=True) - tm.assert_categorical_equal(cat, expected) - - @pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), 
np.array([])]) def test_isin_empty(empty): s = pd.Categorical(["a", "b"]) @@ -105,94 +81,3 @@ def test_diff(): result = df.diff() tm.assert_frame_equal(result, expected) - - -class TestTake: - # https://github.com/pandas-dev/pandas/issues/20664 - - def test_take_default_allow_fill(self): - cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(None): - result = cat.take([0, -1]) - - assert result.equals(cat) - - def test_take_positive_no_warning(self): - cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(None): - cat.take([0, 0]) - - def test_take_bounds(self, allow_fill): - # https://github.com/pandas-dev/pandas/issues/20664 - cat = pd.Categorical(["a", "b", "a"]) - if allow_fill: - msg = "indices are out-of-bounds" - else: - msg = "index 4 is out of bounds for( axis 0 with)? size 3" - with pytest.raises(IndexError, match=msg): - cat.take([4, 5], allow_fill=allow_fill) - - def test_take_empty(self, allow_fill): - # https://github.com/pandas-dev/pandas/issues/20664 - cat = pd.Categorical([], categories=["a", "b"]) - if allow_fill: - msg = "indices are out-of-bounds" - else: - msg = "cannot do a non-empty take from an empty axes" - with pytest.raises(IndexError, match=msg): - cat.take([0], allow_fill=allow_fill) - - def test_positional_take(self, ordered): - cat = pd.Categorical( - ["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered - ) - result = cat.take([0, 1, 2], allow_fill=False) - expected = pd.Categorical( - ["a", "a", "b"], categories=cat.categories, ordered=ordered - ) - tm.assert_categorical_equal(result, expected) - - def test_positional_take_unobserved(self, ordered): - cat = pd.Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) - result = cat.take([1, 0], allow_fill=False) - expected = pd.Categorical( - ["b", "a"], categories=cat.categories, ordered=ordered - ) - tm.assert_categorical_equal(result, expected) - - def test_take_allow_fill(self): - # https://github.com/pandas-dev/pandas/issues/23296 - 
cat = pd.Categorical(["a", "a", "b"]) - result = cat.take([0, -1, -1], allow_fill=True) - expected = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b"]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_with_negative_one(self): - # -1 was a category - cat = pd.Categorical([-1, 0, 1]) - result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) - expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_value(self): - # https://github.com/pandas-dev/pandas/issues/23296 - cat = pd.Categorical(["a", "b", "c"]) - result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) - expected = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_value_new_raises(self): - # https://github.com/pandas-dev/pandas/issues/23296 - cat = pd.Categorical(["a", "b", "c"]) - xpr = r"'fill_value=d' is not present in this Categorical's categories" - with pytest.raises(ValueError, match=xpr): - cat.take([0, 1, -1], fill_value="d", allow_fill=True) - - def test_take_nd_deprecated(self): - cat = pd.Categorical(["a", "b", "c"]) - with tm.assert_produces_warning(FutureWarning): - cat.take_nd([0, 1]) - - ci = pd.Index(cat) - with tm.assert_produces_warning(FutureWarning): - ci.take_nd([0, 1]) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 4bf9b4b40d0b6..98dcdd1692117 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -359,10 +359,3 @@ def test_validate_inplace_raises(self, value): with pytest.raises(ValueError, match=msg): cat.sort_values(inplace=value) - - def test_isna(self): - exp = np.array([False, False, True]) - c = Categorical(["a", "b", np.nan]) - res = c.isna() - - tm.assert_numpy_array_equal(res, exp) diff --git 
a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index bf0b5289b5df1..6068166cb8618 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -1,7 +1,17 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + PeriodIndex, + Series, + Timedelta, + Timestamp, +) import pandas._testing as tm import pandas.core.common as com from pandas.tests.arrays.categorical.common import TestCategorical @@ -256,6 +266,55 @@ def test_where_ordered_differs_rasies(self): ser.where([True, False, True], other) +class TestContains: + def test_contains(self): + # GH#21508 + c = Categorical(list("aabbca"), categories=list("cab")) + + assert "b" in c + assert "z" not in c + assert np.nan not in c + with pytest.raises(TypeError, match="unhashable type: 'list'"): + assert [1] in c + + # assert codes NOT in index + assert 0 not in c + assert 1 not in c + + c = Categorical(list("aabbca") + [np.nan], categories=list("cab")) + assert np.nan in c + + @pytest.mark.parametrize( + "item, expected", + [ + (Interval(0, 1), True), + (1.5, True), + (Interval(0.5, 1.5), False), + ("a", False), + (Timestamp(1), False), + (Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH#23705 + cat = Categorical(IntervalIndex.from_breaks(range(3))) + result = item in cat + assert result is expected + + def test_contains_list(self): + # GH#21729 + cat = Categorical([1, 2, 3]) + + assert "a" not in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in cat + + @pytest.mark.parametrize("index", [True, False]) def test_mask_with_boolean(index): s = Series(range(3)) diff --git 
a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 364c290edc46c..cb0ba128c1fb7 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -11,6 +11,13 @@ class TestCategoricalMissing: + def test_isna(self): + exp = np.array([False, False, True]) + cat = Categorical(["a", "b", np.nan]) + res = cat.isna() + + tm.assert_numpy_array_equal(res, exp) + def test_na_flags_int_categories(self): # #1457 diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 51dc66c18a3e6..328b5771e617c 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -395,50 +395,3 @@ def test_numeric_like_ops(self): msg = "Object with dtype category cannot perform the numpy op log" with pytest.raises(TypeError, match=msg): np.log(s) - - def test_contains(self): - # GH21508 - c = Categorical(list("aabbca"), categories=list("cab")) - - assert "b" in c - assert "z" not in c - assert np.nan not in c - with pytest.raises(TypeError, match="unhashable type: 'list'"): - assert [1] in c - - # assert codes NOT in index - assert 0 not in c - assert 1 not in c - - c = Categorical(list("aabbca") + [np.nan], categories=list("cab")) - assert np.nan in c - - @pytest.mark.parametrize( - "item, expected", - [ - (pd.Interval(0, 1), True), - (1.5, True), - (pd.Interval(0.5, 1.5), False), - ("a", False), - (pd.Timestamp(1), False), - (pd.Timedelta(1), False), - ], - ids=str, - ) - def test_contains_interval(self, item, expected): - # GH 23705 - cat = Categorical(pd.IntervalIndex.from_breaks(range(3))) - result = item in cat - assert result is expected - - def test_contains_list(self): - # GH#21729 - cat = Categorical([1, 2, 3]) - - assert "a" not in cat - - with pytest.raises(TypeError, match="unhashable type"): - ["a"] in cat - - with pytest.raises(TypeError, match="unhashable 
type"): - ["a", "b"] in cat diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py index 8b784fde1d3c5..5889195ad68db 100644 --- a/pandas/tests/arrays/categorical/test_replace.py +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +from pandas import Categorical import pandas._testing as tm @@ -45,3 +46,28 @@ def test_replace(to_replace, value, expected, flip_categories): tm.assert_series_equal(expected, result, check_category_order=False) tm.assert_series_equal(expected, s, check_category_order=False) + + +@pytest.mark.parametrize( + "to_replace, value, result, expected_error_msg", + [ + ("b", "c", ["a", "c"], "Categorical.categories are different"), + ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), + ("b", None, ["a", None], "Categorical.categories length are different"), + ], +) +def test_replace2(to_replace, value, result, expected_error_msg): + # TODO: better name + # GH#26988 + cat = Categorical(["a", "b"]) + expected = Categorical(result) + result = cat.replace(to_replace, value) + tm.assert_categorical_equal(result, expected) + if to_replace == "b": # the "c" test is supposed to be unchanged + with pytest.raises(AssertionError, match=expected_error_msg): + # ensure non-inplace call does not affect original + tm.assert_categorical_equal(cat, expected) + cat.replace(to_replace, value, inplace=True) + tm.assert_categorical_equal(cat, expected) diff --git a/pandas/tests/arrays/categorical/test_take.py b/pandas/tests/arrays/categorical/test_take.py new file mode 100644 index 0000000000000..7a27f5c3e73ad --- /dev/null +++ b/pandas/tests/arrays/categorical/test_take.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest + +from pandas import Categorical, Index +import pandas._testing as tm + + +class TestTake: + # https://github.com/pandas-dev/pandas/issues/20664 + + def 
test_take_default_allow_fill(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) + + def test_take_positive_no_warning(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + cat.take([0, 0]) + + def test_take_bounds(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical(["a", "b", "a"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "index 4 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + cat.take([4, 5], allow_fill=allow_fill) + + def test_take_empty(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical([], categories=["a", "b"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "cannot do a non-empty take from an empty axes" + with pytest.raises(IndexError, match=msg): + cat.take([0], allow_fill=allow_fill) + + def test_positional_take(self, ordered): + cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) + result = cat.take([0, 1, 2], allow_fill=False) + expected = Categorical( + ["a", "a", "b"], categories=cat.categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + def test_positional_take_unobserved(self, ordered): + cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) + result = cat.take([1, 0], allow_fill=False) + expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_take_allow_fill(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "a", "b"]) + result = cat.take([0, -1, -1], allow_fill=True) + expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_with_negative_one(self): + # -1 was a category + cat = 
Categorical([-1, 0, 1]) + result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) + expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) + expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value_new_raises(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + xpr = r"'fill_value=d' is not present in this Categorical's categories" + with pytest.raises(ValueError, match=xpr): + cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) + + ci = Index(cat) + with tm.assert_produces_warning(FutureWarning): + ci.take_nd([0, 1]) From 3e8b69fc9b349a38f4f8709d2d0787858c09be64 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 16:07:13 -0800 Subject: [PATCH 068/147] REF/TST: collect indexing tests by method (#37677) --- .../tests/frame/indexing/test_categorical.py | 2 +- pandas/tests/frame/indexing/test_getitem.py | 107 +++++++++++++++++- pandas/tests/frame/indexing/test_indexing.py | 15 --- pandas/tests/indexing/test_at.py | 71 +++++++++++- pandas/tests/indexing/test_categorical.py | 95 ---------------- pandas/tests/indexing/test_datetime.py | 43 ------- pandas/tests/indexing/test_iloc.py | 23 +++- pandas/tests/indexing/test_loc.py | 56 +++++++++ pandas/tests/indexing/test_scalar.py | 63 ----------- pandas/tests/series/indexing/test_datetime.py | 17 --- pandas/tests/series/indexing/test_numeric.py | 13 --- pandas/tests/series/methods/test_astype.py | 45 ++++++++ pandas/tests/series/methods/test_item.py | 49 ++++++++ 
.../tests/series/methods/test_reset_index.py | 20 +++- pandas/tests/series/methods/test_values.py | 5 + pandas/tests/series/test_api.py | 51 +-------- pandas/tests/series/test_dtypes.py | 46 -------- 17 files changed, 374 insertions(+), 347 deletions(-) create mode 100644 pandas/tests/series/methods/test_item.py diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index d3c907f4ce30f..6137cadc93125 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -352,7 +352,7 @@ def test_assigning_ops(self): df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) - def test_setitem_single_row_categorical(self): + def test_loc_setitem_single_row_categorical(self): # GH 25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index d9cdfa5ea45ec..079cc12389835 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -1,7 +1,16 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, period_range +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + MultiIndex, + Timestamp, + get_dummies, + period_range, +) import pandas._testing as tm @@ -29,3 +38,99 @@ def test_getitem_periodindex(self): ts = df["1/1/2000"] tm.assert_series_equal(ts, df.iloc[:, 0]) + + def test_getitem_list_of_labels_categoricalindex_cols(self): + # GH#16115 + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + expected = DataFrame( + [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats + ) + dummies = get_dummies(cats) + result = dummies[list(dummies.columns)] + tm.assert_frame_equal(result, expected) + + +class TestGetitemCallable: + def 
test_getitem_callable(self, float_frame): + # GH#12533 + result = float_frame[lambda x: "A"] + expected = float_frame.loc[:, "A"] + tm.assert_series_equal(result, expected) + + result = float_frame[lambda x: ["A", "B"]] + expected = float_frame.loc[:, ["A", "B"]] + tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) + + df = float_frame[:3] + result = df[lambda x: [True, False, True]] + expected = float_frame.iloc[[0, 2], :] + tm.assert_frame_equal(result, expected) + + +class TestGetitemBooleanMask: + def test_getitem_bool_mask_categorical_index(self): + + df3 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=True), + name="B", + ), + ) + df4 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=False), + name="B", + ), + ) + + result = df3[df3.index == "a"] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == "a"] + expected = df4.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index == 1] + expected = df3.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == 1] + expected = df4.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # since we have an ordered categorical + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=True, + # name='B') + result = df3[df3.index < 2] + expected = df3.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index > 1] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name='B') + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + df4[df4.index < 2] + with 
pytest.raises(TypeError, match=msg): + df4[df4.index > 1] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4214ac14cba49..ff9646d45c0ac 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -110,21 +110,6 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): with pytest.raises(KeyError, match="not in index"): frame[idx] - def test_getitem_callable(self, float_frame): - # GH 12533 - result = float_frame[lambda x: "A"] - expected = float_frame.loc[:, "A"] - tm.assert_series_equal(result, expected) - - result = float_frame[lambda x: ["A", "B"]] - expected = float_frame.loc[:, ["A", "B"]] - tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) - - df = float_frame[:3] - result = df[lambda x: [True, False, True]] - expected = float_frame.iloc[[0, 2], :] - tm.assert_frame_equal(result, expected) - def test_setitem_list(self, float_frame): float_frame["E"] = "foo" diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 2e06d8c73d7d1..46299fadf7789 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame +from pandas import DataFrame, Series import pandas._testing as tm @@ -39,3 +39,72 @@ def test_at_with_duplicate_axes_requires_scalar_lookup(self): df.at[1, ["A"]] = 1 with pytest.raises(ValueError, match=msg): df.at[:, "A"] = 1 + + +class TestAtErrors: + # TODO: De-duplicate/parametrize + # test_at_series_raises_key_error, test_at_frame_raises_key_error, + # test_at_series_raises_key_error2, test_at_frame_raises_key_error2 + + def test_at_series_raises_key_error(self): + # GH#31724 .at should match .loc + + ser = Series([1, 2, 3], index=[3, 2, 1]) + result = ser.at[1] + assert result == 3 + result = ser.loc[1] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + ser.at["a"] + with 
pytest.raises(KeyError, match="a"): + # .at should match .loc + ser.loc["a"] + + def test_at_frame_raises_key_error(self): + # GH#31724 .at should match .loc + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + + result = df.at[1, 0] + assert result == 3 + result = df.loc[1, 0] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + df.at["a", 0] + with pytest.raises(KeyError, match="a"): + df.loc["a", 0] + + with pytest.raises(KeyError, match="a"): + df.at[1, "a"] + with pytest.raises(KeyError, match="a"): + df.loc[1, "a"] + + def test_at_series_raises_key_error2(self): + # at should not fallback + # GH#7814 + # GH#31724 .at should match .loc + ser = Series([1, 2, 3], index=list("abc")) + result = ser.at["a"] + assert result == 1 + result = ser.loc["a"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + ser.at[0] + with pytest.raises(KeyError, match="^0$"): + ser.loc[0] + + def test_at_frame_raises_key_error2(self): + # GH#31724 .at should match .loc + df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) + result = df.at["a", "A"] + assert result == 1 + result = df.loc["a", "A"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + df.at["a", 0] + with pytest.raises(KeyError, match="^0$"): + df.loc["a", 0] diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6cdd73d37aec8..9885765bf53e4 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -2,7 +2,6 @@ import pytest from pandas.core.dtypes.common import is_categorical_dtype -from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd from pandas import ( @@ -276,27 +275,6 @@ def test_slicing_doc_examples(self): ) tm.assert_frame_equal(result, expected) - def test_getitem_category_type(self): - # GH 14580 - # test iloc() on Series with Categorical data - - s = Series([1, 2, 3]).astype("category") - - # get slice - result = s.iloc[0:2] - expected = 
Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - - # get list of indexes - result = s.iloc[[0, 1]] - expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - - # get boolean array - result = s.iloc[[True, False, False]] - expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - def test_loc_listlike(self): # list of labels @@ -413,17 +391,6 @@ def test_loc_listlike_dtypes(self): with pytest.raises(KeyError, match=msg): df.loc[["a", "x"]] - def test_getitem_with_listlike(self): - # GH 16115 - cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) - - expected = DataFrame( - [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats - ) - dummies = pd.get_dummies(cats) - result = dummies[list(dummies.columns)] - tm.assert_frame_equal(result, expected) - def test_ix_categorical_index(self): # GH 12531 df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) @@ -512,68 +479,6 @@ def test_loc_and_at_with_categorical_index(self): assert df.loc["B", 1] == 4 assert df.at["B", 1] == 4 - def test_getitem_bool_mask_categorical_index(self): - - df3 = DataFrame( - { - "A": np.arange(6, dtype="int64"), - }, - index=CategoricalIndex( - [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=True), name="B" - ), - ) - df4 = DataFrame( - { - "A": np.arange(6, dtype="int64"), - }, - index=CategoricalIndex( - [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=False), name="B" - ), - ) - - result = df3[df3.index == "a"] - expected = df3.iloc[[]] - tm.assert_frame_equal(result, expected) - - result = df4[df4.index == "a"] - expected = df4.iloc[[]] - tm.assert_frame_equal(result, expected) - - result = df3[df3.index == 1] - expected = df3.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - result = df4[df4.index == 1] - expected = df4.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - # since we 
have an ordered categorical - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=True, - # name='B') - result = df3[df3.index < 2] - expected = df3.iloc[[4]] - tm.assert_frame_equal(result, expected) - - result = df3[df3.index > 1] - expected = df3.iloc[[]] - tm.assert_frame_equal(result, expected) - - # unordered - # cannot be compared - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=False, - # name='B') - msg = "Unordered Categoricals can only compare equality or not" - with pytest.raises(TypeError, match=msg): - df4[df4.index < 2] - with pytest.raises(TypeError, match=msg): - df4[df4.index > 1] - def test_indexing_with_category(self): # https://github.com/pandas-dev/pandas/issues/12564 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index fad3478499929..e7bf186ae6456 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -7,33 +7,6 @@ class TestDatetimeIndex: - def test_setitem_with_datetime_tz(self): - # 16889 - # support .loc with alignment and tz-aware DatetimeIndex - mask = np.array([True, False, True, False]) - - idx = date_range("20010101", periods=4, tz="UTC") - df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") - - result = df.copy() - result.loc[mask, :] = df.loc[mask, :] - tm.assert_frame_equal(result, df) - - result = df.copy() - result.loc[mask] = df.loc[mask] - tm.assert_frame_equal(result, df) - - idx = date_range("20010101", periods=4) - df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") - - result = df.copy() - result.loc[mask, :] = df.loc[mask, :] - tm.assert_frame_equal(result, df) - - result = df.copy() - result.loc[mask] = df.loc[mask] - tm.assert_frame_equal(result, df) - def test_indexing_with_datetime_tz(self): # GH#8260 @@ -187,22 +160,6 @@ def test_indexing_with_datetimeindex_tz(self): expected = Series([0, 5], index=index) tm.assert_series_equal(result, 
expected) - def test_partial_setting_with_datetimelike_dtype(self): - - # GH9478 - # a datetimeindex alignment issue with partial setting - df = DataFrame( - np.arange(6.0).reshape(3, 2), - columns=list("AB"), - index=date_range("1/1/2000", periods=3, freq="1H"), - ) - expected = df.copy() - expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] - - mask = df.A < 1 - df.loc[mask, "C"] = df.loc[mask].index - tm.assert_frame_equal(df, expected) - def test_series_partial_set_datetime(self): # GH 11497 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6c80354610a78..f8dfda3dab486 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, concat, date_range, isna +from pandas import CategoricalDtype, DataFrame, Series, concat, date_range, isna import pandas._testing as tm from pandas.api.types import is_scalar from pandas.core.indexing import IndexingError @@ -748,6 +748,27 @@ def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): tm.assert_series_equal(result, expected) + def test_iloc_getitem_categorical_values(self): + # GH#14580 + # test iloc() on Series with Categorical data + + ser = Series([1, 2, 3]).astype("category") + + # get slice + result = ser.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = ser.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = ser.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 
0faa784634fd2..6939b280a988b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1501,6 +1501,62 @@ def test_loc_getitem_slice_floats_inexact(self): s1 = df.loc[52195.1:52198.9] assert len(s1) == 3 + def test_loc_getitem_float_slice_float64index(self): + ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + idx = np.arange(10, 20, dtype=float) + idx[2] = 12.2 + ser.index = idx + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + +class TestLocBooleanMask: + def test_loc_setitem_mask_with_datetimeindex_tz(self): + # GH#16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range("20010101", periods=4, tz="UTC") + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + idx = date_range("20010101", periods=4) + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + def test_loc_setitem_mask_and_label_with_datetimeindex(self): + # GH#9478 + # a datetimeindex alignment issue with partial setting + df = DataFrame( + np.arange(6.0).reshape(3, 2), + columns=list("AB"), + index=date_range("1/1/2000", periods=3, freq="1H"), + ) + expected = df.copy() + expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, "C"] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 diff --git a/pandas/tests/indexing/test_scalar.py 
b/pandas/tests/indexing/test_scalar.py index 72296bb222a5a..127d00c217a15 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -146,69 +146,6 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) - def test_series_at_raises_type_error(self): - # at should not fallback - # GH 7814 - # GH#31724 .at should match .loc - ser = Series([1, 2, 3], index=list("abc")) - result = ser.at["a"] - assert result == 1 - result = ser.loc["a"] - assert result == 1 - - with pytest.raises(KeyError, match="^0$"): - ser.at[0] - with pytest.raises(KeyError, match="^0$"): - ser.loc[0] - - def test_frame_raises_key_error(self): - # GH#31724 .at should match .loc - df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) - result = df.at["a", "A"] - assert result == 1 - result = df.loc["a", "A"] - assert result == 1 - - with pytest.raises(KeyError, match="^0$"): - df.at["a", 0] - with pytest.raises(KeyError, match="^0$"): - df.loc["a", 0] - - def test_series_at_raises_key_error(self): - # GH#31724 .at should match .loc - - ser = Series([1, 2, 3], index=[3, 2, 1]) - result = ser.at[1] - assert result == 3 - result = ser.loc[1] - assert result == 3 - - with pytest.raises(KeyError, match="a"): - ser.at["a"] - with pytest.raises(KeyError, match="a"): - # .at should match .loc - ser.loc["a"] - - def test_frame_at_raises_key_error(self): - # GH#31724 .at should match .loc - - df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) - - result = df.at[1, 0] - assert result == 3 - result = df.loc[1, 0] - assert result == 3 - - with pytest.raises(KeyError, match="a"): - df.at["a", 0] - with pytest.raises(KeyError, match="a"): - df.loc["a", 0] - - with pytest.raises(KeyError, match="a"): - df.at[1, "a"] - with pytest.raises(KeyError, match="a"): - df.loc[1, "a"] - # TODO: belongs somewhere else? 
def test_getitem_list_missing_key(self): # GH 13822, incorrect error string with non-unique columns when missing diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index b2fc2e2d0619d..44fb8dc519322 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -57,23 +57,6 @@ def test_fancy_setitem(): assert (s[48:54] == -3).all() -def test_dti_reset_index_round_trip(): - dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) - d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) - d2 = d1.reset_index() - assert d2.dtypes[0] == np.dtype("M8[ns]") - d3 = d2.set_index("index") - tm.assert_frame_equal(d1, d3, check_names=False) - - # #2329 - stamp = datetime(2012, 11, 22) - df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) - df = df.set_index("Date") - - assert df.index[0] == stamp - assert df.reset_index()["Date"][0] == stamp - - def test_slicing_datetimes(): # GH 7523 diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 86af29eac1bae..2ad21d8221e25 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -86,16 +86,3 @@ def test_slice_float_get_set(datetime_series): datetime_series[4.5:10.0] with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): datetime_series[4.5:10.0] = 0 - - -def test_slice_floats2(): - s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) - - assert len(s.loc[12.0:]) == 8 - assert len(s.loc[12.5:]) == 7 - - i = np.arange(10, 20, dtype=float) - i[2] = 12.2 - s.index = i - assert len(s.loc[12.0:]) == 8 - assert len(s.loc[12.5:]) == 7 diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 8044b590b3463..3cd9d52f8e754 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py 
@@ -18,6 +18,7 @@ Series, Timedelta, Timestamp, + cut, date_range, ) import pandas._testing as tm @@ -76,6 +77,35 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_astype_empty_constructor_equality(self, dtype): + # see GH#15524 + + if dtype not in ( + "S", + "V", # poor support (if any) currently + "M", + "m", # Generic timestamps raise a ValueError. Already tested. + ): + init_empty = Series([], dtype=dtype) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + @pytest.mark.parametrize("dtype", [str, np.str_]) + @pytest.mark.parametrize( + "series", + [ + Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), + ], + ) + def test_astype_str_map(self, dtype, series): + # see GH#4405 + result = series.astype(dtype) + expected = series.map(str) + tm.assert_series_equal(result, expected) + def test_astype_float_to_period(self): result = Series([np.nan]).astype("period[D]") expected = Series([NaT], dtype="period[D]") @@ -309,6 +339,21 @@ def test_astype_unicode(self): class TestAstypeCategorical: + def test_astype_categorical_invalid_conversions(self): + # invalid conversion (these are NOT a dtype) + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.randint(0, 10000, 100)).sort_values() + ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) + + msg = ( + "dtype '' " + "not understood" + ) + with pytest.raises(TypeError, match=msg): + ser.astype(Categorical) + with pytest.raises(TypeError, match=msg): + ser.astype("object").astype(Categorical) + def test_astype_categoricaldtype(self): s = Series(["a", "b", "a"]) result = s.astype(CategoricalDtype(["a", "b"], ordered=True)) diff --git 
a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py new file mode 100644 index 0000000000000..a7ddc0c22dcf4 --- /dev/null +++ b/pandas/tests/series/methods/test_item.py @@ -0,0 +1,49 @@ +import pytest + +from pandas import Series, Timedelta, Timestamp, date_range + + +class TestItem: + def test_item(self): + ser = Series([1]) + result = ser.item() + assert result == 1 + assert result == ser.iloc[0] + assert isinstance(result, int) # i.e. not np.int64 + + ser = Series([0.5], index=[3]) + result = ser.item() + assert isinstance(result, float) + assert result == 0.5 + + ser = Series([1, 2]) + msg = "can only convert an array of size 1" + with pytest.raises(ValueError, match=msg): + ser.item() + + dti = date_range("2016-01-01", periods=2) + with pytest.raises(ValueError, match=msg): + dti.item() + with pytest.raises(ValueError, match=msg): + Series(dti).item() + + val = dti[:1].item() + assert isinstance(val, Timestamp) + val = Series(dti)[:1].item() + assert isinstance(val, Timestamp) + + tdi = dti - dti + with pytest.raises(ValueError, match=msg): + tdi.item() + with pytest.raises(ValueError, match=msg): + Series(tdi).item() + + val = tdi[:1].item() + assert isinstance(val, Timedelta) + val = Series(tdi)[:1].item() + assert isinstance(val, Timedelta) + + # Case where ser[0] would not work + ser = Series(dti, index=[5, 6]) + val = ser[:1].item() + assert val == dti[0] diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 13d6a3b1447a1..40e567a8c33ca 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -1,12 +1,30 @@ +from datetime import datetime + import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series, date_range import pandas._testing as tm class TestResetIndex: + def 
test_reset_index_dti_round_trip(self): + dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) + d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + assert d2.dtypes[0] == np.dtype("M8[ns]") + d3 = d2.set_index("index") + tm.assert_frame_equal(d1, d3, check_names=False) + + # GH#2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) + df = df.set_index("Date") + + assert df.index[0] == stamp + assert df.reset_index()["Date"][0] == stamp + def test_reset_index(self): df = tm.makeDataFrame()[:5] ser = df.stack() diff --git a/pandas/tests/series/methods/test_values.py b/pandas/tests/series/methods/test_values.py index e28a714ea656d..2982dcd52991d 100644 --- a/pandas/tests/series/methods/test_values.py +++ b/pandas/tests/series/methods/test_values.py @@ -18,3 +18,8 @@ def test_values_object_extension_dtypes(self, data): result = Series(data).values expected = np.array(data.astype(object)) tm.assert_numpy_array_equal(result, expected) + + def test_values(self, datetime_series): + tm.assert_almost_equal( + datetime_series.values, datetime_series, check_dtype=False + ) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index beace074894a8..ea0e1203e22ed 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, Timedelta, Timestamp, date_range +from pandas import DataFrame, Index, Series, date_range import pandas._testing as tm @@ -112,11 +112,6 @@ def test_not_hashable(self): def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) - def test_values(self, datetime_series): - tm.assert_almost_equal( - datetime_series.values, datetime_series, check_dtype=False - ) - def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" @@ 
-135,50 +130,6 @@ def test_class_axis(self): # no exception and no empty docstring assert pydoc.getdoc(Series.index) - def test_item(self): - s = Series([1]) - result = s.item() - assert result == 1 - assert result == s.iloc[0] - assert isinstance(result, int) # i.e. not np.int64 - - ser = Series([0.5], index=[3]) - result = ser.item() - assert isinstance(result, float) - assert result == 0.5 - - ser = Series([1, 2]) - msg = "can only convert an array of size 1" - with pytest.raises(ValueError, match=msg): - ser.item() - - dti = pd.date_range("2016-01-01", periods=2) - with pytest.raises(ValueError, match=msg): - dti.item() - with pytest.raises(ValueError, match=msg): - Series(dti).item() - - val = dti[:1].item() - assert isinstance(val, Timestamp) - val = Series(dti)[:1].item() - assert isinstance(val, Timestamp) - - tdi = dti - dti - with pytest.raises(ValueError, match=msg): - tdi.item() - with pytest.raises(ValueError, match=msg): - Series(tdi).item() - - val = tdi[:1].item() - assert isinstance(val, Timedelta) - val = Series(tdi)[:1].item() - assert isinstance(val, Timedelta) - - # Case where ser[0] would not work - ser = Series(dti, index=[5, 6]) - val = ser[:1].item() - assert val == dti[0] - def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 2fbed92567f71..f5c3623fb9986 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,5 +1,3 @@ -import string - import numpy as np import pytest @@ -16,20 +14,6 @@ def test_dtype(self, datetime_series): assert datetime_series.dtype == np.dtype("float64") assert datetime_series.dtypes == np.dtype("float64") - @pytest.mark.parametrize("dtype", [str, np.str_]) - @pytest.mark.parametrize( - "series", - [ - Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), - Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), - ], - ) - 
def test_astype_str_map(self, dtype, series): - # see gh-4405 - result = series.astype(dtype) - expected = series.map(str) - tm.assert_series_equal(result, expected) - def test_astype_from_categorical(self): items = ["a", "b", "c", "a"] s = Series(items) @@ -120,36 +104,6 @@ def cmp(a, b): s.astype("object").astype(CategoricalDtype()), roundtrip_expected ) - def test_invalid_conversions(self): - # invalid conversion (these are NOT a dtype) - cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.randint(0, 10000, 100)).sort_values() - ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) - - msg = ( - "dtype '' " - "not understood" - ) - with pytest.raises(TypeError, match=msg): - ser.astype(Categorical) - with pytest.raises(TypeError, match=msg): - ser.astype("object").astype(Categorical) - - @pytest.mark.parametrize("dtype", np.typecodes["All"]) - def test_astype_empty_constructor_equality(self, dtype): - # see gh-15524 - - if dtype not in ( - "S", - "V", # poor support (if any) currently - "M", - "m", # Generic timestamps raise a ValueError. Already tested. 
- ): - init_empty = Series([], dtype=dtype) - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): - as_type_empty = Series([]).astype(dtype) - tm.assert_series_equal(init_empty, as_type_empty) - def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical series = Series(["a", "b", "c"]) From d398f9ed81eba934612645b7d3a89a01c38b8d79 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 16:20:33 -0800 Subject: [PATCH 069/147] CLN: only call _wrap_results one place in nanmedian (#37673) --- pandas/core/nanops.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8e917bb770247..5da8bd300433e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -681,26 +681,26 @@ def get_median(x): # there's a non-empty array to apply over otherwise numpy raises if notempty: if not skipna: - return _wrap_results( - np.apply_along_axis(get_median, axis, values), dtype - ) + res = np.apply_along_axis(get_median, axis, values) + + else: + # fastpath for the skipna case + with warnings.catch_warnings(): + # Suppress RuntimeWarning about All-NaN slice + warnings.filterwarnings("ignore", "All-NaN slice encountered") + res = np.nanmedian(values, axis) - # fastpath for the skipna case - with warnings.catch_warnings(): - # Suppress RuntimeWarning about All-NaN slice - warnings.filterwarnings("ignore", "All-NaN slice encountered") - res = np.nanmedian(values, axis) - return _wrap_results(res, dtype) - - # must return the correct shape, but median is not defined for the - # empty set so return nans of shape "everything but the passed axis" - # since "axis" is where the reduction would occur if we had a nonempty - # array - ret = get_empty_reduction_result(values.shape, axis, np.float_, np.nan) - return _wrap_results(ret, dtype) - - # otherwise return a scalar value - return _wrap_results(get_median(values) 
if notempty else np.nan, dtype) + else: + # must return the correct shape, but median is not defined for the + # empty set so return nans of shape "everything but the passed axis" + # since "axis" is where the reduction would occur if we had a nonempty + # array + res = get_empty_reduction_result(values.shape, axis, np.float_, np.nan) + + else: + # otherwise return a scalar value + res = get_median(values) if notempty else np.nan + return _wrap_results(res, dtype) def get_empty_reduction_result( From 5f5bcd6204841dc2f08cfd1cf383a93d0e0db09f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 16:31:20 -0800 Subject: [PATCH 070/147] TYP: Index._concat (#37671) --- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/category.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f350e18198057..545d1d834fe2d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4297,13 +4297,13 @@ def append(self, other): return self._concat(to_concat, name) - def _concat(self, to_concat, name): + def _concat(self, to_concat: List["Index"], name: Label) -> "Index": """ Concatenate multiple Index objects. 
""" - to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] + to_concat_vals = [x._values for x in to_concat] - result = concat_compat(to_concat) + result = concat_compat(to_concat_vals) return Index(result, name=name) def putmask(self, mask, value): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 525c41bae8b51..8c9ee1f1d8efa 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -653,7 +653,7 @@ def map(self, mapper): mapped = self._values.map(mapper) return Index(mapped, name=self.name) - def _concat(self, to_concat, name): + def _concat(self, to_concat: List["Index"], name: Label) -> "CategoricalIndex": # if calling index is category, don't check dtype of others codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) cat = self._data._from_backing_data(codes) From dce844fb1d89ac0abc9ec3bc70e150af3a7468a2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 16:34:52 -0800 Subject: [PATCH 071/147] BUG: CategoricalIndex.equals casting non-categories to np.nan (#37667) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/indexes/category.py | 16 ++++++++++------ .../tests/indexes/categorical/test_category.py | 8 ++++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690e6b8f725ad..73493bbeb0eac 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -380,7 +380,7 @@ Categorical ^^^^^^^^^^^ - :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`) - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) -- +- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` 
(:issue:`37667`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 8c9ee1f1d8efa..1be979b1b899c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -20,7 +20,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import accessor from pandas.core.arrays.categorical import Categorical, contains @@ -263,6 +263,7 @@ def _is_dtype_compat(self, other) -> Categorical: values = other if not is_list_like(values): values = [values] + cat = Categorical(other, dtype=self.dtype) other = CategoricalIndex(cat) if not other.isin(values).all(): @@ -271,6 +272,12 @@ def _is_dtype_compat(self, other) -> Categorical: ) other = other._values + if not ((other == values) | (isna(other) & isna(values))).all(): + # GH#37667 see test_equals_non_category + raise TypeError( + "categories must match existing categories when appending" + ) + return other def equals(self, other: object) -> bool: @@ -291,13 +298,10 @@ def equals(self, other: object) -> bool: try: other = self._is_dtype_compat(other) - if isinstance(other, type(self)): - other = other._data - return self._data.equals(other) except (TypeError, ValueError): - pass + return False - return False + return self._data.equals(other) # -------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index cf2430d041d88..324a2535bc465 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -444,6 +444,14 @@ def test_equals_categorical_unordered(self): assert not a.equals(c) assert not b.equals(c) + def test_equals_non_category(self): + # GH#37667 Case where other 
contains a value not among ci's + # categories ("D") and also contains np.nan + ci = CategoricalIndex(["A", "B", np.nan, np.nan]) + other = Index(["A", "B", "D", np.nan]) + + assert not ci.equals(other) + def test_frame_repr(self): df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) result = repr(df) From 7dbde8f9b5330287a4d755ef7e7c50f65920e85b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 17:47:26 -0800 Subject: [PATCH 072/147] CLN: _replace_single (#37683) * REF: simplify _replace_single by noting regex kwarg is bool * Annotate * CLN: remove never-False convert kwarg --- pandas/core/internals/blocks.py | 82 +++++++++++++------------------ pandas/core/internals/managers.py | 6 ++- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1f34e91d71077..8e01aaa396265 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -726,7 +726,6 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert: bool = True, ) -> List["Block"]: """ replace the to_replace value with value, possible to create new @@ -755,9 +754,7 @@ def replace( if len(to_replace) == 1: # _can_hold_element checks have reduced this back to the # scalar case and we can avoid a costly object cast - return self.replace( - to_replace[0], value, inplace=inplace, regex=regex, convert=convert - ) + return self.replace(to_replace[0], value, inplace=inplace, regex=regex) # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. 
@@ -771,7 +768,6 @@ def replace( value=value, inplace=inplace, regex=regex, - convert=convert, ) values = self.values @@ -810,16 +806,21 @@ def replace( value=value, inplace=inplace, regex=regex, - convert=convert, - ) - if convert: - blocks = extend_blocks( - [b.convert(numeric=False, copy=not inplace) for b in blocks] ) + + blocks = extend_blocks( + [b.convert(numeric=False, copy=not inplace) for b in blocks] + ) return blocks def _replace_single( - self, to_replace, value, inplace=False, regex=False, convert=True, mask=None + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + convert: bool = True, + mask=None, ) -> List["Block"]: """ no-op on a non-ObjectBlock """ return [self] if inplace else [self.copy()] @@ -860,9 +861,9 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray: m = masks[i] convert = i == src_len # only convert once at the end result = blk._replace_coerce( - mask=m, to_replace=src, value=dest, + mask=m, inplace=inplace, regex=regex, ) @@ -1567,9 +1568,9 @@ def _replace_coerce( self, to_replace, value, + mask: np.ndarray, inplace: bool = True, regex: bool = False, - mask=None, ) -> List["Block"]: """ Replace value corresponding to the given boolean array with another @@ -1581,12 +1582,12 @@ def _replace_coerce( Scalar to replace or regular expression to match. value : object Replacement object. + mask : np.ndarray[bool] + True indicate corresponding element is ignored. inplace : bool, default True Perform inplace modification. regex : bool, default False If true, perform regular expression substitution. - mask : array-like of bool, optional - True indicate corresponding element is ignored. 
Returns ------- @@ -2495,7 +2496,11 @@ def _can_hold_element(self, element: Any) -> bool: return True def replace( - self, to_replace, value, inplace=False, regex=False, convert=True + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, ) -> List["Block"]: to_rep_is_list = is_list_like(to_replace) value_is_list = is_list_like(value) @@ -2506,20 +2511,14 @@ def replace( blocks: List["Block"] = [self] if not either_list and is_re(to_replace): - return self._replace_single( - to_replace, value, inplace=inplace, regex=True, convert=convert - ) + return self._replace_single(to_replace, value, inplace=inplace, regex=True) elif not (either_list or regex): - return super().replace( - to_replace, value, inplace=inplace, regex=regex, convert=convert - ) + return super().replace(to_replace, value, inplace=inplace, regex=regex) elif both_lists: for to_rep, v in zip(to_replace, value): result_blocks = [] for b in blocks: - result = b._replace_single( - to_rep, v, inplace=inplace, regex=regex, convert=convert - ) + result = b._replace_single(to_rep, v, inplace=inplace, regex=regex) result_blocks.extend(result) blocks = result_blocks return result_blocks @@ -2529,18 +2528,22 @@ def replace( result_blocks = [] for b in blocks: result = b._replace_single( - to_rep, value, inplace=inplace, regex=regex, convert=convert + to_rep, value, inplace=inplace, regex=regex ) result_blocks.extend(result) blocks = result_blocks return result_blocks - return self._replace_single( - to_replace, value, inplace=inplace, convert=convert, regex=regex - ) + return self._replace_single(to_replace, value, inplace=inplace, regex=regex) def _replace_single( - self, to_replace, value, inplace=False, regex=False, convert=True, mask=None + self, + to_replace, + value, + inplace: bool = False, + regex: bool = False, + convert: bool = True, + mask=None, ) -> List["Block"]: """ Replace elements by the given value. 
@@ -2567,23 +2570,7 @@ def _replace_single( inplace = validate_bool_kwarg(inplace, "inplace") # to_replace is regex compilable - to_rep_re = regex and is_re_compilable(to_replace) - - # regex is regex compilable - regex_re = is_re_compilable(regex) - - # only one will survive - if to_rep_re and regex_re: - raise AssertionError( - "only one of to_replace and regex can be regex compilable" - ) - - # if regex was passed as something that can be a regex (rather than a - # boolean) - if regex_re: - to_replace = regex - - regex = regex_re or to_rep_re + regex = regex and is_re_compilable(to_replace) # try to get the pattern attribute (compiled re) or it's a string if is_re(to_replace): @@ -2646,7 +2633,6 @@ def replace( value, inplace: bool = False, regex: bool = False, - convert: bool = True, ) -> List["Block"]: inplace = validate_bool_kwarg(inplace, "inplace") result = self if inplace else self.copy() diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a06d57e268fe2..fda4da8694ea3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -638,9 +638,11 @@ def convert( coerce=coerce, ) - def replace(self, value, **kwargs) -> "BlockManager": + def replace(self, to_replace, value, inplace: bool, regex: bool) -> "BlockManager": assert np.ndim(value) == 0, value - return self.apply("replace", value=value, **kwargs) + return self.apply( + "replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex + ) def replace_list( self: T, From b7fd0af9f708bfc24ee5fda13e99b987112e436d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 18:04:58 -0800 Subject: [PATCH 073/147] TYP: make more internal funcs keyword-only (#37688) --- pandas/core/array_algos/masked_reductions.py | 17 ++++++++++++----- pandas/core/arrays/_mixins.py | 3 ++- pandas/core/arrays/base.py | 12 ++++++++---- pandas/core/arrays/boolean.py | 12 +++++++----- pandas/core/arrays/categorical.py | 2 +- 
pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/floating.py | 8 +++++--- pandas/core/arrays/integer.py | 8 +++++--- pandas/core/arrays/interval.py | 4 ++-- pandas/core/arrays/masked.py | 3 ++- pandas/core/arrays/numpy_.py | 4 +++- pandas/core/arrays/period.py | 5 +++-- pandas/core/arrays/sparse/array.py | 6 +++--- pandas/core/arrays/string_.py | 6 +++--- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/base.py | 1 + pandas/core/frame.py | 1 + pandas/core/series.py | 10 +++++++++- pandas/tests/extension/arrow/arrays.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- 20 files changed, 71 insertions(+), 39 deletions(-) diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py index 3f4625e2b712a..bce6f1aafb2c5 100644 --- a/pandas/core/array_algos/masked_reductions.py +++ b/pandas/core/array_algos/masked_reductions.py @@ -17,6 +17,7 @@ def _sumprod( func: Callable, values: np.ndarray, mask: np.ndarray, + *, skipna: bool = True, min_count: int = 0, ): @@ -52,19 +53,25 @@ def _sumprod( return func(values, where=~mask) -def sum(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0): +def sum( + values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 +): return _sumprod( np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count ) -def prod(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0): +def prod( + values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0 +): return _sumprod( np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count ) -def _minmax(func: Callable, values: np.ndarray, mask: np.ndarray, skipna: bool = True): +def _minmax( + func: Callable, values: np.ndarray, mask: np.ndarray, *, skipna: bool = True +): """ Reduction for 1D masked array. 
@@ -94,9 +101,9 @@ def _minmax(func: Callable, values: np.ndarray, mask: np.ndarray, skipna: bool = return libmissing.NA -def min(values: np.ndarray, mask: np.ndarray, skipna: bool = True): +def min(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): return _minmax(np.min, values=values, mask=mask, skipna=skipna) -def max(values: np.ndarray, mask: np.ndarray, skipna: bool = True): +def max(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): return _minmax(np.max, values=values, mask=mask, skipna=skipna) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 63c414d96c8de..a8c0e77270dfc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -54,6 +54,7 @@ def _validate_scalar(self, value): def take( self: _T, indices: Sequence[int], + *, allow_fill: bool = False, fill_value: Any = None, axis: int = 0, @@ -246,7 +247,7 @@ def fillna(self: _T, value=None, method=None, limit=None) -> _T: # ------------------------------------------------------------------------ # Reductions - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): meth = getattr(self, name, None) if meth: return meth(skipna=skipna, **kwargs) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index be105fd1f2a46..afbddc53804ac 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -173,7 +173,7 @@ class ExtensionArray: # ------------------------------------------------------------------------ @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): """ Construct a new ExtensionArray from a sequence of scalars. 
@@ -195,7 +195,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): raise AbstractMethodError(cls) @classmethod - def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + def _from_sequence_of_strings(cls, strings, *, dtype=None, copy=False): """ Construct a new ExtensionArray from a sequence of strings. @@ -922,7 +922,11 @@ def repeat(self, repeats, axis=None): # ------------------------------------------------------------------------ def take( - self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None + self, + indices: Sequence[int], + *, + allow_fill: bool = False, + fill_value: Any = None, ) -> "ExtensionArray": """ Take elements from an array. @@ -1153,7 +1157,7 @@ def _concat_same_type( # of objects _can_hold_na = True - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): """ Return a scalar result of performing the reduction operation. diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 21306455573b8..c6c7396a980b0 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -273,7 +273,9 @@ def dtype(self) -> BooleanDtype: return self._dtype @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArray": + def _from_sequence( + cls, scalars, *, dtype=None, copy: bool = False + ) -> "BooleanArray": if dtype: assert dtype == "boolean" values, mask = coerce_to_array(scalars, copy=copy) @@ -281,7 +283,7 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "BooleanArra @classmethod def _from_sequence_of_strings( - cls, strings: List[str], dtype=None, copy: bool = False + cls, strings: List[str], *, dtype=None, copy: bool = False ) -> "BooleanArray": def map_string(s): if isna(s): @@ -294,7 +296,7 @@ def map_string(s): raise ValueError(f"{s} cannot be cast to bool") scalars = [map_string(x) for x in strings] - return 
cls._from_sequence(scalars, dtype, copy) + return cls._from_sequence(scalars, dtype=dtype, copy=copy) _HANDLED_TYPES = (np.ndarray, numbers.Number, bool, np.bool_) @@ -682,12 +684,12 @@ def _arith_method(self, other, op): return self._maybe_mask_result(result, mask, other, op_name) - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if name in {"any", "all"}: return getattr(self, name)(skipna=skipna, **kwargs) - return super()._reduce(name, skipna, **kwargs) + return super()._reduce(name, skipna=skipna, **kwargs) def _maybe_mask_result(self, result, mask, other, op_name: str): """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index edbf24ca87f5c..51f3c16f3f467 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -385,7 +385,7 @@ def _constructor(self) -> Type["Categorical"]: return Categorical @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): return Categorical(scalars, dtype=dtype) def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 905242bfdd8ad..a05dc717f83c1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -301,7 +301,7 @@ def _simple_new( return result @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False): + def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False): return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy) @classmethod diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 4cfaae23e4389..a5ebdd8d963e2 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -275,16 +275,18 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): super().__init__(values, 
mask, copy=copy) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "FloatingArray": + def _from_sequence( + cls, scalars, *, dtype=None, copy: bool = False + ) -> "FloatingArray": values, mask = coerce_to_array(scalars, dtype=dtype, copy=copy) return FloatingArray(values, mask) @classmethod def _from_sequence_of_strings( - cls, strings, dtype=None, copy: bool = False + cls, strings, *, dtype=None, copy: bool = False ) -> "FloatingArray": scalars = to_numeric(strings, errors="raise") - return cls._from_sequence(scalars, dtype, copy) + return cls._from_sequence(scalars, dtype=dtype, copy=copy) _HANDLED_TYPES = (np.ndarray, numbers.Number) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e3d19e53e4517..c9d7632e39228 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -358,15 +358,17 @@ def __abs__(self): return type(self)(np.abs(self._data), self._mask) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": + def _from_sequence( + cls, scalars, *, dtype=None, copy: bool = False + ) -> "IntegerArray": return integer_array(scalars, dtype=dtype, copy=copy) @classmethod def _from_sequence_of_strings( - cls, strings, dtype=None, copy: bool = False + cls, strings, *, dtype=None, copy: bool = False ) -> "IntegerArray": scalars = to_numeric(strings, errors="raise") - return cls._from_sequence(scalars, dtype, copy) + return cls._from_sequence(scalars, dtype=dtype, copy=copy) _HANDLED_TYPES = (np.ndarray, numbers.Number) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 7b10334804ef9..a2eb506c6747a 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -227,7 +227,7 @@ def _simple_new(cls, data, closed="right"): return result @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): return cls(scalars, 
dtype=dtype, copy=copy) @classmethod @@ -788,7 +788,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray": b = empty return self._concat_same_type([a, b]) - def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): + def take(self, indices, *, allow_fill=False, fill_value=None, axis=None, **kwargs): """ Take elements from the IntervalArray. diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b633f268049e5..9cc4cc72e4c8e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -269,6 +269,7 @@ def _concat_same_type(cls: Type[BaseMaskedArrayT], to_concat) -> BaseMaskedArray def take( self: BaseMaskedArrayT, indexer, + *, allow_fill: bool = False, fill_value: Optional[Scalar] = None, ) -> BaseMaskedArrayT: @@ -357,7 +358,7 @@ def value_counts(self, dropna: bool = True) -> "Series": return Series(counts, index=index) - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): data = self._data mask = self._mask diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index e1a424b719a4a..9419f111cc869 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -172,7 +172,9 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False) self._dtype = PandasDtype(values.dtype) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray": + def _from_sequence( + cls, scalars, *, dtype=None, copy: bool = False + ) -> "PandasArray": if isinstance(dtype, PandasDtype): dtype = dtype._dtype diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8de84a0187e95..80882acceb56a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -192,6 +192,7 @@ def _simple_new( def _from_sequence( cls: Type["PeriodArray"], scalars: Union[Sequence[Optional[Period]], AnyArrayLike], + *, 
dtype: Optional[PeriodDtype] = None, copy: bool = False, ) -> "PeriodArray": @@ -214,9 +215,9 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( - cls, strings, dtype=None, copy=False + cls, strings, *, dtype=None, copy=False ) -> "PeriodArray": - return cls._from_sequence(strings, dtype, copy) + return cls._from_sequence(strings, dtype=dtype, copy=copy) @classmethod def _from_datetime64(cls, data, freq, tz=None) -> "PeriodArray": diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 9152ce72d75aa..d976526955ac2 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -484,7 +484,7 @@ def __setitem__(self, key, value): raise TypeError(msg) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, dtype=None, copy=False): return cls(scalars, dtype=dtype) @classmethod @@ -809,7 +809,7 @@ def _get_val_at(self, loc): val = maybe_box_datetimelike(val, self.sp_values.dtype) return val - def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray": + def take(self, indices, *, allow_fill=False, fill_value=None) -> "SparseArray": if is_scalar(indices): raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.") indices = np.asarray(indices, dtype=np.int32) @@ -1156,7 +1156,7 @@ def nonzero(self): # Reductions # ------------------------------------------------------------------------ - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): method = getattr(self, name, None) if method is None: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 8231a5fa0509b..b17481c8e5f88 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -198,7 +198,7 @@ def _validate(self): ) @classmethod - def _from_sequence(cls, scalars, dtype=None, copy=False): + def _from_sequence(cls, scalars, *, 
dtype=None, copy=False): if dtype: assert dtype == "string" @@ -226,7 +226,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): return new_string_array @classmethod - def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + def _from_sequence_of_strings(cls, strings, *, dtype=None, copy=False): return cls._from_sequence(strings, dtype=dtype, copy=copy) def __arrow_array__(self, type=None): @@ -295,7 +295,7 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy) - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if name in ["min", "max"]: return getattr(self, name)(skipna=skipna) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 8a87df18b6adb..a75d411b4a40c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -219,7 +219,7 @@ def _simple_new( @classmethod def _from_sequence( - cls, data, dtype=TD64NS_DTYPE, copy: bool = False + cls, data, *, dtype=TD64NS_DTYPE, copy: bool = False ) -> "TimedeltaArray": if dtype: _validate_td64_dtype(dtype) diff --git a/pandas/core/base.py b/pandas/core/base.py index c91e4db004f2a..b979298fa53f6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -785,6 +785,7 @@ def _reduce( self, op, name: str, + *, axis=0, skipna=True, numeric_only=None, diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 049d2c4888a69..11b83a393dcc0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8719,6 +8719,7 @@ def _reduce( self, op, name: str, + *, axis=0, skipna=True, numeric_only=None, diff --git a/pandas/core/series.py b/pandas/core/series.py index e4a805a18bcdb..237c1c9a85575 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4190,7 +4190,15 @@ def f(x): ) def _reduce( - self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds + self, + op, + name: str, + *, + axis=0, + skipna=True, + 
numeric_only=None, + filter_type=None, + **kwds, ): """ Perform a reduction operation. diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 04ce705690cf3..65c5102e22997 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -159,7 +159,7 @@ def _concat_same_type(cls, to_concat): def __invert__(self): return type(self).from_scalars(~self._data.to_pandas()) - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if skipna: arr = self[~self.isna()] else: diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 3d1ebb01d632f..9ede9c7fbd0fd 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -178,7 +178,7 @@ def _formatter(self, boxed=False): def _concat_same_type(cls, to_concat): return cls(np.concatenate([x._data for x in to_concat])) - def _reduce(self, name: str, skipna: bool = True, **kwargs): + def _reduce(self, name: str, *, skipna: bool = True, **kwargs): if skipna: # If we don't have any NAs, we can ignore skipna From 2d97c9556a2f61e618bfc1d7153654ee791de5c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 18:10:13 -0800 Subject: [PATCH 074/147] REF: make Series._replace_single a regular method (#37691) --- pandas/core/generic.py | 37 +++++-------------------------------- pandas/core/series.py | 27 ++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 36ce2c4776bd0..bea650c1b50fd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -137,36 +137,6 @@ ) -def _single_replace(self: "Series", to_replace, method, inplace, limit): - """ - Replaces values in a Series using the fill method specified when no - replacement value is given in the replace method - """ - if self.ndim != 1: - raise 
TypeError( - f"cannot replace {to_replace} with method {method} on a " - f"{type(self).__name__}" - ) - - orig_dtype = self.dtype - result = self if inplace else self.copy() - fill_f = missing.get_fill_func(method) - - mask = missing.mask_missing(result.values, to_replace) - values = fill_f(result.values, limit=limit, mask=mask) - - if values.dtype == orig_dtype and inplace: - return - - result = pd.Series(values, index=self.index, dtype=self.dtype).__finalize__(self) - - if inplace: - self._update_inplace(result) - return - - return result - - bool_t = bool # Need alias because NDFrame has def bool: @@ -6690,11 +6660,14 @@ def replace( if isinstance(to_replace, (tuple, list)): if isinstance(self, ABCDataFrame): + from pandas import Series + return self.apply( - _single_replace, args=(to_replace, method, inplace, limit) + Series._replace_single, + args=(to_replace, method, inplace, limit), ) self = cast("Series", self) - return _single_replace(self, to_replace, method, inplace, limit) + return self._replace_single(to_replace, method, inplace, limit) if not is_dict_like(to_replace): if not is_dict_like(regex): diff --git a/pandas/core/series.py b/pandas/core/series.py index 237c1c9a85575..c5df6a9298c88 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -67,7 +67,7 @@ remove_na_arraylike, ) -from pandas.core import algorithms, base, generic, nanops, ops +from pandas.core import algorithms, base, generic, missing, nanops, ops from pandas.core.accessor import CachedAccessor from pandas.core.aggregation import aggregate, transform from pandas.core.arrays import ExtensionArray @@ -4558,6 +4558,31 @@ def replace( method=method, ) + def _replace_single(self, to_replace, method, inplace, limit): + """ + Replaces values in a Series using the fill method specified when no + replacement value is given in the replace method + """ + + orig_dtype = self.dtype + result = self if inplace else self.copy() + fill_f = missing.get_fill_func(method) + + mask = 
missing.mask_missing(result.values, to_replace) + values = fill_f(result.values, limit=limit, mask=mask) + + if values.dtype == orig_dtype and inplace: + return + + result = self._constructor(values, index=self.index, dtype=self.dtype) + result = result.__finalize__(self) + + if inplace: + self._update_inplace(result) + return + + return result + @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "Series": return super().shift( From 924f0e145a36755d5b1622181e35371b350c0195 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Sun, 8 Nov 2020 09:11:12 +0700 Subject: [PATCH 075/147] REF: simplify cycling through colors (#37664) --- pandas/plotting/_matplotlib/style.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index b2c7b2610845c..cc2dde0f2179a 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -1,3 +1,4 @@ +import itertools from typing import ( TYPE_CHECKING, Collection, @@ -74,7 +75,7 @@ def get_standard_colors( num_colors=num_colors, ) - return _cycle_colors(colors, num_colors=num_colors) + return list(_cycle_colors(colors, num_colors=num_colors)) def _derive_colors( @@ -128,19 +129,14 @@ def _derive_colors( return _get_colors_from_color_type(color_type, num_colors=num_colors) -def _cycle_colors(colors: List[Color], num_colors: int) -> List[Color]: - """Append more colors by cycling if there is not enough color. +def _cycle_colors(colors: List[Color], num_colors: int) -> Iterator[Color]: + """Cycle colors until achieving max of `num_colors` or length of `colors`. Extra colors will be ignored by matplotlib if there are more colors than needed and nothing needs to be done here. 
""" - if len(colors) < num_colors: - multiple = num_colors // len(colors) - 1 - mod = num_colors % len(colors) - colors += multiple * colors - colors += colors[:mod] - - return colors + max_colors = max(num_colors, len(colors)) + yield from itertools.islice(itertools.cycle(colors), max_colors) def _get_colors_from_colormap( From d18d339568007720035c72e8385c3c3353e95120 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Nov 2020 18:14:32 -0800 Subject: [PATCH 076/147] REF: implement _wrap_reduction_result (#37660) --- pandas/core/arrays/_mixins.py | 7 ++++- pandas/core/arrays/categorical.py | 4 +-- pandas/core/arrays/datetimelike.py | 16 +++------- pandas/core/arrays/numpy_.py | 48 +++++++++++++++++++----------- pandas/core/arrays/string_.py | 17 +++++++++++ pandas/core/arrays/timedeltas.py | 4 +-- pandas/core/nanops.py | 1 + 7 files changed, 61 insertions(+), 36 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index a8c0e77270dfc..d84e2e2ad295b 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,4 +1,4 @@ -from typing import Any, Sequence, TypeVar +from typing import Any, Optional, Sequence, TypeVar import numpy as np @@ -255,6 +255,11 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): msg = f"'{type(self).__name__}' does not implement reduction '{name}'" raise TypeError(msg) + def _wrap_reduction_result(self, axis: Optional[int], result): + if axis is None or self.ndim == 1: + return self._box_func(result) + return self._from_backing_data(result) + # ------------------------------------------------------------------------ def __repr__(self) -> str: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 51f3c16f3f467..3e2da3e95f396 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1957,7 +1957,7 @@ def min(self, *, skipna=True, **kwargs): return np.nan else: pointer = self._codes.min() - return 
self.categories[pointer] + return self._wrap_reduction_result(None, pointer) @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") def max(self, *, skipna=True, **kwargs): @@ -1993,7 +1993,7 @@ def max(self, *, skipna=True, **kwargs): return np.nan else: pointer = self._codes.max() - return self.categories[pointer] + return self._wrap_reduction_result(None, pointer) def mode(self, dropna=True): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7a0d88f29b9b0..8d90035491d28 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1283,9 +1283,7 @@ def min(self, *, axis=None, skipna=True, **kwargs): return self._from_backing_data(result) result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) - if lib.is_scalar(result): - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def max(self, *, axis=None, skipna=True, **kwargs): """ @@ -1316,9 +1314,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): return self._from_backing_data(result) result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) - if lib.is_scalar(result): - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def mean(self, *, skipna=True, axis: Optional[int] = 0): """ @@ -1357,9 +1353,7 @@ def mean(self, *, skipna=True, axis: Optional[int] = 0): result = nanops.nanmean( self._ndarray, axis=axis, skipna=skipna, mask=self.isna() ) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): nv.validate_median((), kwargs) @@ -1378,9 +1372,7 @@ def median(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): return self._from_backing_data(result) result = 
nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) class DatelikeOps(DatetimeLikeArrayMixin): diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9419f111cc869..0cdce1eabccc6 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -12,7 +12,6 @@ from pandas.core.dtypes.missing import isna from pandas.core import nanops, ops -from pandas.core.array_algos import masked_reductions from pandas.core.arraylike import OpsMixin from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.strings.object_array import ObjectStringArrayMixin @@ -275,39 +274,46 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: def any(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_any((), dict(out=out, keepdims=keepdims)) - return nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def all(self, *, axis=None, out=None, keepdims=False, skipna=True): nv.validate_all((), dict(out=out, keepdims=keepdims)) - return nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) - def min(self, *, skipna: bool = True, **kwargs) -> Scalar: + def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) - return masked_reductions.min( - values=self.to_numpy(), mask=self.isna(), skipna=skipna + result = nanops.nanmin( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) + return self._wrap_reduction_result(axis, result) - def max(self, *, skipna: bool = True, **kwargs) -> Scalar: + def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: 
nv.validate_max((), kwargs) - return masked_reductions.max( - values=self.to_numpy(), mask=self.isna(), skipna=skipna + result = nanops.nanmax( + values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) + return self._wrap_reduction_result(axis, result) def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_sum((), kwargs) - return nanops.nansum( + result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) + return self._wrap_reduction_result(axis, result) def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_prod((), kwargs) - return nanops.nanprod( + result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) + return self._wrap_reduction_result(axis, result) def mean(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims)) - return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def median( self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True @@ -315,7 +321,8 @@ def median( nv.validate_median( (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims) ) - return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def std( self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True @@ -323,7 +330,8 @@ def std( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" ) - return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def var( self, *, axis=None, dtype=None, 
out=None, ddof=1, keepdims=False, skipna=True @@ -331,7 +339,8 @@ def var( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var" ) - return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def sem( self, *, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True @@ -339,19 +348,22 @@ def sem( nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem" ) - return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof) + return self._wrap_reduction_result(axis, result) def kurt(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt" ) - return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) def skew(self, *, axis=None, dtype=None, out=None, keepdims=False, skipna=True): nv.validate_stat_ddof_func( (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew" ) - return nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna) + return self._wrap_reduction_result(axis, result) # ------------------------------------------------------------------------ # Additional Methods diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b17481c8e5f88..c73855f281bcc 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -3,6 +3,8 @@ import numpy as np from pandas._libs import lib, missing as libmissing +from pandas._typing import Scalar +from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import 
ExtensionDtype, register_extension_dtype from pandas.core.dtypes.common import ( @@ -15,6 +17,7 @@ ) from pandas.core import ops +from pandas.core.array_algos import masked_reductions from pandas.core.arrays import IntegerArray, PandasArray from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import extract_array @@ -301,6 +304,20 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_min((), kwargs) + result = masked_reductions.min( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + + def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: + nv.validate_max((), kwargs) + result = masked_reductions.max( + values=self.to_numpy(), mask=self.isna(), skipna=skipna + ) + return self._wrap_reduction_result(axis, result) + def value_counts(self, dropna=False): from pandas import value_counts diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a75d411b4a40c..d9ecbc874cd59 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -381,9 +381,7 @@ def sum( result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) - if axis is None or self.ndim == 1: - return self._box_func(result) - return self._from_backing_data(result) + return self._wrap_reduction_result(axis, result) def std( self, diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 5da8bd300433e..6dc05c23c026f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -344,6 +344,7 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None): assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan + if tz is not None: # we get here e.g. 
via nanmean when we call it on a DTA[tz] result = Timestamp(result, tz=tz) From fd4275f707e20a9d724c1572ffbaf5ffc7218c83 Mon Sep 17 00:00:00 2001 From: Alex Kirko Date: Sun, 8 Nov 2020 05:22:47 +0300 Subject: [PATCH 077/147] BUG: preserve fold in Timestamp.replace (#37644) --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/_libs/tslibs/nattype.pyx | 2 +- pandas/_libs/tslibs/timestamps.pyx | 9 +++++++-- pandas/tests/scalar/timestamp/test_unary_ops.py | 11 +++++++++++ 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 73493bbeb0eac..e5b34e7c8a339 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -412,7 +412,7 @@ Timezones ^^^^^^^^^ - Bug in :func:`date_range` was raising AmbiguousTimeError for valid input with ``ambiguous=False`` (:issue:`35297`) -- +- Bug in :meth:`Timestamp.replace` was losing fold information (:issue:`37610`) Numeric diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 88ad008b42c21..e10ac6a05ead8 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -774,7 +774,7 @@ default 'raise' microsecond : int, optional nanosecond : int, optional tzinfo : tz-convertible, optional - fold : int, optional, default is 0 + fold : int, optional Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 9076325d01bab..b3ae69d7a3237 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1374,7 +1374,7 @@ default 'raise' microsecond=None, nanosecond=None, tzinfo=object, - fold=0, + fold=None, ): """ implements datetime.replace, handles nanoseconds. 
@@ -1390,7 +1390,7 @@ default 'raise' microsecond : int, optional nanosecond : int, optional tzinfo : tz-convertible, optional - fold : int, optional, default is 0 + fold : int, optional Returns ------- @@ -1407,6 +1407,11 @@ default 'raise' # set to naive if needed tzobj = self.tzinfo value = self.value + + # GH 37610. Preserve fold when replacing. + if fold is None: + fold = self.fold + if tzobj is not None: value = tz_convert_from_utc_single(value, tzobj) diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index e8196cd8328e7..88f99a6784ba1 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -424,3 +424,14 @@ def test_timestamp(self): # should agree with datetime.timestamp method dt = ts.to_pydatetime() assert dt.timestamp() == ts.timestamp() + + +@pytest.mark.parametrize("fold", [0, 1]) +def test_replace_preserves_fold(fold): + # GH 37610. Check that replace preserves Timestamp fold property + tz = gettz("Europe/Moscow") + + ts = Timestamp(year=2009, month=10, day=25, hour=2, minute=30, fold=fold, tzinfo=tz) + ts_replaced = ts.replace(second=1) + + assert ts_replaced.fold == fold From dff78ae7fd89c81bfdc5b2fc059be60356ae7dc0 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Sun, 8 Nov 2020 03:59:31 +0100 Subject: [PATCH 078/147] CLN: Clean indexing tests (#37689) --- pandas/tests/indexing/test_iloc.py | 1 - pandas/tests/indexing/test_indexing.py | 1 + pandas/tests/indexing/test_loc.py | 3 ++- pandas/tests/indexing/test_partial.py | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f8dfda3dab486..f5f2ac0225bd4 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -615,7 +615,6 @@ def test_iloc_mask(self): # UserWarnings from reindex of a boolean mask with 
catch_warnings(record=True): simplefilter("ignore", UserWarning) - result = dict() for idx in [None, "index", "locs"]: mask = (df.nums > 2).values if idx: diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 614e424e8aca2..06bd8a5f300bb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -843,6 +843,7 @@ def test_maybe_numeric_slice(self): result = maybe_numeric_slice(df, None, include_bool=True) expected = pd.IndexSlice[:, ["A", "C"]] + assert all(result[1] == expected[1]) result = maybe_numeric_slice(df, [1]) expected = [1] assert result == expected diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6939b280a988b..952368ee0ffa9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1191,7 +1191,8 @@ def test_loc_setitem_multiindex_slice(self): def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self): times = date_range("2000-01-01", freq="10min", periods=100000) ser = Series(range(100000), times) - ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + result = ser.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + tm.assert_series_equal(result, ser) class TestLocSetitemWithExpansion: diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 01db937153b3a..3bf37f4cade8b 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -185,14 +185,14 @@ def test_series_partial_set(self): # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) with pytest.raises(KeyError, match="with any missing labels"): - result = ser.loc[[3, 2, 3]] + ser.loc[[3, 2, 3]] result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) with pytest.raises(KeyError, match="with any missing labels"): - result = 
ser.loc[[3, 2, 3, "x"]] + ser.loc[[3, 2, 3, "x"]] result = ser.reindex([3, 2, 3, "x"]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -203,7 +203,7 @@ def test_series_partial_set(self): expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) with pytest.raises(KeyError, match="with any missing labels"): - result = ser.loc[[2, 2, "x", 1]] + ser.loc[[2, 2, "x", 1]] result = ser.reindex([2, 2, "x", 1]) tm.assert_series_equal(result, expected, check_index_type=True) From 4bac70bbf3814940972da7288f9cab1536dac59f Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Sun, 8 Nov 2020 10:00:20 +0700 Subject: [PATCH 079/147] TST: fix warning for pie chart (#37669) --- pandas/tests/plotting/frame/test_frame.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index f2d2203d25b6c..0f256e623e42c 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1452,11 +1452,20 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): + import matplotlib as mpl + df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - df.plot.pie(subplots=True, ax=axes, legend=True) + + # GH 37668 + kwargs = {} + if mpl.__version__ >= "3.3": + kwargs = {"normalize": True} + + with tm.assert_produces_warning(None): + df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): From 78f092b8764bde8af3f56b360bf574ce852172bf Mon Sep 17 00:00:00 2001 From: Yassir Karroum Date: Sun, 8 Nov 2020 04:01:27 +0100 Subject: [PATCH 080/147] PERF: reverted change from commit 7d257c6 to solve issue #37081 (#37426) --- pandas/core/frame.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git 
a/pandas/core/frame.py b/pandas/core/frame.py index 11b83a393dcc0..bfb633ae55095 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8747,11 +8747,6 @@ def _reduce( cols = self.columns[~dtype_is_dt] self = self[cols] - any_object = np.array( - [is_object_dtype(dtype) for dtype in own_dtypes], - dtype=bool, - ).any() - # TODO: Make other agg func handle axis=None properly GH#21597 axis = self._get_axis_number(axis) labels = self._get_agg_axis(axis) @@ -8778,12 +8773,7 @@ def _get_data() -> DataFrame: data = self._get_bool_data() return data - if numeric_only is not None or ( - numeric_only is None - and axis == 0 - and not any_object - and not self._mgr.any_extension_types - ): + if numeric_only is not None: # For numeric_only non-None and axis non-None, we know # which blocks to use and no try/except is needed. # For numeric_only=None only the case with axis==0 and no object From a44af95f211c0d3466d1f45cd812a9500583802b Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Sun, 8 Nov 2020 04:03:10 +0100 Subject: [PATCH 081/147] DataFrameGroupby.boxplot fails when subplots=False (#28102) --- doc/source/whatsnew/v1.2.0.rst | 2 + pandas/plotting/_matplotlib/boxplot.py | 10 +++ pandas/tests/plotting/test_boxplot_method.py | 73 ++++++++++++++++++++ 3 files changed, 85 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e5b34e7c8a339..9502a2e14565f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -510,6 +510,8 @@ Plotting - Bug in :meth:`DataFrame.plot` was rotating xticklabels when ``subplots=True``, even if the x-axis wasn't an irregular time series (:issue:`29460`) - Bug in :meth:`DataFrame.plot` where a marker letter in the ``style`` keyword sometimes causes a ``ValueError`` (:issue:`21003`) - Twinned axes were losing their tick labels which should only happen to all but the last row or column of 'externally' shared axes (:issue:`33819`) +- Bug in :meth:`DataFrameGroupBy.boxplot` 
when ``subplots=False``, a KeyError would raise (:issue:`16748`) + Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 8ceba22b1f7a4..3d0e30f8b9234 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -9,6 +9,7 @@ from pandas.core.dtypes.missing import remove_na_arraylike import pandas as pd +import pandas.core.common as com from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.core import LinePlot, MPLPlot @@ -443,6 +444,15 @@ def boxplot_frame_groupby( df = frames[0].join(frames[1::]) else: df = frames[0] + + # GH 16748, DataFrameGroupby fails when subplots=False and `column` argument + # is assigned, and in this case, since `df` here becomes MI after groupby, + # so we need to couple the keys (grouped values) and column (original df + # column) together to search for subset to plot + if column is not None: + column = com.convert_to_list_like(column) + multi_key = pd.MultiIndex.from_product([keys, column]) + column = list(multi_key.values) ret = df.boxplot( column=column, fontsize=fontsize, diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index dc2e9e1e8d15f..9e1a8d473b9d6 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -454,3 +454,76 @@ def test_fontsize(self): self._check_ticks_props( df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16 ) + + @pytest.mark.parametrize( + "col, expected_xticklabel", + [ + ("v", ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + (["v"], ["(a, v)", "(b, v)", "(c, v)", "(d, v)", "(e, v)"]), + ("v1", ["(a, v1)", "(b, v1)", "(c, v1)", "(d, v1)", "(e, v1)"]), + ( + ["v", "v1"], + [ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ( + None, + 
[ + "(a, v)", + "(a, v1)", + "(b, v)", + "(b, v1)", + "(c, v)", + "(c, v1)", + "(d, v)", + "(d, v1)", + "(e, v)", + "(e, v1)", + ], + ), + ], + ) + def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel): + # GH 16748 + df = DataFrame( + { + "cat": np.random.choice(list("abcde"), 100), + "v": np.random.rand(100), + "v1": np.random.rand(100), + } + ) + grouped = df.groupby("cat") + + axes = _check_plot_works( + grouped.boxplot, subplots=False, column=col, return_type="axes" + ) + + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel + + def test_boxplot_multiindex_column(self): + # GH 16748 + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = list(zip(*arrays)) + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index) + + col = [("bar", "one"), ("bar", "two")] + axes = _check_plot_works(df.boxplot, column=col, return_type="axes") + + expected_xticklabel = ["(bar, one)", "(bar, two)"] + result_xticklabel = [x.get_text() for x in axes.get_xticklabels()] + assert expected_xticklabel == result_xticklabel From 0c54d4501781bc83938eb11199e7a34ece9de4f3 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Sun, 8 Nov 2020 04:07:59 +0100 Subject: [PATCH 082/147] ENH: Improve error reporting for wrong merge cols (#37547) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/reshape/merge.py | 23 ++++++++-- pandas/tests/reshape/merge/test_merge.py | 54 ++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9502a2e14565f..d07db18ee5df0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -231,6 +231,7 @@ Other enhancements - :class:`Window` now supports 
all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) - :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) - :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`) +- Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 978597e3c7686..aa883d518f8d1 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1203,11 +1203,9 @@ def _validate_specification(self): if self.left_index and self.right_index: self.left_on, self.right_on = (), () elif self.left_index: - if self.right_on is None: - raise MergeError("Must pass right_on or right_index=True") + raise MergeError("Must pass right_on or right_index=True") elif self.right_index: - if self.left_on is None: - raise MergeError("Must pass left_on or left_index=True") + raise MergeError("Must pass left_on or left_index=True") else: # use the common columns common_cols = self.left.columns.intersection(self.right.columns) @@ -1228,8 +1226,19 @@ def _validate_specification(self): 'Can only pass argument "on" OR "left_on" ' 'and "right_on", not a combination of both.' ) + if self.left_index or self.right_index: + raise MergeError( + 'Can only pass argument "on" OR "left_index" ' + 'and "right_index", not a combination of both.' + ) self.left_on = self.right_on = self.on elif self.left_on is not None: + if self.left_index: + raise MergeError( + 'Can only pass argument "left_on" OR "left_index" not both.' 
+ ) + if not self.right_index and self.right_on is None: + raise MergeError('Must pass "right_on" OR "right_index".') n = len(self.left_on) if self.right_index: if len(self.left_on) != self.right.index.nlevels: @@ -1239,6 +1248,12 @@ def _validate_specification(self): ) self.right_on = [None] * n elif self.right_on is not None: + if self.right_index: + raise MergeError( + 'Can only pass argument "right_on" OR "right_index" not both.' + ) + if not self.left_index and self.left_on is None: + raise MergeError('Must pass "left_on" OR "left_index".') n = len(self.right_on) if self.left_index: if len(self.right_on) != self.left.index.nlevels: diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a58372040c7f3..999b827fe0571 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2283,3 +2283,57 @@ def test_merge_join_categorical_multiindex(): expected = expected.drop(["Cat", "Int"], axis=1) result = a.join(b, on=["Cat1", "Int1"]) tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a", "left_index": True}, ["left_on", "left_index"]), + ({"right_on": "a", "right_index": True}, ["right_on", "right_index"]), + ], +) +def test_merge_join_cols_error_reporting_duplicates(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Can only pass argument "{err_msg[0]}" OR "{err_msg[1]}" not both\.' 
+ with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a"}, ["right_on", "right_index"]), + ({"right_on": "a"}, ["left_on", "left_index"]), + ], +) +def test_merge_join_cols_error_reporting_missing(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Must pass "{err_msg[0]}" OR "{err_msg[1]}"\.' + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True}, + {"left_index": True}, + ], +) +def test_merge_join_cols_error_reporting_on_and_index(func, kwargs): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = ( + r'Can only pass argument "on" OR "left_index" ' + r'and "right_index", not a combination of both\.' 
+ ) + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, on="a", **kwargs) From d35135ab74b90a73cbf00dedc35c8b9683753208 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 083/147] Transfer tests of test_frame.py to test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 945 +++++++++++++++--- .../tests/plotting/frame/test_frame_color.py | 116 +-- 2 files changed, 878 insertions(+), 183 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 0f256e623e42c..4d339b93fd30d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,6 +6,7 @@ import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -174,14 +175,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) + df = DataFrame(randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -336,9 +337,415 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, 
labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, 
expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, False, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + 
self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value 
converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes 
= df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and 
invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), 
visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + def test_negative_log(self): df = -DataFrame( - np.random.rand(6, 4), + rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -357,20 +764,15 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - { - "w": np.random.rand(6), - "x": np.random.rand(6), - "y": 
-np.random.rand(6), - "z": -np.random.rand(6), - } + {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} ) # each column has positive-negative mixed value mixed_df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -438,7 +840,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): - df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -462,7 +864,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -480,7 +882,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -501,7 +903,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) width = 0.9 @@ -537,9 +939,49 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, 
position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -585,13 +1027,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = DataFrame( + df1 = pd.DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = DataFrame( + df2 = pd.DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -615,7 +1057,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -634,7 +1076,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = DataFrame(np.random.randn(10), columns=["a"]) + df = pd.DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", 
freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -645,30 +1087,31 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = DataFrame({"dates": dates, "vals": vals}) + df = pd.DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) + @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -679,14 +1122,16 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) + df = pd.DataFrame( + {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} + ) _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -742,10 +1187,43 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", 
ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -758,9 +1236,7 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - np.random.randn(10, 15), - index=list(string.ascii_letters[:10]), - columns=range(15), + randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) ) _check_plot_works(df.plot.bar) @@ -777,6 +1253,164 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == 
"barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + 
self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + 
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -836,7 +1470,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -855,10 +1489,30 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -885,7 +1539,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -923,7 +1577,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1093,16 +1747,16 
@@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(np.random.randn(100, 4)).cumsum() + df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1131,9 +1785,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1164,7 +1818,9 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) + df = pd.DataFrame( + data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] + ) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -1187,7 +1843,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def 
test_legend_name(self): multi = DataFrame( - np.random.randn(4, 4), + randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1196,7 +1852,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1213,7 +1869,7 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: @@ -1226,7 +1882,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(np.random.randn(100, 3)) + df = DataFrame(randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1248,23 +1904,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -1299,7 +1938,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(np.random.randn(10, 2), dtype=object) + df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in 
plotting.PlotAccessor._common_kinds: @@ -1310,14 +1949,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(np.random.rand(10, 2), dtype=object) + df = DataFrame(rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1404,6 +2043,13 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -1452,20 +2098,11 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - import matplotlib as mpl - df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - - # GH 37668 - kwargs = {} - if mpl.__version__ >= "3.3": - kwargs = {"normalize": True} - - with tm.assert_produces_warning(None): - df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) + df.plot.pie(subplots=True, ax=axes, legend=True) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): @@ -1550,11 +2187,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to 
warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1631,11 +2268,11 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) @@ -1852,6 +2489,53 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = 
df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -1983,11 +2667,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(np.random.rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) + df = DataFrame({"a": randn(8), "b": randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -1998,21 +2682,21 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=cax) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=iax) + 
Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - DataFrame(np.random.randn(15, 2), columns=list("AB")) + pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2028,7 +2712,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = DataFrame( + df = pd.DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2049,7 +2733,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2064,7 +2748,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = DataFrame([2, 4], index=[1, 2]) + df = pd.DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2076,7 +2760,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = DataFrame( + df = pd.DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2096,9 +2780,25 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) 
+ df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + def test_plot_no_rows(self): # GH 27758 - df = DataFrame(columns=["foo"], dtype=int) + df = pd.DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2107,13 +2807,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = DataFrame(["a", "b", "c"]) + df = pd.DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2123,7 +2823,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = DataFrame( + df = pd.DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2154,7 +2854,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2168,25 +2868,32 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_xlabel() == str(new_label) @pytest.mark.parametrize( - "xlabel, ylabel", + "index_name, old_label, new_label", [ - (None, None), - ("X Label", None), - (None, "Y Label"), - ("X Label", "Y Label"), + (None, "", "new"), 
+ ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), ], ) - @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) - def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): - # GH 37001 - xcol = "Type A" - ycol = "Type B" - df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) - - # default is the labels are column names - ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) - assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) - assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 2d509e8f3b320..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,20 +1,30 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame +from 
pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm +from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + @td.skip_if_no_mpl -class TestDataFramePlotsColor(TestPlotBase): +class TestDataFrameColor(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl @@ -38,9 +48,10 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) + def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,7 +79,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -112,7 +123,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -145,7 +156,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = DataFrame( + df = pd.DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -166,7 +177,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline 
backend. random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -188,7 +199,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -204,7 +215,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( + df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -212,45 +223,12 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = 
DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -293,7 +271,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -303,7 +281,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -372,7 +350,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -415,7 +393,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -452,7 +430,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), 
linecolors=custom_colors) @@ -474,7 +452,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -542,7 +520,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -592,6 +570,23 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt @@ -599,21 +594,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - def test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + df = 
DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -623,7 +611,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -631,15 +619,15 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
- df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() + assert legend.get_color() == line.get_color() \ No newline at end of file From 44e2977c3855d172ed4a5f0d54e124702e2acb7a Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sun, 8 Nov 2020 17:58:01 +0300 Subject: [PATCH 084/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From 99d0032a7822376a54e08004140c785b6772b36c Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 08:13:12 +0300 Subject: [PATCH 085/147] Fixes for linter --- pandas/tests/plotting/frame/test_frame_color.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 517e3c109fc5b..47a45193b2f52 100644 
--- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -155,7 +155,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else From 5cb5fa3df8d3d6089651c262fa6ec8a1b95bd522 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 16:46:12 +0300 Subject: [PATCH 086/147] =?UTF-8?q?=D0=A1hange=20pd.DateFrame=20to=20DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/tests/plotting/frame/test_frame_color.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 47a45193b2f52..fefa342770c7f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -176,7 +176,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +198,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +214,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -270,7 +270,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -610,7 +610,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +618,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = 
pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): From d053f4e12b130560448617ad102b404a1a25b983 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 8 Nov 2020 21:24:33 +0000 Subject: [PATCH 087/147] Move inconsistent namespace check to pre-commit, fixup more files (#37662) * check for inconsistent namespace usage * doc * typos * verbose regex * use verbose flag * use verbose flag * match both directions * add test * don't import annotations from future * update extra couple of cases * :truck: rename * typing * don't use subprocess * don't type tests * use pathlib --- .pre-commit-config.yaml | 6 ++ ci/code_checks.sh | 13 ---- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/extension/test_categorical.py | 4 +- pandas/tests/groupby/aggregate/test_other.py | 6 +- pandas/tests/groupby/test_counting.py | 11 ++-- pandas/tests/groupby/test_grouping.py | 32 +++++----- pandas/tests/groupby/test_missing.py | 4 +- pandas/tests/groupby/test_quantile.py | 6 +- pandas/tests/groupby/test_timegrouper.py | 10 +-- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/indexes/datetimes/test_shift.py | 16 ++--- pandas/tests/indexes/period/test_formats.py | 4 +- pandas/tests/indexes/test_numeric.py | 4 +- .../tests/indexes/timedeltas/test_formats.py | 4 +- pandas/tests/io/json/test_pandas.py | 6 +- pandas/tests/io/test_compression.py | 4 +- 
pandas/tests/resample/test_time_grouper.py | 32 +++++----- pandas/tests/reshape/merge/test_merge_asof.py | 22 +++---- pandas/tests/series/indexing/test_getitem.py | 8 +-- pandas/tests/series/indexing/test_indexing.py | 2 +- pandas/tests/series/test_repr.py | 2 +- pandas/tests/window/test_expanding.py | 10 +-- ...check_for_inconsistent_pandas_namespace.py | 64 +++++++++++++++++++ .../test_inconsistent_namespace_check.py | 28 ++++++++ 25 files changed, 193 insertions(+), 109 deletions(-) create mode 100644 scripts/check_for_inconsistent_pandas_namespace.py create mode 100644 scripts/tests/test_inconsistent_namespace_check.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c1e4e330c903..f9b396715664a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -119,6 +119,12 @@ repos: entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" types: [python] exclude: ^(asv_bench|pandas/tests|doc)/ + - id: inconsistent-namespace-usage + name: 'Check for inconsistent use of pandas namespace in tests' + entry: python scripts/check_for_inconsistent_pandas_namespace.py + language: python + types: [python] + files: ^pandas/tests/ - id: FrameOrSeriesUnion name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\] diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b5d63e259456b..c920500aac9cd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -37,12 +37,6 @@ function invgrep { return $((! $EXIT_STATUS)) } -function check_namespace { - local -r CLASS=${1} - grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}[(\.]" - test $? 
-gt 0 -} - if [[ "$GITHUB_ACTIONS" == "true" ]]; then FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s" INVGREP_PREPEND="##[error]" @@ -120,13 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG invgrep -R --include=*.{py,pyx} '!r}' pandas RET=$(($RET + $?)) ; echo $MSG "DONE" - - # ------------------------------------------------------------------------- - MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG - for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do - check_namespace ${class} - RET=$(($RET + $?)) - done echo $MSG "DONE" fi diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 1c82d6f9a26ff..438a22c99a4eb 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -862,7 +862,7 @@ def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): @pytest.mark.parametrize( "arr", [ - np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]), + np.array([Timedelta("1 days"), Timedelta("2 days")]), np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), np.array([timedelta(1), timedelta(2)]), ], diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index e8d82b525c9f4..95f338cbc3240 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -192,7 +192,7 @@ def test_cast_category_to_extension_dtype(self, expected): ( "datetime64[ns, MET]", pd.DatetimeIndex( - [pd.Timestamp("2015-01-01 00:00:00+0100", tz="MET")] + [Timestamp("2015-01-01 00:00:00+0100", tz="MET")] ).array, ), ], @@ -254,7 +254,7 @@ def _compare_other(self, s, data, op_name, other): @pytest.mark.parametrize( "categories", - [["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]], + [["a", "b"], [0, 
1], [Timestamp("2019"), Timestamp("2020")]], ) def test_not_equal_with_na(self, categories): # https://github.com/pandas-dev/pandas/issues/32276 diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 15803d4b0ef94..5d0f6d6262899 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -425,7 +425,7 @@ def test_agg_over_numpy_arrays(): result = df.groupby("category").agg(sum) expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] - expected_index = pd.Index([1, 2], name="category") + expected_index = Index([1, 2], name="category") expected_column = ["arraydata"] expected = DataFrame(expected_data, index=expected_index, columns=expected_column) @@ -497,7 +497,7 @@ def test_sum_uint64_overflow(): df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) df = df + 9223372036854775807 - index = pd.Index( + index = Index( [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 ) expected = DataFrame( @@ -596,7 +596,7 @@ def test_agg_lambda_with_timezone(): result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) expected = DataFrame( [pd.Timestamp("2018-01-01", tz="UTC")], - index=pd.Index([1], name="tag"), + index=Index([1], name="tag"), columns=["date"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 04b73b16ae2c7..1317f0f68216a 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -4,7 +4,6 @@ import numpy as np import pytest -import pandas as pd from pandas import ( DataFrame, Index, @@ -260,7 +259,7 @@ def test_groupby_timedelta_cython_count(): df = DataFrame( {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} ) - expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt") + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") 
result = df.groupby("g").delt.count() tm.assert_series_equal(expected, result) @@ -317,12 +316,12 @@ def test_count_non_nulls(): def test_count_object(): df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() - expected = Series([3, 3], index=pd.Index([2, 3], name="c"), name="a") + expected = Series([3, 3], index=Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() - expected = Series([1, 3], index=pd.Index([2, 3], name="c"), name="a") + expected = Series([1, 3], index=Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) @@ -354,7 +353,7 @@ def test_lower_int_prec_count(): ) result = df.groupby("grp").count() expected = DataFrame( - {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp") + {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp") ) tm.assert_frame_equal(result, expected) @@ -374,5 +373,5 @@ def __eq__(self, other): df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)}) result = df.groupby("grp").count() - expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp")) + expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp")) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 4d6a1afe06e1c..4aefb73bf912c 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -614,12 +614,12 @@ def test_list_grouper_with_nat(self): # Grouper in a list grouping result = df.groupby([grouper]) - expected = {pd.Timestamp("2011-01-01"): Index(list(range(364)))} + expected = {Timestamp("2011-01-01"): Index(list(range(364)))} tm.assert_dict_equal(result.groups, expected) # Test case without a list result = df.groupby(grouper) - expected = 
{pd.Timestamp("2011-01-01"): 365} + expected = {Timestamp("2011-01-01"): 365} tm.assert_dict_equal(result.groups, expected) @pytest.mark.parametrize( @@ -938,12 +938,12 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) df = DataFrame( @@ -953,12 +953,12 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0, 2], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) # length=3 @@ -969,15 +969,15 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 3 assert grouped.ngroups == 3 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert 
(pd.Timestamp("2013-10-31"), "start") in grouped.groups - assert (pd.Timestamp("2014-08-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-08-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) - res = grouped.get_group((pd.Timestamp("2014-08-31"), "start")) + res = grouped.get_group((Timestamp("2014-08-31"), "start")) tm.assert_frame_equal(res, df.iloc[[2], :]) def test_grouping_string_repr(self): diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 2c2147795bc07..580148cb2a3a3 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -11,11 +11,11 @@ def test_groupby_column_index_name_lost_fill_funcs(func): # GH: 29764 groupby loses index sometimes df = DataFrame( [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], - columns=pd.Index(["type", "a", "b"], name="idx"), + columns=Index(["type", "a", "b"], name="idx"), ) df_grouped = df.groupby(["type"])[["a", "b"]] result = getattr(df_grouped, func)().columns - expected = pd.Index(["a", "b"], name="idx") + expected = Index(["a", "b"], name="idx") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index e48f10ebacb79..bd6d33c59a48a 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -194,7 +194,7 @@ def test_quantile_missing_group_values_correct_results( df = DataFrame({"key": key, "val": val}) expected = DataFrame( - expected_val, index=pd.Index(expected_key, 
name="key"), columns=["val"] + expected_val, index=Index(expected_key, name="key"), columns=["val"] ) grp = df.groupby("key") @@ -223,7 +223,7 @@ def test_groupby_quantile_nullable_array(values, q): idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None]) true_quantiles = [0.0, 0.5, 1.0] else: - idx = pd.Index(["x", "y"], name="a") + idx = Index(["x", "y"], name="a") true_quantiles = [0.5] expected = pd.Series(true_quantiles * 2, index=idx, name="b") @@ -251,6 +251,6 @@ def test_groupby_timedelta_quantile(): pd.Timedelta("0 days 00:00:02.990000"), ] }, - index=pd.Index([1, 2], name="group"), + index=Index([1, 2], name="group"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 612079447576f..c3282758a23f2 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -452,7 +452,7 @@ def test_groupby_groups_datetimeindex(self): result = df.groupby(level="date").groups dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] expected = { - Timestamp(date): pd.DatetimeIndex([date], name="date") for date in dates + Timestamp(date): DatetimeIndex([date], name="date") for date in dates } tm.assert_dict_equal(result, expected) @@ -460,7 +460,7 @@ def test_groupby_groups_datetimeindex(self): for date in dates: result = grouped.get_group(date) data = [[df.loc[date, "A"], df.loc[date, "B"]]] - expected_index = pd.DatetimeIndex([date], name="date", freq="D") + expected_index = DatetimeIndex([date], name="date", freq="D") expected = DataFrame(data, columns=list("AB"), index=expected_index) tm.assert_frame_equal(result, expected) @@ -484,7 +484,7 @@ def test_groupby_groups_datetimeindex_tz(self): ) df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific")) - exp_idx1 = pd.DatetimeIndex( + exp_idx1 = DatetimeIndex( [ "2011-07-19 07:00:00", "2011-07-19 07:00:00", @@ -508,13 +508,13 @@ def 
test_groupby_groups_datetimeindex_tz(self): tm.assert_frame_equal(result, expected) # by level - didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo") + didx = DatetimeIndex(dates, tz="Asia/Tokyo") df = DataFrame( {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, index=didx, ) - exp_idx = pd.DatetimeIndex( + exp_idx = DatetimeIndex( ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], tz="Asia/Tokyo", ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 1aeff7426c33a..d7426a5e3b42e 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1134,7 +1134,7 @@ def test_categorical_and_not_categorical_key(observed): # GH 32494 df_with_categorical = DataFrame( { - "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), + "A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]), "B": [1, 2, 3], "C": ["a", "b", "a"], } diff --git a/pandas/tests/indexes/datetimes/test_shift.py b/pandas/tests/indexes/datetimes/test_shift.py index a2a673ed5d9e0..3c202005f7933 100644 --- a/pandas/tests/indexes/datetimes/test_shift.py +++ b/pandas/tests/indexes/datetimes/test_shift.py @@ -20,25 +20,25 @@ class TestDatetimeIndexShift: def test_dti_shift_tzaware(self, tz_naive_fixture): # GH#9903 tz = tz_naive_fixture - idx = pd.DatetimeIndex([], name="xxx", tz=tz) + idx = DatetimeIndex([], name="xxx", tz=tz) tm.assert_index_equal(idx.shift(0, freq="H"), idx) tm.assert_index_equal(idx.shift(3, freq="H"), idx) - idx = pd.DatetimeIndex( + idx = DatetimeIndex( ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], name="xxx", tz=tz, freq="H", ) tm.assert_index_equal(idx.shift(0, freq="H"), idx) - exp = pd.DatetimeIndex( + exp = DatetimeIndex( ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], name="xxx", tz=tz, freq="H", ) tm.assert_index_equal(idx.shift(3, freq="H"), exp) - exp = pd.DatetimeIndex( + exp = 
DatetimeIndex( ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], name="xxx", tz=tz, @@ -51,21 +51,21 @@ def test_dti_shift_freqs(self): # GH#8083 drange = pd.date_range("20130101", periods=5) result = drange.shift(1) - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], freq="D", ) tm.assert_index_equal(result, expected) result = drange.shift(-1) - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"], freq="D", ) tm.assert_index_equal(result, expected) result = drange.shift(3, freq="2D") - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"], freq="D", ) @@ -84,7 +84,7 @@ def test_dti_shift_int(self): def test_dti_shift_no_freq(self): # GH#19147 - dti = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) + dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): dti.shift(2) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 150a797169c14..b60ae8819023f 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import PeriodIndex +from pandas import PeriodIndex, Series import pandas._testing as tm @@ -154,7 +154,7 @@ def test_representation_to_series(self): [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9], ): - result = repr(pd.Series(idx)) + result = repr(Series(idx)) assert result == expected def test_summary(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 045816b3c9513..0c990b0456b5c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ 
b/pandas/tests/indexes/test_numeric.py @@ -273,7 +273,7 @@ def test_equals_numeric_other_index_type(self, other): def test_lookups_datetimelike_values(self, vals): # If we have datetime64 or timedelta64 values, make sure they are # wrappped correctly GH#31163 - ser = pd.Series(vals, index=range(3, 6)) + ser = Series(vals, index=range(3, 6)) ser.index = ser.index.astype("float64") expected = vals[1] @@ -642,7 +642,7 @@ def test_range_float_union_dtype(): def test_uint_index_does_not_convert_to_float64(box): # https://github.com/pandas-dev/pandas/issues/28279 # https://github.com/pandas-dev/pandas/issues/28023 - series = pd.Series( + series = Series( [0, 1, 2, 3, 4, 5], index=[ 7606741985629028552, diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 1dfc5b5305008..8a8e2abd17165 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -1,7 +1,7 @@ import pytest import pandas as pd -from pandas import TimedeltaIndex +from pandas import Series, TimedeltaIndex class TestTimedeltaIndexRendering: @@ -62,7 +62,7 @@ def test_representation_to_series(self): for idx, expected in zip( [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] ): - result = repr(pd.Series(idx)) + result = repr(Series(idx)) assert result == expected def test_summary(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 47b7bd0983305..3e5f9d481ce48 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -436,7 +436,7 @@ def test_frame_mixedtype_orient(self): # GH10289 def test_v12_compat(self, datapath): dti = pd.date_range("2000-01-03", "2000-01-07") # freq doesnt roundtrip - dti = pd.DatetimeIndex(np.asarray(dti), freq=None) + dti = DatetimeIndex(np.asarray(dti), freq=None) df = DataFrame( [ [1.56808523, 0.65727391, 1.81021139, -0.17251653], @@ -466,7 +466,7 @@ def 
test_v12_compat(self, datapath): def test_blocks_compat_GH9037(self): index = pd.date_range("20000101", periods=10, freq="H") # freq doesnt round-trip - index = pd.DatetimeIndex(list(index), freq=None) + index = DatetimeIndex(list(index), freq=None) df_mixed = DataFrame( dict( @@ -1189,7 +1189,7 @@ def test_tz_range_is_utc(self, tz_range): ) assert dumps(tz_range, iso_dates=True) == exp - dti = pd.DatetimeIndex(tz_range) + dti = DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp df = DataFrame({"DT": dti}) result = dumps(df, iso_dates=True) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 8d7d5d85cbb48..43a31ff1e4b58 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -205,8 +205,8 @@ def test_with_missing_lzma_runtime(): import sys import pytest sys.modules['lzma'] = None - import pandas - df = pandas.DataFrame() + import pandas as pd + df = pd.DataFrame() with pytest.raises(RuntimeError, match='lzma module'): df.to_csv('foo.csv', compression='xz') """ diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 0832724110203..50e7cf9bd8eda 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -5,7 +5,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, Timestamp import pandas._testing as tm from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range @@ -306,21 +306,21 @@ def test_groupby_resample_interpolate(): expected_ind = pd.MultiIndex.from_tuples( [ (50, "2018-01-07"), - (50, pd.Timestamp("2018-01-08")), - (50, pd.Timestamp("2018-01-09")), - (50, pd.Timestamp("2018-01-10")), - (50, pd.Timestamp("2018-01-11")), - (50, pd.Timestamp("2018-01-12")), - (50, pd.Timestamp("2018-01-13")), - (50, pd.Timestamp("2018-01-14")), - (50, pd.Timestamp("2018-01-15")), - (50, 
pd.Timestamp("2018-01-16")), - (50, pd.Timestamp("2018-01-17")), - (50, pd.Timestamp("2018-01-18")), - (50, pd.Timestamp("2018-01-19")), - (50, pd.Timestamp("2018-01-20")), - (50, pd.Timestamp("2018-01-21")), - (60, pd.Timestamp("2018-01-14")), + (50, Timestamp("2018-01-08")), + (50, Timestamp("2018-01-09")), + (50, Timestamp("2018-01-10")), + (50, Timestamp("2018-01-11")), + (50, Timestamp("2018-01-12")), + (50, Timestamp("2018-01-13")), + (50, Timestamp("2018-01-14")), + (50, Timestamp("2018-01-15")), + (50, Timestamp("2018-01-16")), + (50, Timestamp("2018-01-17")), + (50, Timestamp("2018-01-18")), + (50, Timestamp("2018-01-19")), + (50, Timestamp("2018-01-20")), + (50, Timestamp("2018-01-21")), + (60, Timestamp("2018-01-14")), ], names=["volume", "week_starting"], ) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 895de2b748c34..613e7d423d87f 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -98,7 +98,7 @@ def test_examples2(self): pd.merge_asof(trades, quotes, on="time", by="ticker") pd.merge_asof( - trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") + trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms") ) expected = pd.DataFrame( @@ -126,7 +126,7 @@ def test_examples2(self): quotes, on="time", by="ticker", - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), allow_exact_matches=False, ) tm.assert_frame_equal(result, expected) @@ -591,7 +591,7 @@ def test_non_sorted(self): @pytest.mark.parametrize( "tolerance", [Timedelta("1day"), datetime.timedelta(days=1)], - ids=["pd.Timedelta", "datetime.timedelta"], + ids=["Timedelta", "datetime.timedelta"], ) def test_tolerance(self, tolerance): @@ -652,7 +652,7 @@ def test_tolerance_tz(self): "value2": list("ABCDE"), } ) - result = pd.merge_asof(left, right, on="date", tolerance=pd.Timedelta("1 day")) + result = pd.merge_asof(left, right, on="date", 
tolerance=Timedelta("1 day")) expected = pd.DataFrame( { @@ -698,7 +698,7 @@ def test_index_tolerance(self): left_index=True, right_index=True, by="ticker", - tolerance=pd.Timedelta("1day"), + tolerance=Timedelta("1day"), ) tm.assert_frame_equal(result, expected) @@ -792,7 +792,7 @@ def test_allow_exact_matches_and_tolerance2(self): df2, on="time", allow_exact_matches=False, - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), ) expected = pd.DataFrame( { @@ -827,7 +827,7 @@ def test_allow_exact_matches_and_tolerance3(self): df2, on="time", allow_exact_matches=False, - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), ) expected = pd.DataFrame( { @@ -1342,9 +1342,9 @@ def test_merge_index_column_tz(self): def test_left_index_right_index_tolerance(self): # https://github.com/pandas-dev/pandas/issues/35558 - dr1 = pd.date_range( - start="1/1/2020", end="1/20/2020", freq="2D" - ) + pd.Timedelta(seconds=0.4) + dr1 = pd.date_range(start="1/1/2020", end="1/20/2020", freq="2D") + Timedelta( + seconds=0.4 + ) dr2 = pd.date_range(start="1/1/2020", end="2/1/2020") df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1)) @@ -1358,6 +1358,6 @@ def test_left_index_right_index_tolerance(self): df2, left_index=True, right_index=True, - tolerance=pd.Timedelta(seconds=0.5), + tolerance=Timedelta(seconds=0.5), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 71bcce12796f5..7b794668803c3 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -188,7 +188,7 @@ def test_getitem_slice_date(self, slc, positions): class TestSeriesGetitemListLike: - @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series]) def test_getitem_no_matches(self, box): # GH#33462 we expect the same behavior for list/ndarray/Index/Series ser = 
Series(["A", "B"]) @@ -212,7 +212,7 @@ def test_getitem_intlist_intindex_periodvalues(self): tm.assert_series_equal(result, exp) assert result.dtype == "Period[D]" - @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("box", [list, np.array, Index]) def test_getitem_intlist_intervalindex_non_int(self, box): # GH#33404 fall back to positional since ints are unambiguous dti = date_range("2000-01-03", periods=3)._with_freq(None) @@ -224,11 +224,11 @@ def test_getitem_intlist_intervalindex_non_int(self, box): result = ser[key] tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("box", [list, np.array, Index]) @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64]) def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): # GH#33404 do _not_ fall back to positional since ints are ambiguous - idx = pd.Index(range(4)).astype(dtype) + idx = Index(range(4)).astype(dtype) dti = date_range("2000-01-03", periods=3) mi = pd.MultiIndex.from_product([idx, dti]) ser = Series(range(len(mi))[::-1], index=mi) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 88087110fc221..1f2adaafbbccd 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -487,7 +487,7 @@ def test_categorical_assigning_ops(): def test_getitem_categorical_str(): # GH#31765 - ser = Series(range(5), index=pd.Categorical(["a", "b", "c", "a", "b"])) + ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) result = ser["a"] expected = ser.iloc[[0, 3]] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 7325505ce233b..75e7f8a17eda3 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -251,7 +251,7 @@ class County: def __repr__(self) -> str: 
return self.name + ", " + self.state - cat = pd.Categorical([County() for _ in range(61)]) + cat = Categorical([County() for _ in range(61)]) idx = Index(cat) ser = idx.to_series() diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 183d2814920e4..21c7477918d02 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -90,14 +90,14 @@ def test_empty_df_expanding(expander): def test_missing_minp_zero(): # https://github.com/pandas-dev/pandas/pull/18921 # minp=0 - x = pd.Series([np.nan]) + x = Series([np.nan]) result = x.expanding(min_periods=0).sum() - expected = pd.Series([0.0]) + expected = Series([0.0]) tm.assert_series_equal(result, expected) # minp=1 result = x.expanding(min_periods=1).sum() - expected = pd.Series([np.nan]) + expected = Series([np.nan]) tm.assert_series_equal(result, expected) @@ -252,6 +252,6 @@ def test_expanding_sem(constructor): obj = getattr(pd, constructor)([0, 1, 2]) result = obj.expanding().sem() if isinstance(result, DataFrame): - result = pd.Series(result[0].values) - expected = pd.Series([np.nan] + [0.707107] * 2) + result = Series(result[0].values) + expected = Series([np.nan] + [0.707107] * 2) tm.assert_series_equal(result, expected) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py new file mode 100644 index 0000000000000..4b4515cdf7e11 --- /dev/null +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -0,0 +1,64 @@ +""" +Check that test suite file doesn't use the pandas namespace inconsistently. + +We check for cases of ``Series`` and ``pd.Series`` appearing in the same file +(likewise for some other common classes). 
+ +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run inconsistent-namespace-usage --all-files +""" + +import argparse +from pathlib import Path +import re +from typing import Optional, Sequence + +PATTERN = r""" + ( + (? None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*", type=Path) + args = parser.parse_args(argv) + + for class_name in CLASS_NAMES: + pattern = re.compile( + PATTERN.format(class_name=class_name).encode(), + flags=re.MULTILINE | re.DOTALL | re.VERBOSE, + ) + for path in args.paths: + contents = path.read_bytes() + match = pattern.search(contents) + assert match is None, ERROR_MESSAGE.format( + class_name=class_name, path=str(path) + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py new file mode 100644 index 0000000000000..37e6d288d9341 --- /dev/null +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import pytest + +from scripts.check_for_inconsistent_pandas_namespace import main + +BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" +BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" +GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()" +GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" + + +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) +def test_inconsistent_usage(tmpdir, content): + tmpfile = Path(tmpdir / "tmpfile.py") + tmpfile.touch() + tmpfile.write_text(content) + msg = fr"Found both `pd\.Categorical` and `Categorical` in {str(tmpfile)}" + with pytest.raises(AssertionError, match=msg): + main((str(tmpfile),)) + + +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) +def test_consistent_usage(tmpdir, content): + tmpfile = Path(tmpdir / "tmpfile.py") + tmpfile.touch() + tmpfile.write_text(content) + main((str(tmpfile),)) # Should 
not raise. From 51dd1d68a1b43391b589ff4ed1964a46a48d3d45 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Nov 2020 16:37:00 -0800 Subject: [PATCH 088/147] REF: simplify NDFrame.replace, ObjectBlock.replace (#37704) --- pandas/core/generic.py | 34 ++++++------- pandas/core/internals/blocks.py | 48 +++++++------------ .../tests/arrays/categorical/test_replace.py | 3 +- 3 files changed, 37 insertions(+), 48 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bea650c1b50fd..02fa7308e7ee8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6744,25 +6744,25 @@ def replace( else: raise TypeError("value argument must be scalar, dict, or Series") - elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if is_list_like(value): - if len(to_replace) != len(value): - raise ValueError( - f"Replacement lists must match in length. " - f"Expecting {len(to_replace)} got {len(value)} " - ) - self._consolidate_inplace() - new_data = self._mgr.replace_list( - src_list=to_replace, - dest_list=value, - inplace=inplace, - regex=regex, + elif is_list_like(to_replace): + if not is_list_like(value): + # e.g. to_replace = [NA, ''] and value is 0, + # so we replace NA with 0 and then replace '' with 0 + value = [value] * len(to_replace) + + # e.g. we have to_replace = [NA, ''] and value = [0, 'missing'] + if len(to_replace) != len(value): + raise ValueError( + f"Replacement lists must match in length. 
" + f"Expecting {len(to_replace)} got {len(value)} " ) + new_data = self._mgr.replace_list( + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, + ) - else: # [NA, ''] -> 0 - new_data = self._mgr.replace( - to_replace=to_replace, value=value, inplace=inplace, regex=regex - ) elif to_replace is None: if not ( is_re_compilable(regex) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8e01aaa396265..9e6480dd709f0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2502,39 +2502,14 @@ def replace( inplace: bool = False, regex: bool = False, ) -> List["Block"]: - to_rep_is_list = is_list_like(to_replace) - value_is_list = is_list_like(value) - both_lists = to_rep_is_list and value_is_list - either_list = to_rep_is_list or value_is_list + # Note: the checks we do in NDFrame.replace ensure we never get + # here with listlike to_replace or value, as those cases + # go through _replace_list - result_blocks: List["Block"] = [] - blocks: List["Block"] = [self] - - if not either_list and is_re(to_replace): + if is_re(to_replace) or regex: return self._replace_single(to_replace, value, inplace=inplace, regex=True) - elif not (either_list or regex): + else: return super().replace(to_replace, value, inplace=inplace, regex=regex) - elif both_lists: - for to_rep, v in zip(to_replace, value): - result_blocks = [] - for b in blocks: - result = b._replace_single(to_rep, v, inplace=inplace, regex=regex) - result_blocks.extend(result) - blocks = result_blocks - return result_blocks - - elif to_rep_is_list and regex: - for to_rep in to_replace: - result_blocks = [] - for b in blocks: - result = b._replace_single( - to_rep, value, inplace=inplace, regex=regex - ) - result_blocks.extend(result) - blocks = result_blocks - return result_blocks - - return self._replace_single(to_replace, value, inplace=inplace, regex=regex) def _replace_single( self, @@ -2627,6 +2602,19 @@ def re_replacer(s): class 
CategoricalBlock(ExtensionBlock): __slots__ = () + def _replace_list( + self, + src_list: List[Any], + dest_list: List[Any], + inplace: bool = False, + regex: bool = False, + ) -> List["Block"]: + if len(algos.unique(dest_list)) == 1: + # We got likely here by tiling value inside NDFrame.replace, + # so un-tile here + return self.replace(src_list, dest_list[0], inplace, regex) + return super()._replace_list(src_list, dest_list, inplace, regex) + def replace( self, to_replace, diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py index 5889195ad68db..007c4bdea17f8 100644 --- a/pandas/tests/arrays/categorical/test_replace.py +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -21,6 +21,7 @@ ((1, 2, 4), 5, [5, 5, 3], False), ((5, 6), 2, [1, 2, 3], False), # many-to-many, handled outside of Categorical and results in separate dtype + # except for cases with only 1 unique entry in `value` ([1], [2], [2, 2, 3], True), ([1, 4], [5, 2], [5, 2, 3], True), # check_categorical sorts categories, which crashes on mixed dtypes @@ -30,7 +31,7 @@ ) def test_replace(to_replace, value, expected, flip_categories): # GH 31720 - stays_categorical = not isinstance(value, list) + stays_categorical = not isinstance(value, list) or len(pd.unique(value)) == 1 s = pd.Series([1, 2, 3], dtype="category") result = s.replace(to_replace, value) From 1f425dcf49172dee21ab0bbfc55aad2388a3d9bc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Nov 2020 16:37:37 -0800 Subject: [PATCH 089/147] REF: implement Categorical.encode_with_my_categories (#37650) * REF: implement Categorical.encode_with_my_categories * privatize --- pandas/core/arrays/categorical.py | 30 +++++++++++++++++++++++------- pandas/core/dtypes/concat.py | 2 +- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 3e2da3e95f396..87a049c77dc32 100644 --- 
a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1693,9 +1693,8 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray: # Indexing on codes is more efficient if categories are the same, # so we can apply some optimizations based on the degree of # dtype-matching. - codes = recode_for_categories( - target.codes, target.categories, self.categories, copy=False - ) + cat = self._encode_with_my_categories(target) + codes = cat._codes else: codes = self.categories.get_indexer(target) @@ -1867,8 +1866,8 @@ def _validate_setitem_value(self, value): "without identical categories" ) # is_dtype_equal implies categories_match_up_to_permutation - new_codes = self._validate_listlike(value) - value = Categorical.from_codes(new_codes, dtype=self.dtype) + value = self._encode_with_my_categories(value) + return value._codes # wrap scalars and hashable-listlikes in list rvalue = value if not is_hashable(value) else [value] @@ -2100,8 +2099,8 @@ def equals(self, other: object) -> bool: if not isinstance(other, Categorical): return False elif self._categories_match_up_to_permutation(other): - other_codes = self._validate_listlike(other) - return np.array_equal(self._codes, other_codes) + other = self._encode_with_my_categories(other) + return np.array_equal(self._codes, other._codes) return False @classmethod @@ -2112,6 +2111,23 @@ def _concat_same_type(self, to_concat): # ------------------------------------------------------------------ + def _encode_with_my_categories(self, other: "Categorical") -> "Categorical": + """ + Re-encode another categorical using this Categorical's categories. + + Notes + ----- + This assumes we have already checked + self._categories_match_up_to_permutation(other). + """ + # Indexing on codes is more efficient if categories are the same, + # so we can apply some optimizations based on the degree of + # dtype-matching. 
+ codes = recode_for_categories( + other.codes, other.categories, self.categories, copy=False + ) + return self._from_backing_data(codes) + def _categories_match_up_to_permutation(self, other: "Categorical") -> bool: """ Returns True if categoricals are the same dtype diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 99dc01ef421d1..a38d9cbad0d64 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -301,7 +301,7 @@ def _maybe_unwrap(x): categories = first.categories ordered = first.ordered - all_codes = [first._validate_listlike(x) for x in to_union] + all_codes = [first._encode_with_my_categories(x)._codes for x in to_union] new_codes = np.concatenate(all_codes) if sort_categories and not ignore_order and ordered: From 6880e4332922190cf71a36c971310150bc4ebfce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Nov 2020 16:38:07 -0800 Subject: [PATCH 090/147] BUG: unpickling modifies Block.ndim (#37657) --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/core/internals/managers.py | 9 ++++++--- pandas/tests/io/test_pickle.py | 12 ++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index a122154904996..e0fa68e3b9f80 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -24,6 +24,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`) +- Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fda4da8694ea3..767c653f8a404 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -278,14 +278,17 @@ def __getstate__(self): return axes_array, block_values, block_items, extra_state def __setstate__(self, state): - def unpickle_block(values, mgr_locs): - return make_block(values, placement=mgr_locs) + def unpickle_block(values, mgr_locs, ndim: int): + # TODO(EA2D): ndim would be unnecessary with 2D EAs + return make_block(values, placement=mgr_locs, ndim=ndim) if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: state = state[3]["0.14.1"] self.axes = [ensure_index(ax) for ax in state["axes"]] + ndim = len(self.axes) self.blocks = tuple( - unpickle_block(b["values"], b["mgr_locs"]) for b in state["blocks"] + unpickle_block(b["values"], b["mgr_locs"], ndim=ndim) + for b in state["blocks"] ) else: raise NotImplementedError("pre-0.14.1 pickles are no longer supported") diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 925f6b5f125c7..34b36e2549b62 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -576,3 +576,15 @@ def test_pickle_datetimes(datetime_series): def test_pickle_strings(string_series): unp_series = tm.round_trip_pickle(string_series) tm.assert_series_equal(unp_series, string_series) + + +def test_pickle_preserves_block_ndim(): + # GH#37631 + ser = Series(list("abc")).astype("category").iloc[[0]] + res = tm.round_trip_pickle(ser) + + assert res._mgr.blocks[0].ndim == 1 + assert res._mgr.blocks[0].shape == (1,) + + # GH#37631 OP issue was about indexing, underlying problem was pickle + tm.assert_series_equal(res[[True]], ser) From b900041646eb5064ce9d7a3e31a97a7523bd5e73 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Nov 2020 16:38:23 -0800 Subject: [PATCH 091/147] 
REF: dont support dt64tz in nanmean (#37658) --- pandas/core/nanops.py | 10 +++------- pandas/tests/test_nanops.py | 5 ++--- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6dc05c23c026f..cfb02e5b1e987 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -8,7 +8,7 @@ from pandas._config import get_option -from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib +from pandas._libs import NaT, Timedelta, iNaT, lib from pandas._typing import ArrayLike, Dtype, DtypeObj, F, Scalar from pandas.compat._optional import import_optional_dependency @@ -330,7 +330,7 @@ def _na_ok_dtype(dtype: DtypeObj) -> bool: return not issubclass(dtype.type, np.integer) -def _wrap_results(result, dtype: DtypeObj, fill_value=None): +def _wrap_results(result, dtype: np.dtype, fill_value=None): """ wrap our results if needed """ if result is NaT: pass @@ -340,15 +340,11 @@ def _wrap_results(result, dtype: DtypeObj, fill_value=None): # GH#24293 fill_value = iNaT if not isinstance(result, np.ndarray): - tz = getattr(dtype, "tz", None) assert not isna(fill_value), "Expected non-null fill_value" if result == fill_value: result = np.nan - if tz is not None: - # we get here e.g. 
via nanmean when we call it on a DTA[tz] - result = Timestamp(result, tz=tz) - elif isna(result): + if isna(result): result = np.datetime64("NaT", "ns") else: result = np.int64(result).view("datetime64[ns]") diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 458fba2e13b0f..359a7eecf6f7b 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -988,11 +988,10 @@ def prng(self): class TestDatetime64NaNOps: - @pytest.mark.parametrize("tz", [None, "UTC"]) # Enabling mean changes the behavior of DataFrame.mean # See https://github.com/pandas-dev/pandas/issues/24752 - def test_nanmean(self, tz): - dti = pd.date_range("2016-01-01", periods=3, tz=tz) + def test_nanmean(self): + dti = pd.date_range("2016-01-01", periods=3) expected = dti[1] for obj in [dti, DatetimeArray(dti), Series(dti)]: From c03a0156d2dded654aaaa1a82e420f457bd7d0e9 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sun, 8 Nov 2020 19:39:04 -0500 Subject: [PATCH 092/147] CLN: Simplify groupby head/tail tests (#37702) --- pandas/tests/groupby/test_nth.py | 69 +++++++++++--------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index df1d7819a1894..10394ea997775 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -487,53 +487,30 @@ def test_nth_multi_index_as_expected(): tm.assert_frame_equal(result, expected) -def test_groupby_head_tail(): +@pytest.mark.parametrize( + "op, n, expected_rows", + [ + ("head", -1, []), + ("head", 0, []), + ("head", 1, [0, 2]), + ("head", 7, [0, 1, 2]), + ("tail", -1, []), + ("tail", 0, []), + ("tail", 1, [1, 2]), + ("tail", 7, [0, 1, 2]), + ], +) +@pytest.mark.parametrize("columns", [None, [], ["A"], ["B"], ["A", "B"]]) +@pytest.mark.parametrize("as_index", [True, False]) +def test_groupby_head_tail(op, n, expected_rows, columns, as_index): df = 
DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) - g_as = df.groupby("A", as_index=True) - g_not_as = df.groupby("A", as_index=False) - - # as_index= False, much easier - tm.assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) - tm.assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) - - empty_not_as = DataFrame(columns=df.columns, index=Index([], dtype=df.index.dtype)) - empty_not_as["A"] = empty_not_as["A"].astype(df.A.dtype) - empty_not_as["B"] = empty_not_as["B"].astype(df.B.dtype) - tm.assert_frame_equal(empty_not_as, g_not_as.head(0)) - tm.assert_frame_equal(empty_not_as, g_not_as.tail(0)) - tm.assert_frame_equal(empty_not_as, g_not_as.head(-1)) - tm.assert_frame_equal(empty_not_as, g_not_as.tail(-1)) - - tm.assert_frame_equal(df, g_not_as.head(7)) # contains all - tm.assert_frame_equal(df, g_not_as.tail(7)) - - # as_index=True, (used to be different) - df_as = df - - tm.assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) - tm.assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) - - empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) - empty_as["A"] = empty_not_as["A"].astype(df.A.dtype) - empty_as["B"] = empty_not_as["B"].astype(df.B.dtype) - tm.assert_frame_equal(empty_as, g_as.head(0)) - tm.assert_frame_equal(empty_as, g_as.tail(0)) - tm.assert_frame_equal(empty_as, g_as.head(-1)) - tm.assert_frame_equal(empty_as, g_as.tail(-1)) - - tm.assert_frame_equal(df_as, g_as.head(7)) # contains all - tm.assert_frame_equal(df_as, g_as.tail(7)) - - # test with selection - tm.assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) - tm.assert_frame_equal(g_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) - tm.assert_frame_equal(g_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) - tm.assert_frame_equal(g_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) - - tm.assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) - tm.assert_frame_equal(g_not_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) - tm.assert_frame_equal(g_not_as[["B"]].head(1), df_as.loc[[0, 2], 
["B"]]) - tm.assert_frame_equal(g_not_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) + g = df.groupby("A", as_index=as_index) + expected = df.iloc[expected_rows] + if columns is not None: + g = g[columns] + expected = expected[columns] + result = getattr(g, op)(n) + tm.assert_frame_equal(result, expected) def test_group_selection_cache(): From 04cb4b31b5cf5816fcef226476cee64975d21cf3 Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Mon, 9 Nov 2020 03:52:06 +0100 Subject: [PATCH 093/147] Bug in loc raised for numeric label even when label is in Index (#37675) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/base.py | 9 ++------- pandas/tests/indexing/test_loc.py | 9 +++++++++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d07db18ee5df0..d4d28dde52d58 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -466,6 +466,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`) - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) +- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 545d1d834fe2d..fd402ef27a3a9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5200,13 +5200,8 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind): # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. 
Float64Index and # datetimelike Indexes - # reject them - if is_float(label): - self._invalid_indexer("slice", label) - - # we are trying to find integer bounds on a non-integer based index - # this is rejected (generally .loc gets you here) - elif is_integer(label): + # reject them, if index does not contain label + if (is_float(label) or is_integer(label)) and label not in self.values: self._invalid_indexer("slice", label) return label diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 952368ee0ffa9..26c9e127bcc10 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1868,3 +1868,12 @@ def test_loc_setitem_dt64tz_values(self): s2["a"] = expected result = s2["a"] assert result == expected + + +@pytest.mark.parametrize("value", [1, 1.5]) +def test_loc_int_in_object_index(frame_or_series, value): + # GH: 26491 + obj = frame_or_series(range(4), index=[value, "first", 2, "third"]) + result = obj.loc[value:"third"] + expected = frame_or_series(range(4), index=[value, "first", 2, "third"]) + tm.assert_equal(result, expected) From 3e376c0df0564080b3127fb19b6a7e9d8e0fa66c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Nov 2020 19:30:11 -0800 Subject: [PATCH 094/147] REF: implement replace_regex, remove unreachable branch in ObjectBlock.replace (#37696) --- pandas/core/array_algos/replace.py | 46 +++++++++++++++++++++++++++++- pandas/core/internals/blocks.py | 29 ++----------------- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 9eaa265adab2b..76d723beac7e6 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -3,7 +3,7 @@ """ import operator import re -from typing import Pattern, Union +from typing import Optional, Pattern, Union import numpy as np @@ -12,8 +12,10 @@ from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, is_numeric_v_string_like, 
+ is_re, is_scalar, ) +from pandas.core.dtypes.missing import isna def compare_or_regex_search( @@ -87,3 +89,45 @@ def _check_comparison_types( _check_comparison_types(result, a, b) return result + + +def replace_regex(values: ArrayLike, rx: re.Pattern, value, mask: Optional[np.ndarray]): + """ + Parameters + ---------- + values : ArrayLike + Object dtype. + rx : re.Pattern + value : Any + mask : np.ndarray[bool], optional + + Notes + ----- + Alters values in-place. + """ + + # deal with replacing values with objects (strings) that match but + # whose replacement is not a string (numeric, nan, object) + if isna(value) or not isinstance(value, str): + + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return value if rx.search(s) is not None else s + else: + return s + + else: + # value is guaranteed to be a string here, s can be either a string + # or null if it's null it gets returned + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return rx.sub(value, s) + else: + return s + + f = np.vectorize(re_replacer, otypes=[values.dtype]) + + if mask is None: + values[:] = f(values) + else: + values[mask] = f(values[mask]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9e6480dd709f0..fd23b89365496 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -60,7 +60,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat import pandas.core.algorithms as algos -from pandas.core.array_algos.replace import compare_or_regex_search +from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex from pandas.core.array_algos.transforms import shift from pandas.core.arrays import ( Categorical, @@ -2563,32 +2563,7 @@ def _replace_single( return super().replace(to_replace, value, inplace=inplace, regex=regex) new_values = self.values if inplace else self.values.copy() - - # deal with replacing values with objects (strings) that match but - # 
whose replacement is not a string (numeric, nan, object) - if isna(value) or not isinstance(value, str): - - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return value if rx.search(s) is not None else s - else: - return s - - else: - # value is guaranteed to be a string here, s can be either a string - # or null if it's null it gets returned - def re_replacer(s): - if is_re(rx) and isinstance(s, str): - return rx.sub(value, s) - else: - return s - - f = np.vectorize(re_replacer, otypes=[self.dtype]) - - if mask is None: - new_values[:] = f(new_values) - else: - new_values[mask] = f(new_values[mask]) + replace_regex(new_values, rx, value, mask) # convert block = self.make_block(new_values) From 92372eff60f77b1580611e58b93da691881791e8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 9 Nov 2020 12:51:08 +0000 Subject: [PATCH 095/147] TYP: Check untyped defs (except vendored) (#37556) --- pandas/_testing.py | 68 +++++++-- pandas/core/apply.py | 6 +- pandas/core/arrays/datetimelike.py | 12 +- pandas/core/arrays/string_.py | 5 +- pandas/core/base.py | 86 ++++++++--- pandas/core/computation/expr.py | 13 +- pandas/core/computation/ops.py | 7 +- pandas/core/computation/scope.py | 43 +++++- pandas/core/frame.py | 7 +- pandas/core/generic.py | 101 +++++++++---- pandas/core/groupby/base.py | 24 +++- pandas/core/groupby/grouper.py | 13 +- pandas/core/indexes/category.py | 4 +- pandas/core/indexes/datetimelike.py | 12 +- pandas/core/indexes/datetimes.py | 4 +- pandas/core/indexes/extension.py | 4 +- pandas/core/indexes/multi.py | 11 +- pandas/core/resample.py | 42 ++++-- pandas/core/reshape/merge.py | 5 +- pandas/io/excel/_base.py | 5 +- pandas/io/formats/console.py | 8 +- pandas/io/formats/excel.py | 25 +++- pandas/io/formats/format.py | 11 +- pandas/io/parsers.py | 214 +++++++++++++++++++++++----- pandas/plotting/_matplotlib/core.py | 27 +++- pandas/plotting/_misc.py | 4 +- setup.cfg | 87 +---------- 27 files changed, 616 insertions(+), 232 deletions(-) 
diff --git a/pandas/_testing.py b/pandas/_testing.py index ded2ed3141b47..5dcd1247e52ba 100644 --- a/pandas/_testing.py +++ b/pandas/_testing.py @@ -117,14 +117,24 @@ def set_testing_mode(): # set the testing mode filters testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") if "deprecate" in testing_mode: - warnings.simplefilter("always", _testing_mode_warnings) + # pandas\_testing.py:119: error: Argument 2 to "simplefilter" has + # incompatible type "Tuple[Type[DeprecationWarning], + # Type[ResourceWarning]]"; expected "Type[Warning]" + warnings.simplefilter( + "always", _testing_mode_warnings # type: ignore[arg-type] + ) def reset_testing_mode(): # reset the testing mode filters testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") if "deprecate" in testing_mode: - warnings.simplefilter("ignore", _testing_mode_warnings) + # pandas\_testing.py:126: error: Argument 2 to "simplefilter" has + # incompatible type "Tuple[Type[DeprecationWarning], + # Type[ResourceWarning]]"; expected "Type[Warning]" + warnings.simplefilter( + "ignore", _testing_mode_warnings # type: ignore[arg-type] + ) set_testing_mode() @@ -241,16 +251,22 @@ def decompress_file(path, compression): if compression is None: f = open(path, "rb") elif compression == "gzip": - f = gzip.open(path, "rb") + # pandas\_testing.py:243: error: Incompatible types in assignment + # (expression has type "IO[Any]", variable has type "BinaryIO") + f = gzip.open(path, "rb") # type: ignore[assignment] elif compression == "bz2": - f = bz2.BZ2File(path, "rb") + # pandas\_testing.py:245: error: Incompatible types in assignment + # (expression has type "BZ2File", variable has type "BinaryIO") + f = bz2.BZ2File(path, "rb") # type: ignore[assignment] elif compression == "xz": f = get_lzma_file(lzma)(path, "rb") elif compression == "zip": zip_file = zipfile.ZipFile(path) zip_names = zip_file.namelist() if len(zip_names) == 1: - f = zip_file.open(zip_names.pop()) + # pandas\_testing.py:252: error: Incompatible 
types in assignment + # (expression has type "IO[bytes]", variable has type "BinaryIO") + f = zip_file.open(zip_names.pop()) # type: ignore[assignment] else: raise ValueError(f"ZIP file {path} error. Only one file per ZIP.") else: @@ -286,9 +302,15 @@ def write_to_compressed(compression, path, data, dest="test"): if compression == "zip": compress_method = zipfile.ZipFile elif compression == "gzip": - compress_method = gzip.GzipFile + # pandas\_testing.py:288: error: Incompatible types in assignment + # (expression has type "Type[GzipFile]", variable has type + # "Type[ZipFile]") + compress_method = gzip.GzipFile # type: ignore[assignment] elif compression == "bz2": - compress_method = bz2.BZ2File + # pandas\_testing.py:290: error: Incompatible types in assignment + # (expression has type "Type[BZ2File]", variable has type + # "Type[ZipFile]") + compress_method = bz2.BZ2File # type: ignore[assignment] elif compression == "xz": compress_method = get_lzma_file(lzma) else: @@ -300,7 +322,10 @@ def write_to_compressed(compression, path, data, dest="test"): method = "writestr" else: mode = "wb" - args = (data,) + # pandas\_testing.py:302: error: Incompatible types in assignment + # (expression has type "Tuple[Any]", variable has type "Tuple[Any, + # Any]") + args = (data,) # type: ignore[assignment] method = "write" with compress_method(path, mode=mode) as f: @@ -1996,7 +2021,8 @@ def all_timeseries_index_generator(k=10): """ make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex] for make_index_func in make_index_funcs: - yield make_index_func(k=k) + # pandas\_testing.py:1986: error: Cannot call function of unknown type + yield make_index_func(k=k) # type: ignore[operator] # make series @@ -2130,7 +2156,8 @@ def makeCustomIndex( p=makePeriodIndex, ).get(idx_type) if idx_func: - idx = idx_func(nentries) + # pandas\_testing.py:2120: error: Cannot call function of unknown type + idx = idx_func(nentries) # type: ignore[operator] # but we need to fill in the 
name if names: idx.name = names[0] @@ -2158,7 +2185,8 @@ def keyfunc(x): # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 - cnt = Counter() + # pandas\_testing.py:2148: error: Need type annotation for 'cnt' + cnt = Counter() # type: ignore[var-annotated] for j in range(div_factor): label = f"{prefix}_l{i}_g{j}" cnt[label] = ndupe_l[i] @@ -2316,7 +2344,14 @@ def _gen_unique_rand(rng, _extra_size): def makeMissingDataframe(density=0.9, random_state=None): df = makeDataFrame() - i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) + # pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple + # values for keyword argument "density" [misc] + + # pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple + # values for keyword argument "random_state" [misc] + i, j = _create_missing_idx( # type: ignore[misc] + *df.shape, density=density, random_state=random_state + ) df.values[i, j] = np.nan return df @@ -2341,7 +2376,10 @@ def dec(f): is_decorating = not kwargs and len(args) == 1 and callable(args[0]) if is_decorating: f = args[0] - args = [] + # pandas\_testing.py:2331: error: Incompatible types in assignment + # (expression has type "List[]", variable has type + # "Tuple[Any, ...]") + args = [] # type: ignore[assignment] return dec(f) else: return dec @@ -2534,7 +2572,9 @@ def wrapper(*args, **kwargs): except Exception as err: errno = getattr(err, "errno", None) if not errno and hasattr(errno, "reason"): - errno = getattr(err.reason, "errno", None) + # pandas\_testing.py:2521: error: "Exception" has no attribute + # "reason" + errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined] if errno in skip_errnos: skip(f"Skipping test due to known errno and error {err}") diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a14debce6eea7..fa4fbe711fbe4 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -141,7 +141,11 @@ def get_result(self): """ 
compute the results """ # dispatch to agg if is_list_like(self.f) or is_dict_like(self.f): - return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds) + # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets + # multiple values for keyword argument "axis" + return self.obj.aggregate( # type: ignore[misc] + self.f, axis=self.axis, *self.args, **self.kwds + ) # all empty if len(self.columns) == 0 and len(self.index) == 0: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8d90035491d28..f2f843886e802 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -431,7 +431,9 @@ def _validate_comparison_value(self, other): raise InvalidComparison(other) if isinstance(other, self._recognized_scalars) or other is NaT: - other = self._scalar_type(other) + # pandas\core\arrays\datetimelike.py:432: error: Too many arguments + # for "object" [call-arg] + other = self._scalar_type(other) # type: ignore[call-arg] try: self._check_compatible_with(other) except TypeError as err: @@ -491,14 +493,18 @@ def _validate_shift_value(self, fill_value): if is_valid_nat_for_dtype(fill_value, self.dtype): fill_value = NaT elif isinstance(fill_value, self._recognized_scalars): - fill_value = self._scalar_type(fill_value) + # pandas\core\arrays\datetimelike.py:746: error: Too many arguments + # for "object" [call-arg] + fill_value = self._scalar_type(fill_value) # type: ignore[call-arg] else: # only warn if we're not going to raise if self._scalar_type is Period and lib.is_integer(fill_value): # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: - new_fill = self._scalar_type(fill_value) + # pandas\core\arrays\datetimelike.py:753: error: Too many + # arguments for "object" [call-arg] + new_fill = self._scalar_type(fill_value) # type: ignore[call-arg] # stacklevel here is chosen to be correct when called from # 
DataFrame.shift or Series.shift diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index c73855f281bcc..3b297e7c2b13b 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -186,7 +186,10 @@ def __init__(self, values, copy=False): values = extract_array(values) super().__init__(values, copy=copy) - self._dtype = StringDtype() + # pandas\core\arrays\string_.py:188: error: Incompatible types in + # assignment (expression has type "StringDtype", variable has type + # "PandasDtype") [assignment] + self._dtype = StringDtype() # type: ignore[assignment] if not isinstance(values, type(self)): self._validate() diff --git a/pandas/core/base.py b/pandas/core/base.py index b979298fa53f6..0f6c369f5e19b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -95,7 +95,9 @@ def __sizeof__(self): either a value or Series of values """ if hasattr(self, "memory_usage"): - mem = self.memory_usage(deep=True) + # pandas\core\base.py:84: error: "PandasObject" has no attribute + # "memory_usage" [attr-defined] + mem = self.memory_usage(deep=True) # type: ignore[attr-defined] return int(mem if is_scalar(mem) else mem.sum()) # no memory_usage attribute, so fall back to object's 'sizeof' @@ -204,10 +206,18 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, ABCSeries): - return self.obj + # pandas\core\base.py:195: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + if self._selection is None or isinstance( + self.obj, ABCSeries # type: ignore[attr-defined] + ): + # pandas\core\base.py:194: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + return self.obj # type: ignore[attr-defined] else: - return self.obj[self._selection] + # pandas\core\base.py:204: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + return self.obj[self._selection] # type: ignore[attr-defined] @cache_readonly def ndim(self) -> int: @@ -215,21 
+225,46 @@ def ndim(self) -> int: @cache_readonly def _obj_with_exclusions(self): - if self._selection is not None and isinstance(self.obj, ABCDataFrame): - return self.obj.reindex(columns=self._selection_list) + # pandas\core\base.py:209: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + if self._selection is not None and isinstance( + self.obj, ABCDataFrame # type: ignore[attr-defined] + ): + # pandas\core\base.py:217: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + return self.obj.reindex( # type: ignore[attr-defined] + columns=self._selection_list + ) + + # pandas\core\base.py:207: error: "SelectionMixin" has no attribute + # "exclusions" [attr-defined] + if len(self.exclusions) > 0: # type: ignore[attr-defined] + # pandas\core\base.py:208: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] - if len(self.exclusions) > 0: - return self.obj.drop(self.exclusions, axis=1) + # pandas\core\base.py:208: error: "SelectionMixin" has no attribute + # "exclusions" [attr-defined] + return self.obj.drop(self.exclusions, axis=1) # type: ignore[attr-defined] else: - return self.obj + # pandas\core\base.py:210: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + return self.obj # type: ignore[attr-defined] def __getitem__(self, key): if self._selection is not None: raise IndexError(f"Column(s) {self._selection} already selected") if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): - if len(self.obj.columns.intersection(key)) != len(key): - bad_keys = list(set(key).difference(self.obj.columns)) + # pandas\core\base.py:217: error: "SelectionMixin" has no attribute + # "obj" [attr-defined] + if len( + self.obj.columns.intersection(key) # type: ignore[attr-defined] + ) != len(key): + # pandas\core\base.py:218: error: "SelectionMixin" has no + # attribute "obj" [attr-defined] + bad_keys = list( + set(key).difference(self.obj.columns) # type: ignore[attr-defined] + ) raise KeyError(f"Columns 
not found: {str(bad_keys)[1:-1]}") return self._gotitem(list(key), ndim=2) @@ -559,7 +594,11 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): dtype='datetime64[ns]') """ if is_extension_array_dtype(self.dtype): - return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs) + # pandas\core\base.py:837: error: Too many arguments for "to_numpy" + # of "ExtensionArray" [call-arg] + return self.array.to_numpy( # type: ignore[call-arg] + dtype, copy=copy, na_value=na_value, **kwargs + ) elif kwargs: bad_keys = list(kwargs.keys())[0] raise TypeError( @@ -851,8 +890,15 @@ def _map_values(self, mapper, na_action=None): if is_categorical_dtype(self.dtype): # use the built in categorical series mapper which saves # time by mapping the categories instead of all values - self = cast("Categorical", self) - return self._values.map(mapper) + + # pandas\core\base.py:893: error: Incompatible types in + # assignment (expression has type "Categorical", variable has + # type "IndexOpsMixin") [assignment] + self = cast("Categorical", self) # type: ignore[assignment] + # pandas\core\base.py:894: error: Item "ExtensionArray" of + # "Union[ExtensionArray, Any]" has no attribute "map" + # [union-attr] + return self._values.map(mapper) # type: ignore[union-attr] values = self._values @@ -869,7 +915,9 @@ def _map_values(self, mapper, na_action=None): raise NotImplementedError map_f = lambda values, f: values.map(f) else: - values = self.astype(object)._values + # pandas\core\base.py:1142: error: "IndexOpsMixin" has no attribute + # "astype" [attr-defined] + values = self.astype(object)._values # type: ignore[attr-defined] if na_action == "ignore": def map_f(values, f): @@ -1111,7 +1159,9 @@ def memory_usage(self, deep=False): are not components of the array if deep=False or if used on PyPy """ if hasattr(self.array, "memory_usage"): - return self.array.memory_usage(deep=deep) + # pandas\core\base.py:1379: error: "ExtensionArray" has no + # 
attribute "memory_usage" [attr-defined] + return self.array.memory_usage(deep=deep) # type: ignore[attr-defined] v = self.array.nbytes if deep and is_object_dtype(self) and not PYPY: @@ -1245,7 +1295,9 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: def drop_duplicates(self, keep="first"): duplicated = self.duplicated(keep=keep) - result = self[np.logical_not(duplicated)] + # pandas\core\base.py:1507: error: Value of type "IndexOpsMixin" is not + # indexable [index] + result = self[np.logical_not(duplicated)] # type: ignore[index] return result def duplicated(self, keep="first"): diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index c971551a7f400..88a25ad9996a0 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -659,7 +659,11 @@ def visit_Call(self, node, side=None, **kwargs): raise if res is None: - raise ValueError(f"Invalid function call {node.func.id}") + # pandas\core\computation\expr.py:663: error: "expr" has no + # attribute "id" [attr-defined] + raise ValueError( + f"Invalid function call {node.func.id}" # type: ignore[attr-defined] + ) if hasattr(res, "value"): res = res.value @@ -680,7 +684,12 @@ def visit_Call(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): - raise ValueError(f"keyword error in function call '{node.func.id}'") + # pandas\core\computation\expr.py:684: error: "expr" has no + # attribute "id" [attr-defined] + raise ValueError( + "keyword error in function call " # type: ignore[attr-defined] + f"'{node.func.id}'" + ) if key.arg: kwargs[key.arg] = self.visit(key.value).value diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 5759cd17476d6..74bee80c6c8a6 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -69,7 +69,9 @@ def __init__(self, name: str, is_local: Optional[bool] = None): class Term: def __new__(cls, name, env, side=None, 
encoding=None): klass = Constant if not isinstance(name, str) else cls - supr_new = super(Term, klass).__new__ + # pandas\core\computation\ops.py:72: error: Argument 2 for "super" not + # an instance of argument 1 [misc] + supr_new = super(Term, klass).__new__ # type: ignore[misc] return supr_new(klass) is_local: bool @@ -589,7 +591,8 @@ def __init__(self, func, args): self.func = func def __call__(self, env): - operands = [op(env) for op in self.operands] + # pandas\core\computation\ops.py:592: error: "Op" not callable [operator] + operands = [op(env) for op in self.operands] # type: ignore[operator] with np.errstate(all="ignore"): return self.func.func(*operands) diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index 7a9b8caa985e3..d2708da04b7e9 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -129,15 +129,29 @@ def __init__( # shallow copy here because we don't want to replace what's in # scope when we align terms (alignment accesses the underlying # numpy array of pandas objects) - self.scope = self.scope.new_child((global_dict or frame.f_globals).copy()) + + # pandas\core\computation\scope.py:132: error: Incompatible types + # in assignment (expression has type "ChainMap[str, Any]", variable + # has type "DeepChainMap[str, Any]") [assignment] + self.scope = self.scope.new_child( # type: ignore[assignment] + (global_dict or frame.f_globals).copy() + ) if not isinstance(local_dict, Scope): - self.scope = self.scope.new_child((local_dict or frame.f_locals).copy()) + # pandas\core\computation\scope.py:134: error: Incompatible + # types in assignment (expression has type "ChainMap[str, + # Any]", variable has type "DeepChainMap[str, Any]") + # [assignment] + self.scope = self.scope.new_child( # type: ignore[assignment] + (local_dict or frame.f_locals).copy() + ) finally: del frame # assumes that resolvers are going from outermost scope to inner if isinstance(local_dict, Scope): - resolvers += 
tuple(local_dict.resolvers.maps) + # pandas\core\computation\scope.py:140: error: Cannot determine + # type of 'resolvers' [has-type] + resolvers += tuple(local_dict.resolvers.maps) # type: ignore[has-type] self.resolvers = DeepChainMap(*resolvers) self.temps = {} @@ -224,7 +238,9 @@ def swapkey(self, old_key: str, new_key: str, new_value=None): for mapping in maps: if old_key in mapping: - mapping[new_key] = new_value + # pandas\core\computation\scope.py:228: error: Unsupported + # target for indexed assignment ("Mapping[Any, Any]") [index] + mapping[new_key] = new_value # type: ignore[index] return def _get_vars(self, stack, scopes: List[str]): @@ -243,7 +259,11 @@ def _get_vars(self, stack, scopes: List[str]): for scope, (frame, _, _, _, _, _) in variables: try: d = getattr(frame, "f_" + scope) - self.scope = self.scope.new_child(d) + # pandas\core\computation\scope.py:247: error: Incompatible + # types in assignment (expression has type "ChainMap[str, + # Any]", variable has type "DeepChainMap[str, Any]") + # [assignment] + self.scope = self.scope.new_child(d) # type: ignore[assignment] finally: # won't remove it, but DECREF it # in Py3 this probably isn't necessary since frame won't be @@ -310,5 +330,16 @@ def full_scope(self): vars : DeepChainMap All variables in this scope. 
""" - maps = [self.temps] + self.resolvers.maps + self.scope.maps + # pandas\core\computation\scope.py:314: error: Unsupported operand + # types for + ("List[Dict[Any, Any]]" and "List[Mapping[Any, Any]]") + # [operator] + + # pandas\core\computation\scope.py:314: error: Unsupported operand + # types for + ("List[Dict[Any, Any]]" and "List[Mapping[str, Any]]") + # [operator] + maps = ( + [self.temps] + + self.resolvers.maps # type: ignore[operator] + + self.scope.maps # type: ignore[operator] + ) return DeepChainMap(*maps) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bfb633ae55095..80743f8cc924b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3859,7 +3859,12 @@ def reindexer(value): value, len(self.index), infer_dtype ) else: - value = cast_scalar_to_array(len(self.index), value) + # pandas\core\frame.py:3827: error: Argument 1 to + # "cast_scalar_to_array" has incompatible type "int"; expected + # "Tuple[Any, ...]" [arg-type] + value = cast_scalar_to_array( + len(self.index), value # type: ignore[arg-type] + ) value = maybe_cast_to_datetime(value, infer_dtype) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 02fa7308e7ee8..8470ef7ba5efb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10706,7 +10706,9 @@ def _add_numeric_operations(cls): def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) - cls.any = any + # pandas\core\generic.py:10725: error: Cannot assign to a method + # [assignment] + cls.any = any # type: ignore[assignment] @doc( _bool_doc, @@ -10721,7 +10723,14 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) - cls.all = all + # pandas\core\generic.py:10719: error: Cannot assign to a method + # [assignment] + + # 
pandas\core\generic.py:10719: error: Incompatible types in assignment + # (expression has type "Callable[[Iterable[object]], bool]", variable + # has type "Callable[[NDFrame, Any, Any, Any, Any, KwArg(Any)], Any]") + # [assignment] + cls.all = all # type: ignore[assignment] @doc( desc="Return the mean absolute deviation of the values " @@ -10736,7 +10745,9 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): def mad(self, axis=None, skipna=None, level=None): return NDFrame.mad(self, axis, skipna, level) - cls.mad = mad + # pandas\core\generic.py:10736: error: Cannot assign to a method + # [assignment] + cls.mad = mad # type: ignore[assignment] @doc( _num_ddof_doc, @@ -10758,7 +10769,9 @@ def sem( ): return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) - cls.sem = sem + # pandas\core\generic.py:10758: error: Cannot assign to a method + # [assignment] + cls.sem = sem # type: ignore[assignment] @doc( _num_ddof_doc, @@ -10779,7 +10792,9 @@ def var( ): return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) - cls.var = var + # pandas\core\generic.py:10779: error: Cannot assign to a method + # [assignment] + cls.var = var # type: ignore[assignment] @doc( _num_ddof_doc, @@ -10801,7 +10816,9 @@ def std( ): return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) - cls.std = std + # pandas\core\generic.py:10801: error: Cannot assign to a method + # [assignment] + cls.std = std # type: ignore[assignment] @doc( _cnum_doc, @@ -10815,7 +10832,9 @@ def std( def cummin(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cummin(self, axis, skipna, *args, **kwargs) - cls.cummin = cummin + # pandas\core\generic.py:10815: error: Cannot assign to a method + # [assignment] + cls.cummin = cummin # type: ignore[assignment] @doc( _cnum_doc, @@ -10829,7 +10848,9 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs): def cummax(self, axis=None, skipna=True, *args, **kwargs): return 
NDFrame.cummax(self, axis, skipna, *args, **kwargs) - cls.cummax = cummax + # pandas\core\generic.py:10829: error: Cannot assign to a method + # [assignment] + cls.cummax = cummax # type: ignore[assignment] @doc( _cnum_doc, @@ -10843,7 +10864,9 @@ def cummax(self, axis=None, skipna=True, *args, **kwargs): def cumsum(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) - cls.cumsum = cumsum + # pandas\core\generic.py:10843: error: Cannot assign to a method + # [assignment] + cls.cumsum = cumsum # type: ignore[assignment] @doc( _cnum_doc, @@ -10857,7 +10880,9 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs): def cumprod(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) - cls.cumprod = cumprod + # pandas\core\generic.py:10857: error: Cannot assign to a method + # [assignment] + cls.cumprod = cumprod # type: ignore[assignment] @doc( _num_doc, @@ -10883,7 +10908,9 @@ def sum( self, axis, skipna, level, numeric_only, min_count, **kwargs ) - cls.sum = sum + # pandas\core\generic.py:10883: error: Cannot assign to a method + # [assignment] + cls.sum = sum # type: ignore[assignment] @doc( _num_doc, @@ -10908,7 +10935,9 @@ def prod( self, axis, skipna, level, numeric_only, min_count, **kwargs ) - cls.prod = prod + # pandas\core\generic.py:10908: error: Cannot assign to a method + # [assignment] + cls.prod = prod # type: ignore[assignment] cls.product = prod @doc( @@ -10924,7 +10953,9 @@ def prod( def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) - cls.mean = mean + # pandas\core\generic.py:10924: error: Cannot assign to a method + # [assignment] + cls.mean = mean # type: ignore[assignment] @doc( _num_doc, @@ -10939,7 +10970,9 @@ def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def skew(self, axis=None, skipna=None, 
level=None, numeric_only=None, **kwargs): return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) - cls.skew = skew + # pandas\core\generic.py:10939: error: Cannot assign to a method + # [assignment] + cls.skew = skew # type: ignore[assignment] @doc( _num_doc, @@ -10957,7 +10990,9 @@ def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) - cls.kurt = kurt + # pandas\core\generic.py:10957: error: Cannot assign to a method + # [assignment] + cls.kurt = kurt # type: ignore[assignment] cls.kurtosis = kurt @doc( @@ -10975,7 +11010,9 @@ def median( ): return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) - cls.median = median + # pandas\core\generic.py:10975: error: Cannot assign to a method + # [assignment] + cls.median = median # type: ignore[assignment] @doc( _num_doc, @@ -10992,7 +11029,9 @@ def median( def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) - cls.max = max + # pandas\core\generic.py:10992: error: Cannot assign to a method + # [assignment] + cls.max = max # type: ignore[assignment] @doc( _num_doc, @@ -11009,7 +11048,9 @@ def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) - cls.min = min + # pandas\core\generic.py:11009: error: Cannot assign to a method + # [assignment] + cls.min = min # type: ignore[assignment] @doc(Rolling) def rolling( @@ -11115,34 +11156,38 @@ def _inplace_method(self, other, op): return self def __iadd__(self, other): - return self._inplace_method(other, type(self).__add__) + return self._inplace_method(other, type(self).__add__) # type: ignore[operator] 
def __isub__(self, other): - return self._inplace_method(other, type(self).__sub__) + return self._inplace_method(other, type(self).__sub__) # type: ignore[operator] def __imul__(self, other): - return self._inplace_method(other, type(self).__mul__) + return self._inplace_method(other, type(self).__mul__) # type: ignore[operator] def __itruediv__(self, other): - return self._inplace_method(other, type(self).__truediv__) + return self._inplace_method( + other, type(self).__truediv__ # type: ignore[operator] + ) def __ifloordiv__(self, other): - return self._inplace_method(other, type(self).__floordiv__) + return self._inplace_method( + other, type(self).__floordiv__ # type: ignore[operator] + ) def __imod__(self, other): - return self._inplace_method(other, type(self).__mod__) + return self._inplace_method(other, type(self).__mod__) # type: ignore[operator] def __ipow__(self, other): - return self._inplace_method(other, type(self).__pow__) + return self._inplace_method(other, type(self).__pow__) # type: ignore[operator] def __iand__(self, other): - return self._inplace_method(other, type(self).__and__) + return self._inplace_method(other, type(self).__and__) # type: ignore[operator] def __ior__(self, other): - return self._inplace_method(other, type(self).__or__) + return self._inplace_method(other, type(self).__or__) # type: ignore[operator] def __ixor__(self, other): - return self._inplace_method(other, type(self).__xor__) + return self._inplace_method(other, type(self).__xor__) # type: ignore[operator] # ---------------------------------------------------------------------- # Misc methods diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 8e278dc81a8cc..f205226c03a53 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -49,7 +49,9 @@ def _gotitem(self, key, ndim, subset=None): """ # create a new object to prevent aliasing if subset is None: - subset = self.obj + # pandas\core\groupby\base.py:52: error: 
"GotItemMixin" has no + # attribute "obj" [attr-defined] + subset = self.obj # type: ignore[attr-defined] # we need to make a shallow copy of ourselves # with the same groupby @@ -57,11 +59,25 @@ def _gotitem(self, key, ndim, subset=None): # Try to select from a DataFrame, falling back to a Series try: - groupby = self._groupby[key] + # pandas\core\groupby\base.py:60: error: "GotItemMixin" has no + # attribute "_groupby" [attr-defined] + groupby = self._groupby[key] # type: ignore[attr-defined] except IndexError: - groupby = self._groupby + # pandas\core\groupby\base.py:62: error: "GotItemMixin" has no + # attribute "_groupby" [attr-defined] + groupby = self._groupby # type: ignore[attr-defined] - self = type(self)(subset, groupby=groupby, parent=self, **kwargs) + # pandas\core\groupby\base.py:64: error: Too many arguments for + # "GotItemMixin" [call-arg] + + # pandas\core\groupby\base.py:64: error: Unexpected keyword argument + # "groupby" for "GotItemMixin" [call-arg] + + # pandas\core\groupby\base.py:64: error: Unexpected keyword argument + # "parent" for "GotItemMixin" [call-arg] + self = type(self)( + subset, groupby=groupby, parent=self, **kwargs # type: ignore[call-arg] + ) self._reset_cache() if subset.ndim == 2 and (is_scalar(key) and key in subset or is_list_like(key)): self._selection = key diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index ff5379567f090..e8af9da30a298 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -307,7 +307,10 @@ def _get_grouper(self, obj, validate: bool = True): a tuple of binner, grouper, obj (possibly sorted) """ self._set_grouper(obj) - self.grouper, _, self.obj = get_grouper( + # pandas\core\groupby\grouper.py:310: error: Value of type variable + # "FrameOrSeries" of "get_grouper" cannot be "Optional[Any]" + # [type-var] + self.grouper, _, self.obj = get_grouper( # type: ignore[type-var] self.obj, [self.key], axis=self.axis, @@ -345,7 +348,9 @@ def 
_set_grouper(self, obj: FrameOrSeries, sort: bool = False): if getattr(self.grouper, "name", None) == key and isinstance( obj, ABCSeries ): - ax = self._grouper.take(obj.index) + # pandas\core\groupby\grouper.py:348: error: Item "None" of + # "Optional[Any]" has no attribute "take" [union-attr] + ax = self._grouper.take(obj.index) # type: ignore[union-attr] else: if key not in obj._info_axis: raise KeyError(f"The grouper name {key} is not found") @@ -379,7 +384,9 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): @property def groups(self): - return self.grouper.groups + # pandas\core\groupby\grouper.py:382: error: Item "None" of + # "Optional[Any]" has no attribute "groups" [union-attr] + return self.grouper.groups # type: ignore[union-attr] def __repr__(self) -> str: attrs_list = ( diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 1be979b1b899c..859c26a40e50d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -324,7 +324,9 @@ def _format_attrs(self): "categories", ibase.default_pprint(self.categories, max_seq_items=max_categories), ), - ("ordered", self.ordered), + # pandas\core\indexes\category.py:315: error: "CategoricalIndex" + # has no attribute "ordered" [attr-defined] + ("ordered", self.ordered), # type: ignore[attr-defined] ] if self.name is not None: attrs.append(("name", ibase.default_pprint(self.name))) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 2cb66557b3bab..40a6086f69f85 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -745,7 +745,11 @@ def intersection(self, other, sort=False): start = right[0] if end < start: - result = type(self)(data=[], dtype=self.dtype, freq=self.freq) + # pandas\core\indexes\datetimelike.py:758: error: Unexpected + # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] + result = type(self)( + data=[], dtype=self.dtype, freq=self.freq # 
type: ignore[call-arg] + ) else: lslice = slice(*left.slice_locs(start, end)) left_chunk = left._values[lslice] @@ -874,7 +878,11 @@ def _union(self, other, sort): i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) - result = type(self)(i8result, dtype=self.dtype, freq="infer") + # pandas\core\indexes\datetimelike.py:887: error: Unexpected + # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] + result = type(self)( + i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] + ) return result # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index aa16dc9752565..2739806a0f338 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -811,7 +811,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): end_casted = self._maybe_cast_slice_bound(end, "right", kind) mask = (self <= end_casted) & mask - indexer = mask.nonzero()[0][::step] + # pandas\core\indexes\datetimes.py:764: error: "bool" has no + # attribute "nonzero" [attr-defined] + indexer = mask.nonzero()[0][::step] # type: ignore[attr-defined] if len(indexer) == len(self): return slice(None) else: diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 921c7aac2c85b..3103c27b35d74 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -220,7 +220,9 @@ def __getitem__(self, key): if result.ndim == 1: return type(self)(result, name=self.name) # Unpack to ndarray for MPL compat - result = result._data + # pandas\core\indexes\extension.py:220: error: "ExtensionArray" has + # no attribute "_data" [attr-defined] + result = result._data # type: ignore[attr-defined] # Includes cases where we get a 2D ndarray back for MPL compat deprecate_ndim_indexing(result) diff --git a/pandas/core/indexes/multi.py 
b/pandas/core/indexes/multi.py index 65e71a6109a5a..5a3f2b0853c4f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1449,7 +1449,9 @@ def _set_names(self, names, level=None, validate=True): raise TypeError( f"{type(self).__name__}.name must be a hashable type" ) - self._names[lev] = name + # pandas\core\indexes\multi.py:1448: error: Cannot determine type + # of '__setitem__' [has-type] + self._names[lev] = name # type: ignore[has-type] # If .levels has been accessed, the names in our cache will be stale. self._reset_cache() @@ -3506,8 +3508,13 @@ def intersection(self, other, sort=False): if uniq_tuples is None: other_uniq = set(rvals) seen = set() + # pandas\core\indexes\multi.py:3503: error: "add" of "set" does not + # return a value [func-returns-value] uniq_tuples = [ - x for x in lvals if x in other_uniq and not (x in seen or seen.add(x)) + x + for x in lvals + if x in other_uniq + and not (x in seen or seen.add(x)) # type: ignore[func-returns-value] ] if sort is None: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 78d217c4688b6..fccedd75c4531 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -94,7 +94,10 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs): self.as_index = True self.exclusions = set() self.binner = None - self.grouper = None + # pandas\core\resample.py:96: error: Incompatible types in assignment + # (expression has type "None", variable has type "BaseGrouper") + # [assignment] + self.grouper = None # type: ignore[assignment] if self.groupby is not None: self.groupby._set_grouper(self._convert_obj(obj), sort=True) @@ -410,14 +413,21 @@ def _apply_loffset(self, result): result : Series or DataFrame the result of resample """ + # pandas\core\resample.py:409: error: Cannot determine type of + # 'loffset' [has-type] needs_offset = ( - isinstance(self.loffset, (DateOffset, timedelta, np.timedelta64)) + isinstance( + self.loffset, # type: ignore[has-type] + 
(DateOffset, timedelta, np.timedelta64), + ) and isinstance(result.index, DatetimeIndex) and len(result.index) > 0 ) if needs_offset: - result.index = result.index + self.loffset + # pandas\core\resample.py:415: error: Cannot determine type of + # 'loffset' [has-type] + result.index = result.index + self.loffset # type: ignore[has-type] self.loffset = None return result @@ -852,7 +862,9 @@ def std(self, ddof=1, *args, **kwargs): Standard deviation of values within each group. """ nv.validate_resampler_func("std", args, kwargs) - return self._downsample("std", ddof=ddof) + # pandas\core\resample.py:850: error: Unexpected keyword argument + # "ddof" for "_downsample" [call-arg] + return self._downsample("std", ddof=ddof) # type: ignore[call-arg] def var(self, ddof=1, *args, **kwargs): """ @@ -869,7 +881,9 @@ def var(self, ddof=1, *args, **kwargs): Variance of values within each group. """ nv.validate_resampler_func("var", args, kwargs) - return self._downsample("var", ddof=ddof) + # pandas\core\resample.py:867: error: Unexpected keyword argument + # "ddof" for "_downsample" [call-arg] + return self._downsample("var", ddof=ddof) # type: ignore[call-arg] @doc(GroupBy.size) def size(self): @@ -927,7 +941,12 @@ def quantile(self, q=0.5, **kwargs): Return a DataFrame, where the coulmns are groupby columns, and the values are its quantiles. 
""" - return self._downsample("quantile", q=q, **kwargs) + # pandas\core\resample.py:920: error: Unexpected keyword argument "q" + # for "_downsample" [call-arg] + + # pandas\core\resample.py:920: error: Too many arguments for + # "_downsample" [call-arg] + return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg] # downsample methods @@ -979,7 +998,9 @@ def __init__(self, obj, *args, **kwargs): for attr in self._attributes: setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - super().__init__(None) + # pandas\core\resample.py:972: error: Too many arguments for "__init__" + # of "object" [call-arg] + super().__init__(None) # type: ignore[call-arg] self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -1044,7 +1065,12 @@ def _downsample(self, how, **kwargs): # do we have a regular frequency if ax.freq is not None or ax.inferred_freq is not None: - if len(self.grouper.binlabels) > len(ax) and how is None: + # pandas\core\resample.py:1037: error: "BaseGrouper" has no + # attribute "binlabels" [attr-defined] + if ( + len(self.grouper.binlabels) > len(ax) # type: ignore[attr-defined] + and how is None + ): # let's do an asfreq return self.asfreq() diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index aa883d518f8d1..d49e834fedb2d 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -965,7 +965,10 @@ def _get_merge_keys(self): """ left_keys = [] right_keys = [] - join_names = [] + # pandas\core\reshape\merge.py:966: error: Need type annotation for + # 'join_names' (hint: "join_names: List[] = ...") + # [var-annotated] + join_names = [] # type: ignore[var-annotated] right_drop = [] left_drop = [] diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 03c61c3ed8376..dd30bf37793d0 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -755,7 +755,10 @@ def __init__( self.mode = mode def __fspath__(self): - return 
stringify_path(self.path) + # pandas\io\excel\_base.py:744: error: Argument 1 to "stringify_path" + # has incompatible type "Optional[Any]"; expected "Union[str, Path, + # IO[Any], IOBase]" [arg-type] + return stringify_path(self.path) # type: ignore[arg-type] def _get_sheet_name(self, sheet_name): if sheet_name is None: diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 50e69f7e8b435..ab9c9fe995008 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -69,7 +69,9 @@ def check_main(): return not hasattr(main, "__file__") or get_option("mode.sim_interactive") try: - return __IPYTHON__ or check_main() + # pandas\io\formats\console.py:72: error: Name '__IPYTHON__' is not + # defined [name-defined] + return __IPYTHON__ or check_main() # type: ignore[name-defined] except NameError: return check_main() @@ -83,7 +85,9 @@ def in_ipython_frontend(): bool """ try: - ip = get_ipython() + # pandas\io\formats\console.py:86: error: Name 'get_ipython' is not + # defined [name-defined] + ip = get_ipython() # type: ignore[name-defined] return "zmq" in str(type(ip)).lower() except NameError: pass diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 5013365896fb2..9793f7a1e4613 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -590,10 +590,17 @@ def _format_header_regular(self): colnames = self.columns if has_aliases: - if len(self.header) != len(self.columns): + # pandas\io\formats\excel.py:593: error: Argument 1 to "len" + # has incompatible type "Union[Sequence[Optional[Hashable]], + # bool]"; expected "Sized" [arg-type] + if len(self.header) != len(self.columns): # type: ignore[arg-type] + # pandas\io\formats\excel.py:595: error: Argument 1 to + # "len" has incompatible type + # "Union[Sequence[Optional[Hashable]], bool]"; expected + # "Sized" [arg-type] raise ValueError( - f"Writing {len(self.columns)} cols but got {len(self.header)} " - "aliases" + f"Writing {len(self.columns)} 
" # type: ignore[arg-type] + f"cols but got {len(self.header)} aliases" ) else: colnames = self.header @@ -615,7 +622,10 @@ def _format_header(self): "" ] * len(self.columns) if reduce(lambda x, y: x and y, map(lambda x: x != "", row)): - gen2 = ( + # pandas\io\formats\excel.py:618: error: Incompatible types in + # assignment (expression has type "Generator[ExcelCell, None, + # None]", variable has type "Tuple[]") [assignment] + gen2 = ( # type: ignore[assignment] ExcelCell(self.rowcounter, colindex, val, self.header_style) for colindex, val in enumerate(row) ) @@ -805,7 +815,12 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - writer = ExcelWriter(stringify_path(writer), engine=engine) + # pandas\io\formats\excel.py:808: error: Cannot instantiate + # abstract class 'ExcelWriter' with abstract attributes 'engine', + # 'save', 'supported_extensions' and 'write_cells' [abstract] + writer = ExcelWriter( # type: ignore[abstract] + stringify_path(writer), engine=engine + ) need_save = True formatted_cells = self.get_formatted_cells() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 43e76d0aef490..5b69ef4eba26e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1342,7 +1342,16 @@ def _value_formatter( def base_formatter(v): assert float_format is not None # for mypy - return float_format(value=v) if notna(v) else self.na_rep + # pandas\io\formats\format.py:1411: error: "str" not callable + # [operator] + + # pandas\io\formats\format.py:1411: error: Unexpected keyword + # argument "value" for "__call__" of "EngFormatter" [call-arg] + return ( + float_format(value=v) # type: ignore[operator,call-arg] + if notna(v) + else self.na_rep + ) else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index e4895d280c241..5725e2304e1d2 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -851,7 +851,10 @@ def _get_options_with_defaults(self, engine): options[argname] = value if engine 
== "python-fwf": - for argname, default in _fwf_defaults.items(): + # pandas\io\parsers.py:907: error: Incompatible types in assignment + # (expression has type "object", variable has type "Union[int, str, + # None]") [assignment] + for argname, default in _fwf_defaults.items(): # type: ignore[assignment] options[argname] = kwds.get(argname, default) return options @@ -1035,9 +1038,18 @@ def __next__(self): def _make_engine(self, engine="c"): mapping = { - "c": CParserWrapper, - "python": PythonParser, - "python-fwf": FixedWidthFieldParser, + # pandas\io\parsers.py:1099: error: Dict entry 0 has incompatible + # type "str": "Type[CParserWrapper]"; expected "str": + # "Type[ParserBase]" [dict-item] + "c": CParserWrapper, # type: ignore[dict-item] + # pandas\io\parsers.py:1100: error: Dict entry 1 has incompatible + # type "str": "Type[PythonParser]"; expected "str": + # "Type[ParserBase]" [dict-item] + "python": PythonParser, # type: ignore[dict-item] + # pandas\io\parsers.py:1101: error: Dict entry 2 has incompatible + # type "str": "Type[FixedWidthFieldParser]"; expected "str": + # "Type[ParserBase]" [dict-item] + "python-fwf": FixedWidthFieldParser, # type: ignore[dict-item] } try: klass = mapping[engine] @@ -1394,7 +1406,9 @@ def _validate_parse_dates_presence(self, columns: List[str]) -> None: ) def close(self): - self.handles.close() + # pandas\io\parsers.py:1409: error: "ParserBase" has no attribute + # "handles" [attr-defined] + self.handles.close() # type: ignore[attr-defined] @property def _has_complex_date_col(self): @@ -1490,7 +1504,9 @@ def _maybe_dedup_names(self, names): # would be nice! 
if self.mangle_dupe_cols: names = list(names) # so we can index - counts = defaultdict(int) + # pandas\io\parsers.py:1559: error: Need type annotation for + # 'counts' [var-annotated] + counts = defaultdict(int) # type: ignore[var-annotated] is_potential_mi = _is_potential_multi_index(names, self.index_col) for i, col in enumerate(names): @@ -1535,7 +1551,9 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): # add names for the index if indexnamerow: coffset = len(indexnamerow) - len(columns) - index = index.set_names(indexnamerow[:coffset]) + # pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]" + # has no attribute "set_names" [union-attr] + index = index.set_names(indexnamerow[:coffset]) # type: ignore[union-attr] # maybe create a mi on the columns columns = self._maybe_make_multi_index_columns(columns, self.col_names) @@ -1609,7 +1627,9 @@ def _agg_index(self, index, try_parse_dates=True) -> Index: col_na_fvalues = set() if isinstance(self.na_values, dict): - col_name = self.index_names[i] + # pandas\io\parsers.py:1678: error: Value of type + # "Optional[Any]" is not indexable [index] + col_name = self.index_names[i] # type: ignore[index] if col_name is not None: col_na_values, col_na_fvalues = _get_na_values( col_name, self.na_values, self.na_fvalues, self.keep_default_na @@ -1840,7 +1860,30 @@ def __init__(self, src, **kwds): kwds.pop("memory_map", None) kwds.pop("compression", None) if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"): - self.handles.handle = self.handles.handle.mmap + # pandas\io\parsers.py:1861: error: Item "IO[Any]" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "RawIOBase" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "BufferedIOBase" of + # 
"Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "TextIOBase" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "TextIOWrapper" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "mmap" of "Union[IO[Any], + # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]" has + # no attribute "mmap" [union-attr] + self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr] # #2442 kwds["allow_leading_cols"] = self.index_col is not False @@ -1924,7 +1967,12 @@ def __init__(self, src, **kwds): self.index_names = index_names if self._reader.header is None and not passed_names: - self.index_names = [None] * len(self.index_names) + # pandas\io\parsers.py:1997: error: Argument 1 to "len" has + # incompatible type "Optional[Any]"; expected "Sized" + # [arg-type] + self.index_names = [None] * len( + self.index_names # type: ignore[arg-type] + ) self._implicit_index = self._reader.leading_cols > 0 @@ -1956,14 +2004,20 @@ def _set_noconvert_columns(self): usecols = self.names[:] else: # Usecols is empty. 
- usecols = None + + # pandas\io\parsers.py:2030: error: Incompatible types in + # assignment (expression has type "None", variable has type + # "List[Any]") [assignment] + usecols = None # type: ignore[assignment] def _set(x): if usecols is not None and is_integer(x): x = usecols[x] if not is_integer(x): - x = names.index(x) + # pandas\io\parsers.py:2037: error: Item "None" of + # "Optional[Any]" has no attribute "index" [union-attr] + x = names.index(x) # type: ignore[union-attr] self._reader.set_noconvert(x) @@ -2057,7 +2111,11 @@ def read(self, nrows=None): data = sorted(data.items()) # ugh, mutation - names = list(self.orig_names) + + # pandas\io\parsers.py:2131: error: Argument 1 to "list" has + # incompatible type "Optional[Any]"; expected "Iterable[Any]" + # [arg-type] + names = list(self.orig_names) # type: ignore[arg-type] names = self._maybe_dedup_names(names) if self.usecols is not None: @@ -2391,7 +2449,11 @@ def _read(): reader = _read() - self.data = reader + # pandas\io\parsers.py:2427: error: Incompatible types in assignment + # (expression has type "_reader", variable has type "Union[IO[Any], + # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap, None]") + # [assignment] + self.data = reader # type: ignore[assignment] def read(self, rows=None): try: @@ -2405,7 +2467,10 @@ def read(self, rows=None): # done with first read, next time raise StopIteration self._first_chunk = False - columns = list(self.orig_names) + # pandas\io\parsers.py:2480: error: Argument 1 to "list" has + # incompatible type "Optional[Any]"; expected "Iterable[Any]" + # [arg-type] + columns = list(self.orig_names) # type: ignore[arg-type] if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 names = self._maybe_dedup_names(self.orig_names) @@ -2453,7 +2518,9 @@ def _exclude_implicit_index(self, alldata): # legacy def get_chunk(self, size=None): if size is None: - size = self.chunksize + # pandas\io\parsers.py:2528: error: 
"PythonParser" has no attribute + # "chunksize" [attr-defined] + size = self.chunksize # type: ignore[attr-defined] return self.read(rows=size) def _convert_data(self, data): @@ -2462,8 +2529,15 @@ def _clean_mapping(mapping): """converts col numbers to names""" clean = {} for col, v in mapping.items(): - if isinstance(col, int) and col not in self.orig_names: - col = self.orig_names[col] + # pandas\io\parsers.py:2537: error: Unsupported right operand + # type for in ("Optional[Any]") [operator] + if ( + isinstance(col, int) + and col not in self.orig_names # type: ignore[operator] + ): + # pandas\io\parsers.py:2538: error: Value of type + # "Optional[Any]" is not indexable [index] + col = self.orig_names[col] # type: ignore[index] clean[col] = v return clean @@ -2483,8 +2557,15 @@ def _clean_mapping(mapping): na_value = self.na_values[col] na_fvalue = self.na_fvalues[col] - if isinstance(col, int) and col not in self.orig_names: - col = self.orig_names[col] + # pandas\io\parsers.py:2558: error: Unsupported right operand + # type for in ("Optional[Any]") [operator] + if ( + isinstance(col, int) + and col not in self.orig_names # type: ignore[operator] + ): + # pandas\io\parsers.py:2559: error: Value of type + # "Optional[Any]" is not indexable [index] + col = self.orig_names[col] # type: ignore[index] clean_na_values[col] = na_value clean_na_fvalues[col] = na_fvalue @@ -2505,7 +2586,10 @@ def _infer_columns(self): names = self.names num_original_columns = 0 clear_buffer = True - unnamed_cols = set() + # pandas\io\parsers.py:2580: error: Need type annotation for + # 'unnamed_cols' (hint: "unnamed_cols: Set[] = ...") + # [var-annotated] + unnamed_cols = set() # type: ignore[var-annotated] if self.header is not None: header = self.header @@ -2519,7 +2603,9 @@ def _infer_columns(self): have_mi_columns = False header = [header] - columns = [] + # pandas\io\parsers.py:2594: error: Need type annotation for + # 'columns' (hint: "columns: List[] = ...") [var-annotated] + 
columns = [] # type: ignore[var-annotated] for level, hr in enumerate(header): try: line = self._buffered_line() @@ -2564,7 +2650,9 @@ def _infer_columns(self): this_columns.append(c) if not have_mi_columns and self.mangle_dupe_cols: - counts = defaultdict(int) + # pandas\io\parsers.py:2639: error: Need type annotation + # for 'counts' [var-annotated] + counts = defaultdict(int) # type: ignore[var-annotated] for i, col in enumerate(this_columns): cur_count = counts[col] @@ -2588,10 +2676,16 @@ def _infer_columns(self): if lc != unnamed_count and lc - ic > unnamed_count: clear_buffer = False - this_columns = [None] * lc + # pandas\io\parsers.py:2663: error: List item 0 has + # incompatible type "None"; expected "str" + # [list-item] + this_columns = [None] * lc # type: ignore[list-item] self.buf = [self.buf[-1]] - columns.append(this_columns) + # pandas\io\parsers.py:2666: error: Argument 1 to "append" of + # "list" has incompatible type "List[str]"; expected + # "List[None]" [arg-type] + columns.append(this_columns) # type: ignore[arg-type] unnamed_cols.update({this_columns[i] for i in this_unnamed_cols}) if len(columns) == 1: @@ -2636,9 +2730,19 @@ def _infer_columns(self): if not names: if self.prefix: - columns = [[f"{self.prefix}{i}" for i in range(ncols)]] + # pandas\io\parsers.py:2711: error: List comprehension has + # incompatible type List[str]; expected List[None] [misc] + columns = [ + [ + f"{self.prefix}{i}" # type: ignore[misc] + for i in range(ncols) + ] + ] else: - columns = [list(range(ncols))] + # pandas\io\parsers.py:2713: error: Argument 1 to "list" + # has incompatible type "range"; expected "Iterable[None]" + # [arg-type] + columns = [list(range(ncols))] # type: ignore[arg-type] columns = self._handle_usecols(columns, columns[0]) else: if self.usecols is None or len(names) >= num_original_columns: @@ -2790,7 +2894,10 @@ def _next_line(self): else: while self.skipfunc(self.pos): self.pos += 1 - next(self.data) + # pandas\io\parsers.py:2865: 
error: Argument 1 to "next" has + # incompatible type "Optional[Any]"; expected "Iterator[Any]" + # [arg-type] + next(self.data) # type: ignore[arg-type] while True: orig_line = self._next_iter_line(row_num=self.pos + 1) @@ -2851,7 +2958,10 @@ def _next_iter_line(self, row_num): row_num : The row number of the line being parsed. """ try: - return next(self.data) + # pandas\io\parsers.py:2926: error: Argument 1 to "next" has + # incompatible type "Optional[Any]"; expected "Iterator[Any]" + # [arg-type] + return next(self.data) # type: ignore[arg-type] except csv.Error as e: if self.warn_bad_lines or self.error_bad_lines: msg = str(e) @@ -3084,12 +3194,19 @@ def _rows_to_cols(self, content): for i, a in enumerate(zipped_content) if ( i < len(self.index_col) - or i - len(self.index_col) in self._col_indices + # pandas\io\parsers.py:3159: error: Unsupported right + # operand type for in ("Optional[Any]") [operator] + or i - len(self.index_col) # type: ignore[operator] + in self._col_indices ) ] else: zipped_content = [ - a for i, a in enumerate(zipped_content) if i in self._col_indices + # pandas\io\parsers.py:3164: error: Unsupported right + # operand type for in ("Optional[Any]") [operator] + a + for i, a in enumerate(zipped_content) + if i in self._col_indices # type: ignore[operator] ] return zipped_content @@ -3134,7 +3251,10 @@ def _get_lines(self, rows=None): try: if rows is not None: for _ in range(rows): - new_rows.append(next(self.data)) + # pandas\io\parsers.py:3209: error: Argument 1 to + # "next" has incompatible type "Optional[Any]"; + # expected "Iterator[Any]" [arg-type] + new_rows.append(next(self.data)) # type: ignore[arg-type] lines.extend(new_rows) else: rows = 0 @@ -3312,7 +3432,9 @@ def _clean_na_values(na_values, keep_default_na=True): na_values = STR_NA_VALUES else: na_values = set() - na_fvalues = set() + # pandas\io\parsers.py:3387: error: Need type annotation for + # 'na_fvalues' (hint: "na_fvalues: Set[] = ...") [var-annotated] + na_fvalues 
= set() # type: ignore[var-annotated] elif isinstance(na_values, dict): old_na_values = na_values.copy() na_values = {} # Prevent aliasing. @@ -3329,7 +3451,12 @@ def _clean_na_values(na_values, keep_default_na=True): v = set(v) | STR_NA_VALUES na_values[k] = v - na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} + # pandas\io\parsers.py:3404: error: Incompatible types in assignment + # (expression has type "Dict[Any, Any]", variable has type "Set[Any]") + # [assignment] + na_fvalues = { # type: ignore[assignment] + k: _floatify_na_values(v) for k, v in na_values.items() + } else: if not is_list_like(na_values): na_values = [na_values] @@ -3370,7 +3497,10 @@ def _clean_index_names(columns, index_col, unnamed_cols): # Only clean index names that were placeholders. for i, name in enumerate(index_names): if isinstance(name, str) and name in unnamed_cols: - index_names[i] = None + # pandas\io\parsers.py:3445: error: No overload variant of + # "__setitem__" of "list" matches argument types "int", "None" + # [call-overload] + index_names[i] = None # type: ignore[call-overload] return index_names, columns, index_col @@ -3447,11 +3577,15 @@ def _stringify_na_values(na_values): result.append(f"{v}.0") result.append(str(v)) - result.append(v) + # pandas\io\parsers.py:3522: error: Argument 1 to "append" of + # "list" has incompatible type "float"; expected "str" [arg-type] + result.append(v) # type: ignore[arg-type] except (TypeError, ValueError, OverflowError): pass try: - result.append(int(x)) + # pandas\io\parsers.py:3526: error: Argument 1 to "append" of + # "list" has incompatible type "int"; expected "str" [arg-type] + result.append(int(x)) # type: ignore[arg-type] except (TypeError, ValueError, OverflowError): pass return set(result) @@ -3622,7 +3756,11 @@ def __init__(self, f, **kwds): PythonParser.__init__(self, f, **kwds) def _make_reader(self, f): - self.data = FixedWidthReader( + # pandas\io\parsers.py:3730: error: Incompatible types in 
assignment + # (expression has type "FixedWidthReader", variable has type + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, + # mmap, None]") [assignment] + self.data = FixedWidthReader( # type: ignore[assignment] f, self.colspecs, self.delimiter, diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6c9924e0ada79..2501d84de4459 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -577,8 +577,20 @@ def _make_legend(self): if self.legend: if self.legend == "reverse": - self.legend_handles = reversed(self.legend_handles) - self.legend_labels = reversed(self.legend_labels) + # pandas\plotting\_matplotlib\core.py:578: error: + # Incompatible types in assignment (expression has type + # "Iterator[Any]", variable has type "List[Any]") + # [assignment] + self.legend_handles = reversed( # type: ignore[assignment] + self.legend_handles + ) + # pandas\plotting\_matplotlib\core.py:579: error: + # Incompatible types in assignment (expression has type + # "Iterator[Optional[Hashable]]", variable has type + # "List[Optional[Hashable]]") [assignment] + self.legend_labels = reversed( # type: ignore[assignment] + self.legend_labels + ) handles += self.legend_handles labels += self.legend_labels @@ -1101,7 +1113,11 @@ def _make_plot(self): it = self._iter_data(data=data, keep_index=True) else: x = self._get_xticks(convert_period=True) - plotf = self._plot + # pandas\plotting\_matplotlib\core.py:1100: error: Incompatible + # types in assignment (expression has type "Callable[[Any, Any, + # Any, Any, Any, Any, KwArg(Any)], Any]", variable has type + # "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") [assignment] + plotf = self._plot # type: ignore[assignment] it = self._iter_data() stacking_id = self._get_stacking_id() @@ -1547,7 +1563,10 @@ def blank_labeler(label, value): if labels is not None: blabels = [blank_labeler(l, value) for l, value in zip(labels, y)] else: - blabels = 
None + # pandas\plotting\_matplotlib\core.py:1546: error: Incompatible + # types in assignment (expression has type "None", variable has + # type "List[Any]") [assignment] + blabels = None # type: ignore[assignment] results = ax.pie(y, labels=blabels, **kwds) if kwds.get("autopct", None) is not None: diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 6e473bf5b182c..58f44104b99d6 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -530,7 +530,9 @@ def reset(self): ------- None """ - self.__init__() + # pandas\plotting\_misc.py:533: error: Cannot access "__init__" + # directly [misc] + self.__init__() # type: ignore[misc] def _get_canonical_key(self, key): return self._ALIASES.get(key, key) diff --git a/setup.cfg b/setup.cfg index b1f423c12ebf4..c83a83d599f6c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -114,10 +114,11 @@ skip_glob = env, skip = pandas/__init__.py [mypy] -ignore_missing_imports=True -no_implicit_optional=True -check_untyped_defs=True -strict_equality=True +platform = linux-64 +ignore_missing_imports = True +no_implicit_optional = True +check_untyped_defs = True +strict_equality = True warn_redundant_casts = True warn_unused_ignores = True show_error_codes = True @@ -125,86 +126,8 @@ show_error_codes = True [mypy-pandas.tests.*] check_untyped_defs=False -[mypy-pandas._testing] -check_untyped_defs=False - [mypy-pandas._version] check_untyped_defs=False -[mypy-pandas.core.apply] -check_untyped_defs=False - -[mypy-pandas.core.arrays.datetimelike] -check_untyped_defs=False - -[mypy-pandas.core.arrays.string_] -check_untyped_defs=False - -[mypy-pandas.core.base] -check_untyped_defs=False - -[mypy-pandas.core.computation.expr] -check_untyped_defs=False - -[mypy-pandas.core.computation.ops] -check_untyped_defs=False - -[mypy-pandas.core.computation.scope] -check_untyped_defs=False - -[mypy-pandas.core.frame] -check_untyped_defs=False - -[mypy-pandas.core.generic] -check_untyped_defs=False - 
-[mypy-pandas.core.groupby.base] -check_untyped_defs=False - -[mypy-pandas.core.groupby.grouper] -check_untyped_defs=False - -[mypy-pandas.core.indexes.category] -check_untyped_defs=False - -[mypy-pandas.core.indexes.datetimelike] -check_untyped_defs=False - -[mypy-pandas.core.indexes.datetimes] -check_untyped_defs=False - -[mypy-pandas.core.indexes.extension] -check_untyped_defs=False - -[mypy-pandas.core.indexes.multi] -check_untyped_defs=False - -[mypy-pandas.core.resample] -check_untyped_defs=False - -[mypy-pandas.core.reshape.merge] -check_untyped_defs=False - [mypy-pandas.io.clipboard] check_untyped_defs=False - -[mypy-pandas.io.excel._base] -check_untyped_defs=False - -[mypy-pandas.io.formats.console] -check_untyped_defs=False - -[mypy-pandas.io.formats.excel] -check_untyped_defs=False - -[mypy-pandas.io.formats.format] -check_untyped_defs=False - -[mypy-pandas.io.parsers] -check_untyped_defs=False - -[mypy-pandas.plotting._matplotlib.core] -check_untyped_defs=False - -[mypy-pandas.plotting._misc] -check_untyped_defs=False From b14946578f038e2419c8940df8fe283bbbddfac3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Nov 2020 04:52:48 -0800 Subject: [PATCH 096/147] REF: remove ObjectBlock._replace_single (#37710) --- pandas/core/internals/blocks.py | 135 +++++++++++++++----------------- 1 file changed, 62 insertions(+), 73 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index fd23b89365496..ed77a210b6913 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -813,17 +813,50 @@ def replace( ) return blocks - def _replace_single( + def _replace_regex( self, to_replace, value, inplace: bool = False, - regex: bool = False, convert: bool = True, mask=None, ) -> List["Block"]: - """ no-op on a non-ObjectBlock """ - return [self] if inplace else [self.copy()] + """ + Replace elements by the given value. 
+ + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + List[Block] + """ + if not self._can_hold_element(to_replace): + # i.e. only ObjectBlock, but could in principle include a + # String ExtensionBlock + return [self] if inplace else [self.copy()] + + rx = re.compile(to_replace) + + new_values = self.values if inplace else self.values.copy() + replace_regex(new_values, rx, value, mask) + + block = self.make_block(new_values) + if convert: + nbs = block.convert(numeric=False) + else: + nbs = [block] + return nbs def _replace_list( self, @@ -1598,14 +1631,16 @@ def _replace_coerce( self = self.coerce_to_target_dtype(value) return self.putmask(mask, value, inplace=inplace) else: - return self._replace_single( - to_replace, - value, - inplace=inplace, - regex=regex, - convert=False, - mask=mask, - ) + regex = _should_use_regex(regex, to_replace) + if regex: + return self._replace_regex( + to_replace, + value, + inplace=inplace, + convert=False, + mask=mask, + ) + return self.replace(to_replace, value, inplace=inplace, regex=False) return [self] @@ -2506,72 +2541,26 @@ def replace( # here with listlike to_replace or value, as those cases # go through _replace_list - if is_re(to_replace) or regex: - return self._replace_single(to_replace, value, inplace=inplace, regex=True) - else: - return super().replace(to_replace, value, inplace=inplace, regex=regex) - - def _replace_single( - self, - to_replace, - value, - inplace: bool = False, - regex: bool = False, - convert: bool = True, - mask=None, - ) -> List["Block"]: - """ - Replace elements by the given value. 
- - Parameters - ---------- - to_replace : object or pattern - Scalar to replace or regular expression to match. - value : object - Replacement object. - inplace : bool, default False - Perform inplace modification. - regex : bool, default False - If true, perform regular expression substitution. - convert : bool, default True - If true, try to coerce any object types to better types. - mask : array-like of bool, optional - True indicate corresponding element is ignored. - - Returns - ------- - List[Block] - """ - inplace = validate_bool_kwarg(inplace, "inplace") - - # to_replace is regex compilable - regex = regex and is_re_compilable(to_replace) + regex = _should_use_regex(regex, to_replace) - # try to get the pattern attribute (compiled re) or it's a string - if is_re(to_replace): - pattern = to_replace.pattern + if regex: + return self._replace_regex(to_replace, value, inplace=inplace) else: - pattern = to_replace + return super().replace(to_replace, value, inplace=inplace, regex=False) - # if the pattern is not empty and to_replace is either a string or a - # regex - if regex and pattern: - rx = re.compile(to_replace) - else: - # if the thing to replace is not a string or compiled regex call - # the superclass method -> to_replace is some kind of object - return super().replace(to_replace, value, inplace=inplace, regex=regex) - new_values = self.values if inplace else self.values.copy() - replace_regex(new_values, rx, value, mask) +def _should_use_regex(regex: bool, to_replace: Any) -> bool: + """ + Decide whether to treat `to_replace` as a regular expression. + """ + if is_re(to_replace): + regex = True - # convert - block = self.make_block(new_values) - if convert: - nbs = block.convert(numeric=False) - else: - nbs = [block] - return nbs + regex = regex and is_re_compilable(to_replace) + + # Don't use regex if the pattern is empty. 
+ regex = regex and re.compile(to_replace).pattern != "" + return regex class CategoricalBlock(ExtensionBlock): From c94748621e02730e51b7c5d94f936f5da673a37f Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 097/147] Transfer tests of test_frame.py to test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 31 +++++++------------ .../tests/plotting/frame/test_frame_color.py | 21 +++++++------ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9f94d44c74143..4d339b93fd30d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -2098,20 +2098,11 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - import matplotlib as mpl - df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - - # GH 37668 - kwargs = {} - if mpl.__version__ >= "3.3": - kwargs = {"normalize": True} - - with tm.assert_produces_warning(None): - df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) + df.plot.pie(subplots=True, ax=axes, legend=True) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): @@ -2196,11 +2187,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, 
yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -2277,11 +2268,11 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index fefa342770c7f..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,6 +48,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) + def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -155,7 +156,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = DataFrame( + df = pd.DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +177,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +199,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +215,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( + df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -270,7 +271,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -610,7 +611,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,15 +619,15 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = 
DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() + assert legend.get_color() == line.get_color() \ No newline at end of file From 537b3ba58de40d2f299a524b32bccb935784412f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 13:21:19 -0800 Subject: [PATCH 098/147] TST/REF: collect indexing tests by method (#37590) --- pandas/tests/indexing/test_loc.py | 176 ++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 26c9e127bcc10..74b40bc274cfb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1559,6 +1559,182 @@ def test_loc_setitem_mask_and_label_with_datetimeindex(self): tm.assert_frame_equal(df, expected) +class TestLocSetitemWithExpansion: + @pytest.mark.slow + def test_loc_setitem_with_expansion_large_dataframe(self): + # GH#10692 + result = DataFrame({"x": range(10 ** 6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) + + +class TestLocCallable: + def test_frame_loc_getitem_callable(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see GH3635) + + # 
return bool indexer + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_getitem_callable_mixture(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", 
"B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_getitem_callable_labels(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 + exp = df.copy() + 
exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) + + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( From e9af595fb112d60738b6ee6f43db134a8d0ea224 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sun, 8 Nov 2020 17:58:01 +0300 Subject: [PATCH 099/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == 
line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From 57f844c33175a9ff3c36f330e34013aa2b382df9 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 17:40:18 +0300 Subject: [PATCH 100/147] =?UTF-8?q?=D0=A1hange=20DateFrame=20to=20pd.DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/plotting/frame/test_frame_color.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 517e3c109fc5b..136ea43f2333f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -32,7 +32,7 @@ def setup_method(self, method): mpl.rcdefaults() self.tdf = tm.makeTimeDataFrame() - self.hexbin_df = DataFrame( + self.hexbin_df = pd.DataFrame( { "A": np.random.uniform(size=20), "B": np.random.uniform(size=20), @@ -50,7 +50,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = pd.DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,22 +68,22 @@ def test_mpl2_color_cycle_str(self): def test_color_single_series_list(self): # GH 3486 - df = DataFrame({"A": [1, 2, 3]}) + df = pd.DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=["red"]) def test_rgb_tuple_color(self): # GH 16695 - df = DataFrame({"x": [1, 2], "y": [3, 4]}) + df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) + df = 
pd.DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") def test_color_and_style_arguments(self): - df = DataFrame({"x": [1, 2], "y": [3, 4]}) + df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) # passing both 'color' and 'style' arguments should be allowed # if there is no color symbol in the style strings: ax = df.plot(color=["red", "black"], style=["-", "--"]) @@ -107,7 +107,7 @@ def test_color_and_style_arguments(self): ) def test_color_and_marker(self, color, expected): # GH 21003 - df = DataFrame(np.random.random((7, 4))) + df = pd.DataFrame(np.random.random((7, 4))) ax = df.plot(color=color, style="d--") # check colors result = [i.get_color() for i in ax.lines] @@ -122,7 +122,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -227,7 +227,7 @@ def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -280,7 +280,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +349,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = pd.DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +392,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot.hist() 
self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +429,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = pd.DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 +451,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +519,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -580,7 +580,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): ) def test_specified_props_kwd_plot_box(self, props, expected): # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) + df = pd.DataFrame({k: np.random.random(100) for k in "ABC"}) kwd = {props: dict(color="C1")} result = df.plot.box(return_type="dict", **kwd) @@ -593,14 +593,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(randn(5, 3)) + df = pd.DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) + df = pd.DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") From f7984fcd56ee65e07abd4aad7fc252a17b64cc67 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 18:01:36 +0300 Subject: 
[PATCH 101/147] =?UTF-8?q?=D0=A1hange=20pd.DateFrame=20to=20DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/tests/plotting/frame/test_frame.py | 48 ++++++++--------- .../tests/plotting/frame/test_frame_color.py | 54 +++++++++---------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4d339b93fd30d..03fb420517340 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -549,7 +549,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = pd.DataFrame(data) + testdata = DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -1027,13 +1027,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = pd.DataFrame( + df1 = DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = pd.DataFrame( + df2 = DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -1076,7 +1076,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -1087,19 +1087,19 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) + 
df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) @@ -1111,7 +1111,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -1122,7 +1122,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( + df = DataFrame( {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} ) @@ -1210,7 +1210,7 @@ def test_scatter_colorbar_different_cmap(self): # GH 33389 import matplotlib.pyplot as plt - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) df["x2"] = df["x"] + 1 fig, ax = plt.subplots() @@ -1577,7 +1577,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", 
weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1818,7 +1818,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = pd.DataFrame( + df = DataFrame( data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] ) fig, ax = self.plt.subplots(nrows=1, ncols=3) @@ -2696,7 +2696,7 @@ def test_plain_axes(self): def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2712,7 +2712,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = pd.DataFrame( + df = DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2733,7 +2733,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2748,7 +2748,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) + df = DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2760,7 +2760,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( + df = DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2783,7 +2783,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): 
def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -2798,7 +2798,7 @@ def test_subplots_sharex_false(self): def test_plot_no_rows(self): # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) + df = DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2807,13 +2807,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2823,7 +2823,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2854,7 +2854,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2882,7 +2882,7 @@ def test_xlabel_ylabel_dataframe_subplots( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name 
diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 136ea43f2333f..fefa342770c7f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -32,7 +32,7 @@ def setup_method(self, method): mpl.rcdefaults() self.tdf = tm.makeTimeDataFrame() - self.hexbin_df = pd.DataFrame( + self.hexbin_df = DataFrame( { "A": np.random.uniform(size=20), "B": np.random.uniform(size=20), @@ -50,7 +50,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = pd.DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,22 +68,22 @@ def test_mpl2_color_cycle_str(self): def test_color_single_series_list(self): # GH 3486 - df = pd.DataFrame({"A": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=["red"]) def test_rgb_tuple_color(self): # GH 16695 - df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) + df = DataFrame({"x": [1, 2], "y": [3, 4]}) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = pd.DataFrame(randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") def test_color_and_style_arguments(self): - df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) + df = DataFrame({"x": [1, 2], "y": [3, 4]}) # passing both 'color' and 'style' arguments should be allowed # if there is no color symbol in the style strings: ax = df.plot(color=["red", "black"], style=["-", "--"]) @@ -107,7 +107,7 @@ def test_color_and_style_arguments(self): ) def test_color_and_marker(self, color, expected): # GH 21003 - df = 
pd.DataFrame(np.random.random((7, 4))) + df = DataFrame(np.random.random((7, 4))) ax = df.plot(color=color, style="d--") # check colors result = [i.get_color() for i in ax.lines] @@ -122,7 +122,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -155,7 +155,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +176,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +198,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +214,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -227,7 +227,7 @@ def test_line_colors(self): from 
matplotlib import cm custom_colors = "rgcby" - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -270,7 +270,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -280,7 +280,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +349,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = pd.DataFrame(rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +392,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +429,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = pd.DataFrame(rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 +451,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +519,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = 
self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -580,7 +580,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): ) def test_specified_props_kwd_plot_box(self, props, expected): # GH 30346 - df = pd.DataFrame({k: np.random.random(100) for k in "ABC"}) + df = DataFrame({k: np.random.random(100) for k in "ABC"}) kwd = {props: dict(color="C1")} result = df.plot.box(return_type="dict", **kwd) @@ -593,14 +593,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = pd.DataFrame(randn(5, 3)) + df = DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) def test_invalid_colormap(self): - df = pd.DataFrame(randn(3, 2), columns=["A", "B"]) + df = DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -610,7 +610,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +618,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> 
https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): From 156e51ee6c64152a038c320f55ac748a4dc7f489 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:30:44 +0300 Subject: [PATCH 102/147] Removing imports --- pandas/tests/plotting/frame/test_frame_color.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index fefa342770c7f..386482ea82ef9 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -11,17 +8,11 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFrameColor(TestPlotBase): From 8462f53650f20108132e9e2f8d181dde9d1b380f Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:31:18 +0300 Subject: [PATCH 103/147] Bug fixes --- pandas/tests/plotting/frame/test_frame.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git 
a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 03fb420517340..cdda5e3d5ad88 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1122,9 +1122,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) _check_plot_works(df.plot.scatter, x=x, y=y) @@ -1193,7 +1191,6 @@ def test_scatter_colors(self): df.plot.scatter(x="a", y="b", c="c", color="green") default_colors = self._unpack_cycler(self.plt.rcParams) - ax = df.plot.scatter(x="a", y="b", c="c") tm.assert_numpy_array_equal( ax.collections[0].get_facecolor()[0], @@ -1818,9 +1815,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) From 1337b43c93a703bd988ab0fdb07230d22610fec3 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:31:18 +0300 Subject: [PATCH 104/147] Bug fixes --- pandas/tests/plotting/frame/test_frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index cdda5e3d5ad88..9b01bf4adccb5 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1104,7 +1104,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow def 
test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not From ada4fad6688bd1c0708b288d22c03fe1e8560296 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 21:47:00 +0300 Subject: [PATCH 105/147] Fix incorrect merge --- pandas/tests/plotting/frame/test_frame.py | 743 +----------------- .../tests/plotting/frame/test_frame_color.py | 17 - 2 files changed, 17 insertions(+), 743 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9b01bf4adccb5..d2d4e2aad24c8 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -337,412 +337,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, 
sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - 
# only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - 
pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 
00:00:00"), - ], - } - testdata = DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) 
- assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) 
- - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - 
self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - def test_negative_log(self): df = -DataFrame( rand(6, 4), @@ -939,46 +533,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, 
kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) @@ -1184,38 +738,6 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( @@ -1249,164 +771,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, 
- width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, 
kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, 
width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1485,26 +849,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -1898,6 +1242,23 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert 
ax.get_legend().get_texts()[0].get_text() == "None" + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -2037,13 +1398,6 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -2483,53 +1837,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - 
assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -2774,22 +2081,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = DataFrame(columns=["foo"], dtype=int) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 386482ea82ef9..a1c32b941a2f2 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -560,23 +560,6 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in 
ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt From e9253d5c24436228efc78f5e7f9e3fb6a7e41f9c Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 22:43:13 +0300 Subject: [PATCH 106/147] test_frame_color.py edit --- .../tests/plotting/frame/test_frame_color.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index a1c32b941a2f2..2cf327a85c6a7 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -604,3 +604,43 @@ def test_colors_of_columns_with_same_name(self): result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): assert legend.get_color() == line.get_color() + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", 
color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" From acc2a24efc667e42c05bfa73164ab05d642b5691 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 23:36:53 +0300 Subject: [PATCH 107/147] Transfer tests of test_frame.py to test_frame_color.py, test_frame_groupby.py and test_frame_subplots.py --- pandas/tests/plotting/frame/__init__.py | 0 .../tests/plotting/{ => frame}/test_frame.py | 1339 ----------------- .../tests/plotting/frame/test_frame_color.py | 672 +++++++++ .../plotting/frame/test_frame_groupby.py | 107 ++ .../plotting/frame/test_frame_subplots.py | 704 +++++++++ 5 files changed, 1483 insertions(+), 1339 deletions(-) create mode 100644 pandas/tests/plotting/frame/__init__.py rename pandas/tests/plotting/{ => frame}/test_frame.py (61%) create mode 100644 pandas/tests/plotting/frame/test_frame_color.py create mode 100644 pandas/tests/plotting/frame/test_frame_groupby.py create mode 100644 pandas/tests/plotting/frame/test_frame_subplots.py diff --git a/pandas/tests/plotting/frame/__init__.py b/pandas/tests/plotting/frame/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/frame/test_frame.py similarity index 61% rename from pandas/tests/plotting/test_frame.py rename to pandas/tests/plotting/frame/test_frame.py index 11a46858ba281..ee9e98fb7f3b8 100644 --- a/pandas/tests/plotting/test_frame.py +++ 
b/pandas/tests/plotting/frame/test_frame.py @@ -167,74 +167,6 @@ def test_integer_array_plot(self): _check_plot_works(df.plot.scatter, x="x", y="y") _check_plot_works(df.plot.hexbin, x="x", y="y") - def test_mpl2_color_cycle_str(self): - # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) - colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", "MatplotlibDeprecationWarning") - - for color in colors: - _check_plot_works(df.plot, color=color) - - # if warning is raised, check that it is the exact problematic one - # GH 36972 - if w: - match = "Support for uppercase single-letter colors is deprecated" - warning_message = str(w[0].message) - msg = "MatplotlibDeprecationWarning related to CN colors was raised" - assert match not in warning_message, msg - - def test_color_single_series_list(self): - # GH 3486 - df = DataFrame({"A": [1, 2, 3]}) - _check_plot_works(df.plot, color=["red"]) - - def test_rgb_tuple_color(self): - # GH 16695 - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) - - def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) - with pytest.raises(ValueError): - df.plot(color="") - - def test_color_and_style_arguments(self): - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - # passing both 'color' and 'style' arguments should be allowed - # if there is no color symbol in the style strings: - ax = df.plot(color=["red", "black"], style=["-", "--"]) - # check that the linestyles are correctly set: - linestyle = [line.get_linestyle() for line in ax.lines] - assert linestyle == ["-", "--"] - # check that the colors are correctly set: - color = [line.get_color() for line in ax.lines] - assert color == ["red", "black"] - # passing both 'color' and 'style' arguments should not be allowed - # if there is a 
color symbol in the style strings: - with pytest.raises(ValueError): - df.plot(color=["red", "black"], style=["k-", "r--"]) - - @pytest.mark.parametrize( - "color, expected", - [ - ("green", ["green"] * 4), - (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]), - ], - ) - def test_color_and_marker(self, color, expected): - # GH 21003 - df = DataFrame(np.random.random((7, 4))) - ax = df.plot(color=color, style="d--") - # check colors - result = [i.get_color() for i in ax.lines] - assert result == expected - # check markers and linestyles - assert all(i.get_linestyle() == "--" for i in ax.lines) - assert all(i.get_marker() == "d" for i in ax.lines) - def test_nonnumeric_exclude(self): df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) ax = df.plot() @@ -404,412 +336,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - 
self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - 
expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], 
- "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - 
pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert 
axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 
-1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), 
visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - def test_negative_log(self): df = -DataFrame( np.random.rand(6, 4), @@ -952,60 +478,6 @@ def test_area_lim(self): ymin, ymax = ax.get_ylim() assert ymax == 0 - @pytest.mark.slow - def test_bar_colors(self): - import matplotlib.pyplot as plt - - default_colors = self._unpack_cycler(plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - ax = df.plot.bar() - self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.bar(color=custom_colors) - self._check_colors(ax.patches[::5], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.bar(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - # Test colormap functionality - ax = df.plot.bar(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - tm.close() - - ax = df.plot(kind="bar", 
color="green") - self._check_colors(ax.patches[::5], facecolors=["green"] * 5) - tm.close() - - def test_bar_user_colors(self): - df = DataFrame( - {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} - ) - # This should *only* work when `y` is specified, else - # we use one color per column - ax = df.plot.bar(y="A", color=df["color"]) - result = [p.get_facecolor() for p in ax.patches] - expected = [ - (1.0, 0.0, 0.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (1.0, 0.0, 0.0, 1.0), - ] - assert result == expected - @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(np.random.randn(5, 5)) @@ -1065,46 +537,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(np.random.randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(np.random.randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", 
stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(np.random.rand(5, 5)) @@ -1230,60 +662,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow - def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): - # addressing issue #10611, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. - random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax1 = df.plot.scatter(x="A label", y="B label") - ax2 = df.plot.scatter(x="A label", y="B label", c="C label") - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] - assert vis1 == vis2 - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] - assert vis1 == vis2 - - assert ( - ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() - ) - - @pytest.mark.slow - def test_if_hexbin_xaxis_label_is_visible(self): - # addressing issue #10678, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. 
- random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax = df.plot.hexbin("A label", "B label", gridsize=12) - assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) - assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) - assert ax.xaxis.get_label().get_visible() - - @pytest.mark.slow - def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): - import matplotlib.pyplot as plt - - random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - fig, axes = plt.subplots(1, 2) - df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) - df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) - plt.tight_layout() - - points = np.array([ax.get_position().get_points() for ax in fig.axes]) - axes_x_coords = points[:, :, 0] - parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] - colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] - assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() - @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): @@ -1344,17 +722,6 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - @pytest.mark.parametrize("cmap", [None, "Greys"]) - def test_scatter_with_c_column_name_with_colors(self, cmap): - # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( - [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], - columns=["length", "width"], - ) - df["species"] = ["r", "r", "g", "g", "b"] - ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) - assert ax.collections[0].colorbar is None - def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) @@ -1362,39 +729,6 @@ 
def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( @@ -1430,164 +764,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - 
ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", 
stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(np.random.randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", 
subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1666,26 +842,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -2078,352 +1234,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.slow - def test_line_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) - - ax = df.plot(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - - tm.close() - - ax2 = 
df.plot(color=custom_colors) - lines2 = ax2.get_lines() - - for l1, l2 in zip(ax.get_lines(), lines2): - assert l1.get_color() == l2.get_color() - - tm.close() - - ax = df.plot(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - ax = df.loc[:, [0]].plot(color="DodgerBlue") - self._check_colors(ax.lines, linecolors=["DodgerBlue"]) - - ax = df.plot(color="red") - self._check_colors(ax.get_lines(), linecolors=["red"] * 5) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - ax = df.plot(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - @pytest.mark.slow - def test_dont_modify_colors(self): - colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) - assert len(colors) == 3 - - @pytest.mark.slow - def test_line_colors_and_styles_subplots(self): - # GH 9894 - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - - axes = df.plot(subplots=True) - for ax, c in zip(axes, list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # single color char - axes = df.plot(subplots=True, color="k") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(subplots=True, color="green") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["green"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - 
self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - axes = df.plot(color=list(custom_colors), subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_area_colors(self): - from matplotlib import cm - from matplotlib.collections import PolyCollection - - custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) - - ax = df.plot.area(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=custom_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=custom_colors) - - for h in handles: - assert h.get_alpha() is None - tm.close() - - ax = df.plot.area(colormap="jet") - jet_colors = [cm.jet(n) for n 
in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=jet_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=jet_colors) - for h in handles: - assert h.get_alpha() is None - tm.close() - - # When stacked=False, alpha is set to 0.5 - ax = df.plot.area(colormap=cm.jet, stacked=False) - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] - self._check_colors(poly, facecolors=jet_with_alpha) - - handles, labels = ax.get_legend_handles_labels() - linecolors = jet_with_alpha - self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) - for h in handles: - assert h.get_alpha() == 0.5 - - @pytest.mark.slow - def test_hist_colors(self): - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - ax = df.plot.hist() - self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.hist(color=custom_colors) - self._check_colors(ax.patches[::10], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.hist(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - # Test colormap functionality - ax = df.plot.hist(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - - ax = df.plot(kind="hist", color="green") - self._check_colors(ax.patches[::10], 
facecolors=["green"] * 5) - tm.close() - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) - - ax = df.plot.kde(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - ax = df.plot.kde(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot.kde(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors_and_styles_subplots(self): - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - - axes = df.plot(kind="kde", subplots=True) - for ax, c in zip(axes, list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # single color char - axes = df.plot(kind="kde", color="k", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(kind="kde", color="red", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["red"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(kind="kde", color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(kind="kde", colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(kind="kde", 
color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(kind="kde", style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(kind="kde", style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_boxplot_colors(self): - def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): - # TODO: outside this func? - if fliers_c is None: - fliers_c = "k" - self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) - self._check_colors( - bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) - ) - self._check_colors( - bp["medians"], linecolors=[medians_c] * len(bp["medians"]) - ) - self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) - self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - bp = df.plot.box(return_type="dict") - _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) - tm.close() - - dict_colors = dict( - boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" - ) - bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") - _check_colors( - bp, - dict_colors["boxes"], - dict_colors["whiskers"], - dict_colors["medians"], - dict_colors["caps"], - "r", - ) - tm.close() - - # partial colors - dict_colors = dict(whiskers="c", medians="m") - bp = df.plot.box(color=dict_colors, return_type="dict") - _check_colors(bp, default_colors[0], "c", "m") - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - bp = df.plot.box(colormap="jet", return_type="dict") - jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] - 
_check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # Test colormap functionality - bp = df.plot.box(colormap=cm.jet, return_type="dict") - _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # string color is applied to all artists except fliers - bp = df.plot.box(color="DodgerBlue", return_type="dict") - _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") - - # tuple is also applied to all artists except fliers - bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") - _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") - - with pytest.raises(ValueError): - # Color contains invalid key results in ValueError - df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( "props, expected", [ @@ -2441,19 +1251,6 @@ def test_specified_props_kwd_plot_box(self, props, expected): assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): - import cycler - import matplotlib.pyplot as plt - - colors = list("rgbk") - plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - - df = DataFrame(np.random.randn(5, 3)) - ax = df.plot() - - expected = self._unpack_cycler(plt.rcParams)[:3] - self._check_colors(ax.get_lines(), linecolors=expected) - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -2595,19 +1392,6 @@ def test_hexbin_cmap(self, kwargs, expected): ax = df.plot.hexbin(x="A", y="B", **kwargs) assert ax.collections[0].cmap.name == expected - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - - @pytest.mark.slow - def test_mixing_cmap_and_colormap_raises(self): - df = self.hexbin_df - msg = "Only specify one of `cmap` and `colormap`" - with pytest.raises(TypeError, match=msg): - df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") - @pytest.mark.slow def 
test_pie_df(self): df = DataFrame( @@ -3046,53 +1830,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -3218,12 +1955,6 @@ def test_df_grid_settings(self): kws={"x": "a", "y": "b"}, ) - def 
test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) - - with pytest.raises(ValueError): - df.plot(colormap="invalid_colormap") - def test_plain_axes(self): # supplied ax itself is a SubplotAxes, but figure contains also @@ -3255,22 +1986,6 @@ def test_plain_axes(self): Series(np.random.rand(10)).plot(ax=ax) Series(np.random.rand(10)).plot(ax=iax) - def test_passed_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - - def test_rcParams_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 @@ -3359,22 +2074,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = DataFrame(columns=["foo"], dtype=int) @@ -3418,16 +2117,6 @@ def 
test_missing_markers_legend_using_style(self): self._check_legend_labels(ax, labels=["A", "B", "C"]) self._check_legend_marker(ax, expected_markers=[".", ".", "."]) - def test_colors_of_columns_with_same_name(self): - # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 - # Creating a DataFrame with duplicate column labels and testing colors of them. - df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) - df_concat = pd.concat([df, df1], axis=1) - result = df_concat.plot() - for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() - @pytest.mark.parametrize( "index_name, old_label, new_label", [ @@ -3477,34 +2166,6 @@ def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert all(ax.get_xlabel() == str(new_label) for ax in axes) - def _generate_4_axes_via_gridspec(): import matplotlib as mpl diff --git 
a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py new file mode 100644 index 0000000000000..66d25cca60593 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -0,0 +1,672 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFrameColor(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + def test_mpl2_color_cycle_str(self): + # GH 15516 + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", "MatplotlibDeprecationWarning") + + for color in colors: + _check_plot_works(df.plot, color=color) + + # if warning is raised, check that it is the exact 
problematic one + # GH 36972 + if w: + match = "Support for uppercase single-letter colors is deprecated" + warning_message = str(w[0].message) + msg = "MatplotlibDeprecationWarning related to CN colors was raised" + assert match not in warning_message, msg + + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, color=["red"]) + + def test_rgb_tuple_color(self): + # GH 16695 + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) + + def test_color_empty_string(self): + df = DataFrame(np.random.randn(10, 2)) + with pytest.raises(ValueError): + df.plot(color="") + + def test_color_and_style_arguments(self): + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + # passing both 'color' and 'style' arguments should be allowed + # if there is no color symbol in the style strings: + ax = df.plot(color=["red", "black"], style=["-", "--"]) + # check that the linestyles are correctly set: + linestyle = [line.get_linestyle() for line in ax.lines] + assert linestyle == ["-", "--"] + # check that the colors are correctly set: + color = [line.get_color() for line in ax.lines] + assert color == ["red", "black"] + # passing both 'color' and 'style' arguments should not be allowed + # if there is a color symbol in the style strings: + with pytest.raises(ValueError): + df.plot(color=["red", "black"], style=["k-", "r--"]) + + @pytest.mark.parametrize( + "color, expected", + [ + ("green", ["green"] * 4), + (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]), + ], + ) + def test_color_and_marker(self, color, expected): + # GH 21003 + df = DataFrame(np.random.random((7, 4))) + ax = df.plot(color=color, style="d--") + # check colors + result = [i.get_color() for i in ax.lines] + assert result == expected + # check markers and linestyles + assert all(i.get_linestyle() == "--" for i in ax.lines) + 
assert all(i.get_marker() == "d" for i in ax.lines) + + @pytest.mark.slow + def test_bar_colors(self): + import matplotlib.pyplot as plt + + default_colors = self._unpack_cycler(plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot.bar() + self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.bar(color=custom_colors) + self._check_colors(ax.patches[::5], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.bar(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.bar(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + tm.close() + + ax = df.plot(kind="bar", color="green") + self._check_colors(ax.patches[::5], facecolors=["green"] * 5) + tm.close() + + def test_bar_user_colors(self): + df = DataFrame( + {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} + ) + # This should *only* work when `y` is specified, else + # we use one color per column + ax = df.plot.bar(y="A", color=df["color"]) + result = [p.get_facecolor() for p in ax.patches] + expected = [ + (1.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (1.0, 0.0, 0.0, 1.0), + ] + assert result == expected + + @pytest.mark.slow + def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): + # addressing issue #10611, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. 
+ random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax1 = df.plot.scatter(x="A label", y="B label") + ax2 = df.plot.scatter(x="A label", y="B label", c="C label") + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] + assert vis1 == vis2 + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] + assert vis1 == vis2 + + assert ( + ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() + ) + + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + + @pytest.mark.slow + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): + import matplotlib.pyplot as plt + + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() + + 
@pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): + # https://github.com/pandas-dev/pandas/issues/34316 + df = DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) + assert ax.collections[0].colorbar is None + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + + @pytest.mark.slow + def test_line_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.randn(5, 5)) + + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + + tm.close() + + ax2 = df.plot(color=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() + + tm.close() + + ax = df.plot(colormap="jet") + rgba_colors = [cm.jet(n) for n in 
np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + ax = df.loc[:, [0]].plot(color="DodgerBlue") + self._check_colors(ax.lines, linecolors=["DodgerBlue"]) + + ax = df.plot(color="red") + self._check_colors(ax.get_lines(), linecolors=["red"] * 5) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + @pytest.mark.slow + def test_dont_modify_colors(self): + colors = ["r", "g", "b"] + DataFrame(np.random.rand(10, 2)).plot(color=colors) + assert len(colors) == 3 + + @pytest.mark.slow + def test_line_colors_and_styles_subplots(self): + # GH 9894 + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(subplots=True, color="k") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(subplots=True, color="green") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["green"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + axes = df.plot(color=list(custom_colors), subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + 
+ # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_area_colors(self): + from matplotlib import cm + from matplotlib.collections import PolyCollection + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = df.plot.area(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=custom_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=custom_colors) + + for h in handles: + assert h.get_alpha() is None + tm.close() + + ax = df.plot.area(colormap="jet") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=jet_colors) + + handles, labels = 
ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=jet_colors) + for h in handles: + assert h.get_alpha() is None + tm.close() + + # When stacked=False, alpha is set to 0.5 + ax = df.plot.area(colormap=cm.jet, stacked=False) + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] + self._check_colors(poly, facecolors=jet_with_alpha) + + handles, labels = ax.get_legend_handles_labels() + linecolors = jet_with_alpha + self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) + for h in handles: + assert h.get_alpha() == 0.5 + + @pytest.mark.slow + def test_hist_colors(self): + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot.hist() + self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.hist(color=custom_colors) + self._check_colors(ax.patches[::10], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.hist(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.hist(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + + ax = df.plot(kind="hist", color="green") + self._check_colors(ax.patches[::10], facecolors=["green"] * 5) + tm.close() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = 
df.plot.kde(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + ax = df.plot.kde(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot.kde(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors_and_styles_subplots(self): + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(kind="kde", subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(kind="kde", color="k", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(kind="kde", color="red", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["red"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(kind="kde", color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(kind="kde", colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(kind="kde", style="r", subplots=True) + for ax in axes: + 
self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(kind="kde", style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_boxplot_colors(self): + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): + # TODO: outside this func? + if fliers_c is None: + fliers_c = "k" + self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) + self._check_colors( + bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) + ) + self._check_colors( + bp["medians"], linecolors=[medians_c] * len(bp["medians"]) + ) + self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) + self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + bp = df.plot.box(return_type="dict") + _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) + tm.close() + + dict_colors = dict( + boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" + ) + bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") + _check_colors( + bp, + dict_colors["boxes"], + dict_colors["whiskers"], + dict_colors["medians"], + dict_colors["caps"], + "r", + ) + tm.close() + + # partial colors + dict_colors = dict(whiskers="c", medians="m") + bp = df.plot.box(color=dict_colors, return_type="dict") + _check_colors(bp, default_colors[0], "c", "m") + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + bp = df.plot.box(colormap="jet", return_type="dict") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # Test colormap functionality + bp = df.plot.box(colormap=cm.jet, return_type="dict") + _check_colors(bp, jet_colors[0], 
jet_colors[0], jet_colors[2]) + tm.close() + + # string color is applied to all artists except fliers + bp = df.plot.box(color="DodgerBlue", return_type="dict") + _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") + + # tuple is also applied to all artists except fliers + bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") + + with pytest.raises(ValueError): + # Color contains invalid key results in ValueError + df.plot.box(color=dict(boxes="red", xxxx="blue")) + + def test_default_color_cycle(self): + import cycler + import matplotlib.pyplot as plt + + colors = list("rgbk") + plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) + + df = DataFrame(np.random.randn(5, 3)) + ax = df.plot() + + expected = self._unpack_cycler(plt.rcParams)[:3] + self._check_colors(ax.get_lines(), linecolors=expected) + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + @pytest.mark.slow + def test_mixing_cmap_and_colormap_raises(self): + df = self.hexbin_df + msg = "Only specify one of `cmap` and `colormap`" + with pytest.raises(TypeError, match=msg): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + + def test_passed_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + colormap = mpl.colors.ListedColormap(color_tuples) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_rcParams_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + assert color_tuples == [c.get_facecolor() for c in 
barplot.patches] + + def test_colors_of_columns_with_same_name(self): + # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 + # Creating a DataFrame with duplicate column labels and testing colors of them. + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) + df_concat = pd.concat([df, df1], axis=1) + result = df_concat.plot() + for legend, line in zip(result.get_legend().legendHandles, result.lines): + assert legend.get_color() == line.get_color() + + def test_invalid_colormap(self): + df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py new file mode 100644 index 0000000000000..968fa65e63e79 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -0,0 +1,107 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFramePlotsGroupby(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, 
expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, 
False, True, True] + self._assert_xtickslabels_visibility(axes, expected) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py new file mode 100644 index 0000000000000..de3d65ebe3d4c --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -0,0 +1,704 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFramePlotsSubplots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + 
for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + 
self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def 
test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + 
assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert 
returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in 
axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + 
self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), 
visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = DataFrame([[1, 2], [2, 5]], columns=["Type 
A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(np.random.randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + 
self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(np.random.randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(np.random.randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + 
self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + 
tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes From 4dcc320af7790bc5db0bc12f13f4db465121cd13 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 23:49:21 +0300 Subject: [PATCH 108/147] Removing unnecessary imports --- pandas/tests/plotting/frame/test_frame_color.py | 10 +--------- .../tests/plotting/frame/test_frame_groupby.py | 17 ++--------------- .../tests/plotting/frame/test_frame_subplots.py | 9 ++------- 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 66d25cca60593..0c8086dd6a7a9 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -10,17 +7,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFrameColor(TestPlotBase): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 968fa65e63e79..06ce0d5076d69 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,25 +1,12 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import 
itertools -import string -import warnings - import numpy as np -import pytest import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - -import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index de3d65ebe3d4c..dfc98ad887aef 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,7 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools import string import warnings @@ -10,16 +8,13 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From de5b17c6adb74b656bf808d385ae8828f952f2ff Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 00:20:11 +0300 Subject: [PATCH 109/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 1 - pandas/tests/plotting/frame/test_frame_subplots.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py 
b/pandas/tests/plotting/frame/test_frame_color.py index 0c8086dd6a7a9..74eb87862b9d1 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -7,7 +7,6 @@ import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame import pandas._testing as tm diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index dfc98ad887aef..4a9f85d61ba2a 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -8,7 +8,6 @@ import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame, Series, date_range import pandas._testing as tm From d29832ac25d9f0bd71a494ee922349bbf2952fce Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 00:42:09 +0300 Subject: [PATCH 110/147] # Conflicts: # pandas/tests/plotting/frame/test_frame.py # pandas/tests/plotting/frame/test_frame_color.py # pandas/tests/plotting/frame/test_frame_subplots.py --- pandas/tests/plotting/frame/test_frame.py | 212 ++++++++---------- .../tests/plotting/frame/test_frame_color.py | 129 ++++++----- .../plotting/frame/test_frame_subplots.py | 80 +++---- 3 files changed, 212 insertions(+), 209 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index d2d4e2aad24c8..ee9e98fb7f3b8 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,7 +6,6 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -175,14 +174,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") 
self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(randn(2, 2), columns=["a", "b"]) + df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -339,7 +338,7 @@ def test_unsorted_index_lims(self): def test_negative_log(self): df = -DataFrame( - rand(6, 4), + np.random.rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -358,15 +357,20 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} + { + "w": np.random.rand(6), + "x": np.random.rand(6), + "y": -np.random.rand(6), + "z": -np.random.rand(6), + } ) # each column has positive-negative mixed value mixed_df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -434,7 +438,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): - df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -458,7 +462,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -476,7 +480,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(randn(5, 5)) + df = 
DataFrame(np.random.randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -497,7 +501,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) width = 0.9 @@ -535,7 +539,7 @@ def test_bar_barwidth(self): @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -611,7 +615,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -658,19 +662,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow - def test_if_hexbin_xaxis_label_is_visible(self): - # addressing issue #10678, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. 
- random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax = df.plot.hexbin("A label", "B label", gridsize=12) - assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) - assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) - assert ax.xaxis.get_label().get_visible() - @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): @@ -682,7 +673,7 @@ def test_plot_scatter_with_categorical_data(self, x, y): @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -741,7 +732,7 @@ def test_plot_scatter_with_s(self): @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -754,7 +745,9 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) + np.random.randn(10, 15), + index=list(string.ascii_letters[:10]), + columns=range(15), ) _check_plot_works(df.plot.bar) @@ -830,7 +823,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -852,7 +845,7 @@ def test_boxplot_return_type(self): @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -879,7 +872,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import 
Rectangle - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -1087,16 +1080,16 @@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(randn(100, 4)).cumsum() + df = DataFrame(np.random.randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1125,9 +1118,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1181,7 +1174,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - randn(4, 4), + np.random.randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1190,7 +1183,7 @@ def test_legend_name(self): leg_title 
= ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1207,10 +1200,9 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: - ax = df.plot(kind=kind, legend=False) self._check_legend_labels(ax, visible=False) @@ -1220,7 +1212,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(randn(100, 3)) + df = DataFrame(np.random.randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1230,8 +1222,8 @@ def test_style_by_column(self): fig.clf() fig.add_subplot(111) ax = df.plot(style=markers) - for i, l in enumerate(ax.get_lines()[: len(markers)]): - assert l.get_marker() == markers[i] + for idx, line in enumerate(ax.get_lines()[: len(markers)]): + assert line.get_marker() == markers[idx] @pytest.mark.slow def test_line_label_none(self): @@ -1293,7 +1285,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(randn(10, 2), dtype=object) + df = DataFrame(np.random.randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in plotting.PlotAccessor._common_kinds: @@ -1304,14 +1296,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(rand(10, 2), dtype=object) + df = DataFrame(np.random.rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(randn(10, 2)) + df = 
DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1387,26 +1379,18 @@ def test_hexbin_with_c(self): assert len(ax.collections) == 1 @pytest.mark.slow - def test_hexbin_cmap(self): - df = self.hexbin_df - - # Default to BuGn - ax = df.plot.hexbin(x="A", y="B") - assert ax.collections[0].cmap.name == "BuGn" - - cm = "cubehelix" - ax = df.plot.hexbin(x="A", y="B", colormap=cm) - assert ax.collections[0].cmap.name == cm - - @pytest.mark.slow - def test_allow_cmap(self): + @pytest.mark.parametrize( + "kwargs, expected", + [ + ({}, "BuGn"), # default cmap + ({"colormap": "cubehelix"}, "cubehelix"), + ({"cmap": "YlGn"}, "YlGn"), + ], + ) + def test_hexbin_cmap(self, kwargs, expected): df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", cmap="YlGn") - assert ax.collections[0].cmap.name == "YlGn" - - with pytest.raises(TypeError): - df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + ax = df.plot.hexbin(x="A", y="B", **kwargs) + assert ax.collections[0].cmap.name == expected @pytest.mark.slow def test_pie_df(self): @@ -1446,11 +1430,20 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): + import matplotlib as mpl + df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - df.plot.pie(subplots=True, ax=axes, legend=True) + + # GH 37668 + kwargs = {} + if mpl.__version__ >= "3.3": + kwargs = {"normalize": True} + + with tm.assert_produces_warning(None): + df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): @@ -1458,12 +1451,13 @@ def test_pie_df_nan(self): expected[i] = "" result = [x.get_text() for x in ax.texts] assert result == expected + # legend labels # NaN's not included in legend with subplots # see https://github.com/pandas-dev/pandas/issues/8390 - assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[ 
- :i - ] + base_expected[i + 1 :] + result_labels = [x.get_text() for x in ax.get_legend().get_texts()] + expected_labels = base_expected[:i] + base_expected[i + 1 :] + assert result_labels == expected_labels @pytest.mark.slow def test_errorbar_plot(self): @@ -1535,11 +1529,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1616,17 +1610,16 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): - np.random.seed(0) err = np.random.rand(3, 2, 5) @@ -1968,11 +1961,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(rand(10)).plot(ax=ax) + 
Series(np.random.rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": randn(8), "b": randn(8)}) + df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -1983,15 +1976,15 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=cax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=iax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): @@ -2153,32 +2146,25 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_xlabel() == str(new_label) @pytest.mark.parametrize( - "index_name, old_label, new_label", + "xlabel, ylabel", [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), + (None, None), + ("X Label", None), + (None, "Y Label"), + ("X Label", "Y Label"), ], ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be 
overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert all(ax.get_xlabel() == str(new_label) for ax in axes) + @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) + def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): + # GH 37001 + xcol = "Type A" + ycol = "Type B" + df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) + + # default is the labels are column names + ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) + assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) + assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 2cf327a85c6a7..74eb87862b9d1 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -3,7 +3,6 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -41,7 +40,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -69,7 +68,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -113,7 +112,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = 
DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -184,6 +183,19 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() ) + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + @pytest.mark.slow def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt @@ -213,12 +225,45 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, 
ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -271,12 +316,11 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): - c = [c] - self._check_colors(ax.get_lines(), linecolors=c) + self._check_colors(ax.get_lines(), linecolors=[c]) tm.close() # single color char @@ -340,7 +384,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -383,7 +427,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -420,7 +464,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -442,7 +486,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = 
df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -510,7 +554,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -567,17 +611,24 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) - def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None - with pytest.raises(ValueError): - df.plot(colormap="invalid_colormap") + @pytest.mark.slow + def test_mixing_cmap_and_colormap_raises(self): + df = self.hexbin_df + msg = "Only specify one of `cmap` and `colormap`" + with pytest.raises(TypeError, match=msg): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") def test_passed_bar_colors(self): import matplotlib as mpl @@ -605,42 +656,8 @@ def test_colors_of_columns_with_same_name(self): for legend, line in zip(result.get_legend().legendHandles, result.lines): assert legend.get_color() == line.get_color() - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = 
self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + def test_invalid_colormap(self): + df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 4c86a570360b0..4a9f85d61ba2a 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -529,46 +529,6 @@ def test_xlabel_ylabel_dataframe_subplots( assert all(ax.get_ylabel() == str(new_label) for ax in axes) assert all(ax.get_xlabel() == str(new_label) for ax in axes) - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(np.random.randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, 
width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(np.random.randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_stacked_center(self): # GH2157 @@ -625,6 +585,46 @@ def test_bar_edge(self): df, kind="barh", subplots=True, width=0.9, align="edge" ) + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(np.random.randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(np.random.randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() 
== (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + def _check_bar_alignment( self, df, From 769d0a80897eff2ae1e54aff82161bb16ed99a6b Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sat, 31 Oct 2020 18:32:27 +0300 Subject: [PATCH 111/147] Moving the file test_frame.py to a new directory --- pandas/tests/plotting/frame/__init__.py | 0 pandas/tests/plotting/{ => frame}/test_frame.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 pandas/tests/plotting/frame/__init__.py rename pandas/tests/plotting/{ => frame}/test_frame.py (100%) diff --git a/pandas/tests/plotting/frame/__init__.py b/pandas/tests/plotting/frame/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/frame/test_frame.py similarity index 100% rename from pandas/tests/plotting/test_frame.py rename to pandas/tests/plotting/frame/test_frame.py From 3382d2324e8c831ae359182d6660f6a8247bf32d Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 23:36:53 +0300 Subject: [PATCH 112/147] Transfer tests of test_frame.py to test_frame_color.py, test_frame_groupby.py and test_frame_subplots.py --- pandas/tests/plotting/frame/test_frame.py | 1339 ----------------- .../tests/plotting/frame/test_frame_color.py | 672 +++++++++ .../plotting/frame/test_frame_groupby.py | 107 ++ .../plotting/frame/test_frame_subplots.py | 704 +++++++++ 4 files changed, 1483 insertions(+), 1339 deletions(-) create mode 100644 pandas/tests/plotting/frame/test_frame_color.py create mode 100644 
pandas/tests/plotting/frame/test_frame_groupby.py create mode 100644 pandas/tests/plotting/frame/test_frame_subplots.py diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 11a46858ba281..ee9e98fb7f3b8 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -167,74 +167,6 @@ def test_integer_array_plot(self): _check_plot_works(df.plot.scatter, x="x", y="y") _check_plot_works(df.plot.hexbin, x="x", y="y") - def test_mpl2_color_cycle_str(self): - # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) - colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always", "MatplotlibDeprecationWarning") - - for color in colors: - _check_plot_works(df.plot, color=color) - - # if warning is raised, check that it is the exact problematic one - # GH 36972 - if w: - match = "Support for uppercase single-letter colors is deprecated" - warning_message = str(w[0].message) - msg = "MatplotlibDeprecationWarning related to CN colors was raised" - assert match not in warning_message, msg - - def test_color_single_series_list(self): - # GH 3486 - df = DataFrame({"A": [1, 2, 3]}) - _check_plot_works(df.plot, color=["red"]) - - def test_rgb_tuple_color(self): - # GH 16695 - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) - - def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) - with pytest.raises(ValueError): - df.plot(color="") - - def test_color_and_style_arguments(self): - df = DataFrame({"x": [1, 2], "y": [3, 4]}) - # passing both 'color' and 'style' arguments should be allowed - # if there is no color symbol in the style strings: - ax = df.plot(color=["red", "black"], style=["-", "--"]) - # check that the linestyles are correctly 
set: - linestyle = [line.get_linestyle() for line in ax.lines] - assert linestyle == ["-", "--"] - # check that the colors are correctly set: - color = [line.get_color() for line in ax.lines] - assert color == ["red", "black"] - # passing both 'color' and 'style' arguments should not be allowed - # if there is a color symbol in the style strings: - with pytest.raises(ValueError): - df.plot(color=["red", "black"], style=["k-", "r--"]) - - @pytest.mark.parametrize( - "color, expected", - [ - ("green", ["green"] * 4), - (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]), - ], - ) - def test_color_and_marker(self, color, expected): - # GH 21003 - df = DataFrame(np.random.random((7, 4))) - ax = df.plot(color=color, style="d--") - # check colors - result = [i.get_color() for i in ax.lines] - assert result == expected - # check markers and linestyles - assert all(i.get_linestyle() == "--" for i in ax.lines) - assert all(i.get_marker() == "d" for i in ax.lines) - def test_nonnumeric_exclude(self): df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) ax = df.plot() @@ -404,412 +336,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - 
self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 
0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def 
test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), - ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form 
could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = DataFrame(data) - ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 
1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow - def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - 
warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), 
visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - def test_negative_log(self): df = -DataFrame( np.random.rand(6, 4), @@ -952,60 +478,6 @@ def test_area_lim(self): ymin, ymax = ax.get_ylim() assert ymax == 0 - @pytest.mark.slow - def test_bar_colors(self): - import matplotlib.pyplot as plt - - default_colors = self._unpack_cycler(plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - ax = df.plot.bar() - self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.bar(color=custom_colors) - self._check_colors(ax.patches[::5], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.bar(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - # 
Test colormap functionality - ax = df.plot.bar(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::5], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - tm.close() - - ax = df.plot(kind="bar", color="green") - self._check_colors(ax.patches[::5], facecolors=["green"] * 5) - tm.close() - - def test_bar_user_colors(self): - df = DataFrame( - {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} - ) - # This should *only* work when `y` is specified, else - # we use one color per column - ax = df.plot.bar(y="A", color=df["color"]) - result = [p.get_facecolor() for p in ax.patches] - expected = [ - (1.0, 0.0, 0.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (0.0, 0.0, 1.0, 1.0), - (1.0, 0.0, 0.0, 1.0), - ] - assert result == expected - @pytest.mark.slow def test_bar_linewidth(self): df = DataFrame(np.random.randn(5, 5)) @@ -1065,46 +537,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(np.random.randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(np.random.randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, 
np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(np.random.rand(5, 5)) @@ -1230,60 +662,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow - def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): - # addressing issue #10611, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. - random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax1 = df.plot.scatter(x="A label", y="B label") - ax2 = df.plot.scatter(x="A label", y="B label", c="C label") - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] - assert vis1 == vis2 - - vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] - vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] - assert vis1 == vis2 - - assert ( - ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() - ) - - @pytest.mark.slow - def test_if_hexbin_xaxis_label_is_visible(self): - # addressing issue #10678, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. 
- random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax = df.plot.hexbin("A label", "B label", gridsize=12) - assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) - assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) - assert ax.xaxis.get_label().get_visible() - - @pytest.mark.slow - def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): - import matplotlib.pyplot as plt - - random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - fig, axes = plt.subplots(1, 2) - df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) - df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) - plt.tight_layout() - - points = np.array([ax.get_position().get_points() for ax in fig.axes]) - axes_x_coords = points[:, :, 0] - parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] - colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] - assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() - @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): @@ -1344,17 +722,6 @@ def test_plot_scatter_with_c(self): float_array = np.array([0.0, 1.0]) df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") - @pytest.mark.parametrize("cmap", [None, "Greys"]) - def test_scatter_with_c_column_name_with_colors(self, cmap): - # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( - [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], - columns=["length", "width"], - ) - df["species"] = ["r", "r", "g", "g", "b"] - ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) - assert ax.collections[0].colorbar is None - def test_plot_scatter_with_s(self): # this refers to GH 32904 df = DataFrame(np.random.random((10, 3)) * 100, columns=["a", "b", "c"]) @@ -1362,39 +729,6 @@ 
def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( @@ -1430,164 +764,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - 
ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", 
stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(np.random.randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", 
subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1666,26 +842,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -2078,352 +1234,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.slow - def test_line_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) - - ax = df.plot(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - - tm.close() - - ax2 = 
df.plot(color=custom_colors) - lines2 = ax2.get_lines() - - for l1, l2 in zip(ax.get_lines(), lines2): - assert l1.get_color() == l2.get_color() - - tm.close() - - ax = df.plot(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - ax = df.loc[:, [0]].plot(color="DodgerBlue") - self._check_colors(ax.lines, linecolors=["DodgerBlue"]) - - ax = df.plot(color="red") - self._check_colors(ax.get_lines(), linecolors=["red"] * 5) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - ax = df.plot(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - @pytest.mark.slow - def test_dont_modify_colors(self): - colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) - assert len(colors) == 3 - - @pytest.mark.slow - def test_line_colors_and_styles_subplots(self): - # GH 9894 - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - - axes = df.plot(subplots=True) - for ax, c in zip(axes, list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # single color char - axes = df.plot(subplots=True, color="k") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(subplots=True, color="green") - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["green"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - 
self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - axes = df.plot(color=list(custom_colors), subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # GH 10299 - custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] - axes = df.plot(color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_area_colors(self): - from matplotlib import cm - from matplotlib.collections import PolyCollection - - custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) - - ax = df.plot.area(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=custom_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=custom_colors) - - for h in handles: - assert h.get_alpha() is None - tm.close() - - ax = df.plot.area(colormap="jet") - jet_colors = [cm.jet(n) for n 
in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - self._check_colors(poly, facecolors=jet_colors) - - handles, labels = ax.get_legend_handles_labels() - self._check_colors(handles, facecolors=jet_colors) - for h in handles: - assert h.get_alpha() is None - tm.close() - - # When stacked=False, alpha is set to 0.5 - ax = df.plot.area(colormap=cm.jet, stacked=False) - self._check_colors(ax.get_lines(), linecolors=jet_colors) - poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] - jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] - self._check_colors(poly, facecolors=jet_with_alpha) - - handles, labels = ax.get_legend_handles_labels() - linecolors = jet_with_alpha - self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) - for h in handles: - assert h.get_alpha() == 0.5 - - @pytest.mark.slow - def test_hist_colors(self): - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - ax = df.plot.hist() - self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) - tm.close() - - custom_colors = "rgcby" - ax = df.plot.hist(color=custom_colors) - self._check_colors(ax.patches[::10], facecolors=custom_colors) - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - ax = df.plot.hist(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - # Test colormap functionality - ax = df.plot.hist(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] - self._check_colors(ax.patches[::10], facecolors=rgba_colors) - tm.close() - - ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") - self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) - - ax = df.plot(kind="hist", color="green") - self._check_colors(ax.patches[::10], 
facecolors=["green"] * 5) - tm.close() - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors(self): - from matplotlib import cm - - custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) - - ax = df.plot.kde(color=custom_colors) - self._check_colors(ax.get_lines(), linecolors=custom_colors) - tm.close() - - ax = df.plot.kde(colormap="jet") - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - tm.close() - - ax = df.plot.kde(colormap=cm.jet) - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - self._check_colors(ax.get_lines(), linecolors=rgba_colors) - - @pytest.mark.slow - @td.skip_if_no_scipy - def test_kde_colors_and_styles_subplots(self): - from matplotlib import cm - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - - axes = df.plot(kind="kde", subplots=True) - for ax, c in zip(axes, list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # single color char - axes = df.plot(kind="kde", color="k", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["k"]) - tm.close() - - # single color str - axes = df.plot(kind="kde", color="red", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["red"]) - tm.close() - - custom_colors = "rgcby" - axes = df.plot(kind="kde", color=custom_colors, subplots=True) - for ax, c in zip(axes, list(custom_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] - for cmap in ["jet", cm.jet]: - axes = df.plot(kind="kde", colormap=cmap, subplots=True) - for ax, c in zip(axes, rgba_colors): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - # make color a list if plotting one column frame - # handles cases like df.plot(color='DodgerBlue') - axes = df.loc[:, [0]].plot(kind="kde", 
color="DodgerBlue", subplots=True) - self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) - - # single character style - axes = df.plot(kind="kde", style="r", subplots=True) - for ax in axes: - self._check_colors(ax.get_lines(), linecolors=["r"]) - tm.close() - - # list of styles - styles = list("rgcby") - axes = df.plot(kind="kde", style=styles, subplots=True) - for ax, c in zip(axes, styles): - self._check_colors(ax.get_lines(), linecolors=[c]) - tm.close() - - @pytest.mark.slow - def test_boxplot_colors(self): - def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): - # TODO: outside this func? - if fliers_c is None: - fliers_c = "k" - self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) - self._check_colors( - bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) - ) - self._check_colors( - bp["medians"], linecolors=[medians_c] * len(bp["medians"]) - ) - self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) - self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) - - default_colors = self._unpack_cycler(self.plt.rcParams) - - df = DataFrame(np.random.randn(5, 5)) - bp = df.plot.box(return_type="dict") - _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) - tm.close() - - dict_colors = dict( - boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" - ) - bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") - _check_colors( - bp, - dict_colors["boxes"], - dict_colors["whiskers"], - dict_colors["medians"], - dict_colors["caps"], - "r", - ) - tm.close() - - # partial colors - dict_colors = dict(whiskers="c", medians="m") - bp = df.plot.box(color=dict_colors, return_type="dict") - _check_colors(bp, default_colors[0], "c", "m") - tm.close() - - from matplotlib import cm - - # Test str -> colormap functionality - bp = df.plot.box(colormap="jet", return_type="dict") - jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] - 
_check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # Test colormap functionality - bp = df.plot.box(colormap=cm.jet, return_type="dict") - _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) - tm.close() - - # string color is applied to all artists except fliers - bp = df.plot.box(color="DodgerBlue", return_type="dict") - _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") - - # tuple is also applied to all artists except fliers - bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") - _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") - - with pytest.raises(ValueError): - # Color contains invalid key results in ValueError - df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( "props, expected", [ @@ -2441,19 +1251,6 @@ def test_specified_props_kwd_plot_box(self, props, expected): assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): - import cycler - import matplotlib.pyplot as plt - - colors = list("rgbk") - plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - - df = DataFrame(np.random.randn(5, 3)) - ax = df.plot() - - expected = self._unpack_cycler(plt.rcParams)[:3] - self._check_colors(ax.get_lines(), linecolors=expected) - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -2595,19 +1392,6 @@ def test_hexbin_cmap(self, kwargs, expected): ax = df.plot.hexbin(x="A", y="B", **kwargs) assert ax.collections[0].cmap.name == expected - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - - @pytest.mark.slow - def test_mixing_cmap_and_colormap_raises(self): - df = self.hexbin_df - msg = "Only specify one of `cmap` and `colormap`" - with pytest.raises(TypeError, match=msg): - df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") - @pytest.mark.slow def 
test_pie_df(self): df = DataFrame( @@ -3046,53 +1830,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -3218,12 +1955,6 @@ def test_df_grid_settings(self): kws={"x": "a", "y": "b"}, ) - def 
test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) - - with pytest.raises(ValueError): - df.plot(colormap="invalid_colormap") - def test_plain_axes(self): # supplied ax itself is a SubplotAxes, but figure contains also @@ -3255,22 +1986,6 @@ def test_plain_axes(self): Series(np.random.rand(10)).plot(ax=ax) Series(np.random.rand(10)).plot(ax=iax) - def test_passed_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - - def test_rcParams_bar_colors(self): - import matplotlib as mpl - - color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") - assert color_tuples == [c.get_facecolor() for c in barplot.patches] - @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 @@ -3359,22 +2074,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = DataFrame(columns=["foo"], dtype=int) @@ -3418,16 +2117,6 @@ def 
test_missing_markers_legend_using_style(self): self._check_legend_labels(ax, labels=["A", "B", "C"]) self._check_legend_marker(ax, expected_markers=[".", ".", "."]) - def test_colors_of_columns_with_same_name(self): - # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 - # Creating a DataFrame with duplicate column labels and testing colors of them. - df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) - df_concat = pd.concat([df, df1], axis=1) - result = df_concat.plot() - for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() - @pytest.mark.parametrize( "index_name, old_label, new_label", [ @@ -3477,34 +2166,6 @@ def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert all(ax.get_xlabel() == str(new_label) for ax in axes) - def _generate_4_axes_via_gridspec(): import matplotlib as mpl diff --git 
a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py new file mode 100644 index 0000000000000..66d25cca60593 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -0,0 +1,672 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFrameColor(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + def test_mpl2_color_cycle_str(self): + # GH 15516 + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", "MatplotlibDeprecationWarning") + + for color in colors: + _check_plot_works(df.plot, color=color) + + # if warning is raised, check that it is the exact 
problematic one + # GH 36972 + if w: + match = "Support for uppercase single-letter colors is deprecated" + warning_message = str(w[0].message) + msg = "MatplotlibDeprecationWarning related to CN colors was raised" + assert match not in warning_message, msg + + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, color=["red"]) + + def test_rgb_tuple_color(self): + # GH 16695 + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) + + def test_color_empty_string(self): + df = DataFrame(np.random.randn(10, 2)) + with pytest.raises(ValueError): + df.plot(color="") + + def test_color_and_style_arguments(self): + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + # passing both 'color' and 'style' arguments should be allowed + # if there is no color symbol in the style strings: + ax = df.plot(color=["red", "black"], style=["-", "--"]) + # check that the linestyles are correctly set: + linestyle = [line.get_linestyle() for line in ax.lines] + assert linestyle == ["-", "--"] + # check that the colors are correctly set: + color = [line.get_color() for line in ax.lines] + assert color == ["red", "black"] + # passing both 'color' and 'style' arguments should not be allowed + # if there is a color symbol in the style strings: + with pytest.raises(ValueError): + df.plot(color=["red", "black"], style=["k-", "r--"]) + + @pytest.mark.parametrize( + "color, expected", + [ + ("green", ["green"] * 4), + (["yellow", "red", "green", "blue"], ["yellow", "red", "green", "blue"]), + ], + ) + def test_color_and_marker(self, color, expected): + # GH 21003 + df = DataFrame(np.random.random((7, 4))) + ax = df.plot(color=color, style="d--") + # check colors + result = [i.get_color() for i in ax.lines] + assert result == expected + # check markers and linestyles + assert all(i.get_linestyle() == "--" for i in ax.lines) + 
assert all(i.get_marker() == "d" for i in ax.lines) + + @pytest.mark.slow + def test_bar_colors(self): + import matplotlib.pyplot as plt + + default_colors = self._unpack_cycler(plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot.bar() + self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.bar(color=custom_colors) + self._check_colors(ax.patches[::5], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.bar(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.bar(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + tm.close() + + ax = df.plot(kind="bar", color="green") + self._check_colors(ax.patches[::5], facecolors=["green"] * 5) + tm.close() + + def test_bar_user_colors(self): + df = DataFrame( + {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} + ) + # This should *only* work when `y` is specified, else + # we use one color per column + ax = df.plot.bar(y="A", color=df["color"]) + result = [p.get_facecolor() for p in ax.patches] + expected = [ + (1.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (1.0, 0.0, 0.0, 1.0), + ] + assert result == expected + + @pytest.mark.slow + def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): + # addressing issue #10611, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. 
+ random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax1 = df.plot.scatter(x="A label", y="B label") + ax2 = df.plot.scatter(x="A label", y="B label", c="C label") + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] + assert vis1 == vis2 + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] + assert vis1 == vis2 + + assert ( + ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() + ) + + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + + @pytest.mark.slow + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): + import matplotlib.pyplot as plt + + random_array = np.random.random((1000, 3)) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() + + 
@pytest.mark.parametrize("cmap", [None, "Greys"]) + def test_scatter_with_c_column_name_with_colors(self, cmap): + # https://github.com/pandas-dev/pandas/issues/34316 + df = DataFrame( + [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], + columns=["length", "width"], + ) + df["species"] = ["r", "r", "g", "g", "b"] + ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) + assert ax.collections[0].colorbar is None + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + + @pytest.mark.slow + def test_line_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.randn(5, 5)) + + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + + tm.close() + + ax2 = df.plot(color=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() + + tm.close() + + ax = df.plot(colormap="jet") + rgba_colors = [cm.jet(n) for n in 
np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + ax = df.loc[:, [0]].plot(color="DodgerBlue") + self._check_colors(ax.lines, linecolors=["DodgerBlue"]) + + ax = df.plot(color="red") + self._check_colors(ax.get_lines(), linecolors=["red"] * 5) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + @pytest.mark.slow + def test_dont_modify_colors(self): + colors = ["r", "g", "b"] + DataFrame(np.random.rand(10, 2)).plot(color=colors) + assert len(colors) == 3 + + @pytest.mark.slow + def test_line_colors_and_styles_subplots(self): + # GH 9894 + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(subplots=True, color="k") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(subplots=True, color="green") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["green"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + axes = df.plot(color=list(custom_colors), subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + 
+ # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_area_colors(self): + from matplotlib import cm + from matplotlib.collections import PolyCollection + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = df.plot.area(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=custom_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=custom_colors) + + for h in handles: + assert h.get_alpha() is None + tm.close() + + ax = df.plot.area(colormap="jet") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=jet_colors) + + handles, labels = 
ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=jet_colors) + for h in handles: + assert h.get_alpha() is None + tm.close() + + # When stacked=False, alpha is set to 0.5 + ax = df.plot.area(colormap=cm.jet, stacked=False) + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] + self._check_colors(poly, facecolors=jet_with_alpha) + + handles, labels = ax.get_legend_handles_labels() + linecolors = jet_with_alpha + self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) + for h in handles: + assert h.get_alpha() == 0.5 + + @pytest.mark.slow + def test_hist_colors(self): + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + ax = df.plot.hist() + self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.hist(color=custom_colors) + self._check_colors(ax.patches[::10], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.hist(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.hist(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + + ax = df.plot(kind="hist", color="green") + self._check_colors(ax.patches[::10], facecolors=["green"] * 5) + tm.close() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(np.random.rand(5, 5)) + + ax = 
df.plot.kde(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + ax = df.plot.kde(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot.kde(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors_and_styles_subplots(self): + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + + axes = df.plot(kind="kde", subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(kind="kde", color="k", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(kind="kde", color="red", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["red"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(kind="kde", color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(kind="kde", colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(kind="kde", style="r", subplots=True) + for ax in axes: + 
self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(kind="kde", style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_boxplot_colors(self): + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): + # TODO: outside this func? + if fliers_c is None: + fliers_c = "k" + self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) + self._check_colors( + bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) + ) + self._check_colors( + bp["medians"], linecolors=[medians_c] * len(bp["medians"]) + ) + self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) + self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(np.random.randn(5, 5)) + bp = df.plot.box(return_type="dict") + _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) + tm.close() + + dict_colors = dict( + boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" + ) + bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") + _check_colors( + bp, + dict_colors["boxes"], + dict_colors["whiskers"], + dict_colors["medians"], + dict_colors["caps"], + "r", + ) + tm.close() + + # partial colors + dict_colors = dict(whiskers="c", medians="m") + bp = df.plot.box(color=dict_colors, return_type="dict") + _check_colors(bp, default_colors[0], "c", "m") + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + bp = df.plot.box(colormap="jet", return_type="dict") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # Test colormap functionality + bp = df.plot.box(colormap=cm.jet, return_type="dict") + _check_colors(bp, jet_colors[0], 
jet_colors[0], jet_colors[2]) + tm.close() + + # string color is applied to all artists except fliers + bp = df.plot.box(color="DodgerBlue", return_type="dict") + _check_colors(bp, "DodgerBlue", "DodgerBlue", "DodgerBlue", "DodgerBlue") + + # tuple is also applied to all artists except fliers + bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") + + with pytest.raises(ValueError): + # Color contains invalid key results in ValueError + df.plot.box(color=dict(boxes="red", xxxx="blue")) + + def test_default_color_cycle(self): + import cycler + import matplotlib.pyplot as plt + + colors = list("rgbk") + plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) + + df = DataFrame(np.random.randn(5, 3)) + ax = df.plot() + + expected = self._unpack_cycler(plt.rcParams)[:3] + self._check_colors(ax.get_lines(), linecolors=expected) + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + @pytest.mark.slow + def test_mixing_cmap_and_colormap_raises(self): + df = self.hexbin_df + msg = "Only specify one of `cmap` and `colormap`" + with pytest.raises(TypeError, match=msg): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + + def test_passed_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + colormap = mpl.colors.ListedColormap(color_tuples) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_rcParams_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + assert color_tuples == [c.get_facecolor() for c in 
barplot.patches] + + def test_colors_of_columns_with_same_name(self): + # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 + # Creating a DataFrame with duplicate column labels and testing colors of them. + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) + df_concat = pd.concat([df, df1], axis=1) + result = df_concat.plot() + for legend, line in zip(result.get_legend().legendHandles, result.lines): + assert legend.get_color() == line.get_color() + + def test_invalid_colormap(self): + df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py new file mode 100644 index 0000000000000..968fa65e63e79 --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -0,0 +1,107 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFramePlotsGroupby(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, 
expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, xticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, 
False, True, True] + self._assert_xtickslabels_visibility(axes, expected) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py new file mode 100644 index 0000000000000..de3d65ebe3d4c --- /dev/null +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -0,0 +1,704 @@ +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFramePlotsSubplots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + 
for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + 
self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def 
test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + 
assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert 
returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid layout should not affect input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes) + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in 
axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + 
self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), 
visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + @pytest.mark.parametrize( + "index_name, old_label, new_label", + [ + (None, "", "new"), + ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), + ], + ) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = DataFrame([[1, 2], [2, 5]], columns=["Type 
A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(np.random.randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + 
self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(np.random.randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(np.random.randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + 
self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + 
tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes From f181583ced62b5e7e57417a22c5dfb72e83b8d7e Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 23:49:21 +0300 Subject: [PATCH 113/147] Removing unnecessary imports --- pandas/tests/plotting/frame/test_frame_color.py | 10 +--------- .../tests/plotting/frame/test_frame_groupby.py | 17 ++--------------- .../tests/plotting/frame/test_frame_subplots.py | 9 ++------- 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 66d25cca60593..0c8086dd6a7a9 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -10,17 +7,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFrameColor(TestPlotBase): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 968fa65e63e79..06ce0d5076d69 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,25 +1,12 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import 
itertools -import string -import warnings - import numpy as np -import pytest import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - -import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index de3d65ebe3d4c..dfc98ad887aef 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,7 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools import string import warnings @@ -10,16 +8,13 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From cd273c98717ceabeb46250b72a705c36678def0e Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 00:20:11 +0300 Subject: [PATCH 114/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 1 - pandas/tests/plotting/frame/test_frame_subplots.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py 
b/pandas/tests/plotting/frame/test_frame_color.py index 0c8086dd6a7a9..74eb87862b9d1 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -7,7 +7,6 @@ import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame import pandas._testing as tm diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index dfc98ad887aef..4a9f85d61ba2a 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -8,7 +8,6 @@ import pandas.util._test_decorators as td - import pandas as pd from pandas import DataFrame, Series, date_range import pandas._testing as tm From 33276812653a589be1c3209c1bc4d3b92ddae61a Mon Sep 17 00:00:00 2001 From: patrick <61934744+phofl@users.noreply.github.com> Date: Mon, 9 Nov 2020 23:16:02 +0100 Subject: [PATCH 115/147] CLN: clean categorical indexes tests (#37721) --- pandas/tests/indexing/test_categorical.py | 25 ++--------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 9885765bf53e4..20d7662855ab3 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -59,10 +59,6 @@ def test_loc_scalar(self): with pytest.raises(TypeError, match=msg): df.loc["d"] = 10 - msg = ( - "cannot insert an item into a CategoricalIndex that is not " - "already an existing category" - ) msg = "'fill_value=d' is not present in this Categorical's categories" with pytest.raises(ValueError, match=msg): df.loc["d", "A"] = 10 @@ -74,9 +70,9 @@ def test_loc_scalar(self): def test_slicing(self): cat = Series(Categorical([1, 2, 3, 4])) - reversed = cat[::-1] + reverse = cat[::-1] exp = np.array([4, 3, 2, 1], dtype=np.int64) - tm.assert_numpy_array_equal(reversed.__array__(), exp) + 
tm.assert_numpy_array_equal(reverse.__array__(), exp) df = DataFrame({"value": (np.arange(100) + 1).astype("int64")}) df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) @@ -170,23 +166,6 @@ def test_slicing_and_getting_ops(self): res_val = df.loc["j", "cats"] assert res_val == exp_val - # ix - # frame - # res_df = df.loc["j":"k",[0,1]] # doesn't work? - res_df = df.loc["j":"k", :] - tm.assert_frame_equal(res_df, exp_df) - assert is_categorical_dtype(res_df["cats"].dtype) - - # row - res_row = df.loc["j", :] - tm.assert_series_equal(res_row, exp_row) - assert isinstance(res_row["cats"], str) - - # col - res_col = df.loc[:, "cats"] - tm.assert_series_equal(res_col, exp_col) - assert is_categorical_dtype(res_col.dtype) - # single value res_val = df.loc["j", df.columns[0]] assert res_val == exp_val From 083799811f68258eaba512f44fefe973f8779ea4 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 116/147] Fix merge error --- pandas/tests/plotting/frame/test_frame.py | 999 +++++++++++++++--- .../tests/plotting/frame/test_frame_color.py | 142 +-- 2 files changed, 920 insertions(+), 221 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index ee9e98fb7f3b8..4d339b93fd30d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,6 +6,7 @@ import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -174,14 +175,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) + df = DataFrame(randn(2, 
2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -336,9 +337,415 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { 
+ "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, False, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be 
visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + 
ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def 
test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = 
df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 
3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, 
labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + def test_negative_log(self): df = -DataFrame( - np.random.rand(6, 4), + rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -357,20 +764,15 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - { - "w": np.random.rand(6), - "x": np.random.rand(6), - "y": -np.random.rand(6), - "z": -np.random.rand(6), - } + {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} ) # each column has positive-negative mixed value mixed_df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -438,7 +840,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): - df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -462,7 +864,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -480,7 +882,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -501,7 +903,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) width = 0.9 @@ -537,9 +939,49 @@ def 
test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -585,13 +1027,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = DataFrame( + df1 = pd.DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = DataFrame( + df2 = 
pd.DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -615,7 +1057,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -634,7 +1076,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = DataFrame(np.random.randn(10), columns=["a"]) + df = pd.DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -645,35 +1087,51 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = DataFrame({"dates": dates, "vals": vals}) + df = pd.DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) + + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. 
+ random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) + df = pd.DataFrame( + {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} + ) _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -729,10 +1187,43 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + 
df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -745,9 +1236,7 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - np.random.randn(10, 15), - index=list(string.ascii_letters[:10]), - columns=range(15), + randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) ) _check_plot_works(df.plot.bar) @@ -764,6 +1253,164 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * 
len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + 
self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), 
Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -823,7 +1470,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -842,10 +1489,30 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -872,7 +1539,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -910,7 +1577,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) 
ax2 = _check_plot_works(df.plot, kind="hist") @@ -1080,16 +1747,16 @@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(np.random.randn(100, 4)).cumsum() + df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1118,9 +1785,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1151,7 +1818,9 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) + df = pd.DataFrame( + data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] + ) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -1174,7 
+1843,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - np.random.randn(4, 4), + randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1183,7 +1852,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1200,9 +1869,10 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: + ax = df.plot(kind=kind, legend=False) self._check_legend_labels(ax, visible=False) @@ -1212,7 +1882,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(np.random.randn(100, 3)) + df = DataFrame(randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1222,8 +1892,8 @@ def test_style_by_column(self): fig.clf() fig.add_subplot(111) ax = df.plot(style=markers) - for idx, line in enumerate(ax.get_lines()[: len(markers)]): - assert line.get_marker() == markers[idx] + for i, l in enumerate(ax.get_lines()[: len(markers)]): + assert l.get_marker() == markers[i] @pytest.mark.slow def test_line_label_none(self): @@ -1234,23 +1904,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - 
kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -1285,7 +1938,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(np.random.randn(10, 2), dtype=object) + df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in plotting.PlotAccessor._common_kinds: @@ -1296,14 +1949,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(np.random.rand(10, 2), dtype=object) + df = DataFrame(rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1379,18 +2032,33 @@ def test_hexbin_with_c(self): assert len(ax.collections) == 1 @pytest.mark.slow - @pytest.mark.parametrize( - "kwargs, expected", - [ - ({}, "BuGn"), # default cmap - ({"colormap": "cubehelix"}, "cubehelix"), - ({"cmap": "YlGn"}, "YlGn"), - ], - ) - def test_hexbin_cmap(self, kwargs, expected): + def test_hexbin_cmap(self): df = self.hexbin_df - ax = df.plot.hexbin(x="A", y="B", **kwargs) - assert ax.collections[0].cmap.name == expected + + # Default to BuGn + ax = df.plot.hexbin(x="A", y="B") + assert ax.collections[0].cmap.name == "BuGn" + + cm = "cubehelix" + ax = df.plot.hexbin(x="A", y="B", colormap=cm) + assert ax.collections[0].cmap.name == cm + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + @pytest.mark.slow + def test_allow_cmap(self): + df = 
self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", cmap="YlGn") + assert ax.collections[0].cmap.name == "YlGn" + + with pytest.raises(TypeError): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") @pytest.mark.slow def test_pie_df(self): @@ -1430,20 +2098,11 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - import matplotlib as mpl - df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - - # GH 37668 - kwargs = {} - if mpl.__version__ >= "3.3": - kwargs = {"normalize": True} - - with tm.assert_produces_warning(None): - df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) + df.plot.pie(subplots=True, ax=axes, legend=True) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): @@ -1451,13 +2110,12 @@ def test_pie_df_nan(self): expected[i] = "" result = [x.get_text() for x in ax.texts] assert result == expected - # legend labels # NaN's not included in legend with subplots # see https://github.com/pandas-dev/pandas/issues/8390 - result_labels = [x.get_text() for x in ax.get_legend().get_texts()] - expected_labels = base_expected[:i] + base_expected[i + 1 :] - assert result_labels == expected_labels + assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[ + :i + ] + base_expected[i + 1 :] @pytest.mark.slow def test_errorbar_plot(self): @@ -1529,11 +2187,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed 
axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1610,16 +2268,17 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): + np.random.seed(0) err = np.random.rand(3, 2, 5) @@ -1830,6 +2489,53 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, 
sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -1961,11 +2667,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(np.random.rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) + df = DataFrame({"a": randn(8), "b": randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -1976,21 +2682,21 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=cax) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=iax) + Series(rand(10)).plot(ax=ax) + 
Series(rand(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - DataFrame(np.random.randn(15, 2), columns=list("AB")) + pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2006,7 +2712,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = DataFrame( + df = pd.DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2027,7 +2733,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2042,7 +2748,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = DataFrame([2, 4], index=[1, 2]) + df = pd.DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2054,7 +2760,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = DataFrame( + df = pd.DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2074,9 +2780,25 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + 
df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + def test_plot_no_rows(self): # GH 27758 - df = DataFrame(columns=["foo"], dtype=int) + df = pd.DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2085,13 +2807,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = DataFrame(["a", "b", "c"]) + df = pd.DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2101,7 +2823,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = DataFrame( + df = pd.DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2132,7 +2854,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2146,25 +2868,32 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_xlabel() == str(new_label) @pytest.mark.parametrize( - "xlabel, ylabel", + "index_name, old_label, new_label", [ - (None, None), - ("X Label", None), - (None, "Y Label"), - ("X Label", "Y Label"), + (None, "", "new"), + ("old", "old", "new"), + 
(None, "", ""), + (None, "", 1), + (None, "", [1, 2]), ], ) - @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) - def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): - # GH 37001 - xcol = "Type A" - ycol = "Type B" - df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) - - # default is the labels are column names - ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) - assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) - assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 74eb87862b9d1..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,17 +1,27 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, 
MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm +from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + @td.skip_if_no_mpl class TestDataFrameColor(TestPlotBase): @@ -38,9 +48,10 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) + def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,7 +79,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -112,7 +123,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -145,7 +156,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = DataFrame( + df = pd.DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -166,7 +177,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -183,25 +194,12 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() ) - @pytest.mark.slow - def test_if_hexbin_xaxis_label_is_visible(self): - # addressing issue #10678, to ensure colobar does not - # interfere with x-axis label and ticklabels with - # ipython inline backend. - random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) - - ax = df.plot.hexbin("A label", "B label", gridsize=12) - assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) - assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) - assert ax.xaxis.get_label().get_visible() - @pytest.mark.slow def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -217,7 +215,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( + df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -225,45 +223,12 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert 
ax.collections[0].colorbar is None - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -306,7 +271,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -316,11 +281,12 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): - self._check_colors(ax.get_lines(), linecolors=[c]) + c = [c] + 
self._check_colors(ax.get_lines(), linecolors=c) tm.close() # single color char @@ -384,7 +350,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -427,7 +393,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -464,7 +430,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -486,7 +452,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -554,7 +520,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -604,6 +570,23 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, 
expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt @@ -611,31 +594,24 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None + def test_invalid_colormap(self): + df = DataFrame(randn(3, 2), columns=["A", "B"]) - @pytest.mark.slow - def test_mixing_cmap_and_colormap_raises(self): - df = self.hexbin_df - msg = "Only specify one of `cmap` and `colormap`" - with pytest.raises(TypeError, match=msg): - df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") def test_passed_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -643,21 +619,15 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in 
barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() - - def test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) - - with pytest.raises(ValueError): - df.plot(colormap="invalid_colormap") + assert legend.get_color() == line.get_color() \ No newline at end of file From 44c72f0202f38ce76b823c4a2aa01598fe633d64 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:22:54 +0300 Subject: [PATCH 117/147] PEP 8 fixes --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From 
381e673084e9915a942c44725ceb870473e12265 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:19:42 +0300 Subject: [PATCH 118/147] Fix merge error --- pandas/tests/plotting/frame/test_frame.py | 547 ------------------ .../plotting/frame/test_frame_groupby.py | 20 +- .../plotting/frame/test_frame_subplots.py | 190 +----- 3 files changed, 32 insertions(+), 725 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4d339b93fd30d..396eb73e83d17 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -337,411 +337,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - 
self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = 
df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), 
- ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = pd.DataFrame(data) - 
ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow 
- def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, 
layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - 
def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 def test_negative_log(self): df = -DataFrame( @@ -1342,14 +937,6 @@ def test_bar_center(self): self._check_bar_alignment(df, kind="barh", stacked=False) self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - @pytest.mark.slow def test_bar_align_single_column(self): df = DataFrame(randn(5)) @@ -1389,28 +976,6 @@ def test_bar_edge(self): df, kind="barh", subplots=True, width=0.9, align="edge" ) - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, 
subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1489,26 +1054,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -2489,53 +2034,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert 
len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -2780,22 +2278,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = pd.DataFrame(columns=["foo"], dtype=int) @@ -2867,35 +2349,6 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_ylabel() == str(new_label) assert ax.get_xlabel() == str(new_label) - @pytest.mark.parametrize( - "index_name, old_label, new_label", - [ - (None, "", "new"), - ("old", "old", "new"), - (None, "", ""), - (None, "", 1), - (None, "", [1, 2]), - ], - ) - @pytest.mark.parametrize("kind", ["line", "area", "bar"]) - def 
test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label - ): - # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) - df.index.name = index_name - - # default is the ylabel is not shown and xlabel is index name - axes = df.plot(kind=kind, subplots=True) - assert all(ax.get_ylabel() == "" for ax in axes) - assert all(ax.get_xlabel() == old_label for ax in axes) - - # old xlabel will be overriden and assigned ylabel will be used as ylabel - axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) - assert all(ax.get_ylabel() == str(new_label) for ax in axes) - assert all(ax.get_xlabel() == str(new_label) for ax in axes) - - def _generate_4_axes_via_gridspec(): import matplotlib as mpl import matplotlib.gridspec diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 06ce0d5076d69..253f81d9e2704 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,16 +1,30 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string +import warnings + import numpy as np +from numpy.random import rand, randn +import pytest import pandas.util._test_decorators as td -from pandas import DataFrame +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl -class TestDataFramePlotsGroupby(TestPlotBase): +class TestDataFrameGroupby(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import 
matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 4a9f85d61ba2a..92d5941e7e8e6 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,23 +1,30 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools import string import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl -class TestDataFramePlotsSubplots(TestPlotBase): +class TestDataFrameGroupby(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl @@ -194,7 +201,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = DataFrame(data) + testdata = pd.DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -396,6 +403,7 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind="barh", subplots=True) self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 @@ -488,7 +496,7 @@ def test_df_subplots_patterns_minorticks(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two 
plots should have different # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) + df = pd.DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -513,10 +521,10 @@ def test_subplots_sharex_false(self): ) @pytest.mark.parametrize("kind", ["line", "area", "bar"]) def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label + self, kind, index_name, old_label, new_label ): # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -528,171 +536,3 @@ def test_xlabel_ylabel_dataframe_subplots( axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) assert all(ax.get_ylabel() == str(new_label) for ax in axes) assert all(ax.get_xlabel() == str(new_label) for ax in axes) - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(np.random.randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - 
self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(np.random.randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = 
DataFrame(np.random.randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - 
elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes From 7ff33d1863a9cdde607f22dddd0104f93f95ce04 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:22:06 +0300 Subject: [PATCH 119/147] Removing unnecessary imports --- .../tests/plotting/frame/test_frame_groupby.py | 18 +++--------------- .../plotting/frame/test_frame_subplots.py | 8 ++------ 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 253f81d9e2704..1aa5d18c076b0 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,26 +1,14 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string -import warnings - import numpy as np -from numpy.random import rand, randn -import pytest +from numpy.random import rand import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git 
a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 92d5941e7e8e6..5ed7f22c22a34 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -11,16 +11,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From 9bae261148668ca0a55e1ba2b465f5eb6a99efaa Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 20:32:36 +0300 Subject: [PATCH 120/147] PEP 8 fixes --- pandas/tests/plotting/frame/test_frame_subplots.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 5ed7f22c22a34..cb99801de6efd 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -399,7 +399,6 @@ def test_bar_subplots_center(self): self._check_bar_alignment(df, kind="barh", subplots=True) self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 From 9ddf0a2f2de659bf0da2c1c361a41d074c6b31d4 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Wed, 4 Nov 2020 16:34:58 +0300 Subject: [PATCH 121/147] Fixed class name --- pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py 
b/pandas/tests/plotting/frame/test_frame_subplots.py index cb99801de6efd..e97ce3262a015 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -20,7 +20,7 @@ @td.skip_if_no_mpl -class TestDataFrameGroupby(TestPlotBase): +class TestDataFrameSubplots(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl From 54e9cbefae46fef4f587d51e8026d96c6d69fddc Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Wed, 4 Nov 2020 16:46:45 +0300 Subject: [PATCH 122/147] Transfer tests of test_frame.py to test_frame_subplots.py --- pandas/tests/plotting/frame/test_frame.py | 168 ----------------- .../plotting/frame/test_frame_subplots.py | 173 ++++++++++++++++++ 2 files changed, 173 insertions(+), 168 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 396eb73e83d17..56ce87644bc39 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -534,46 +534,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() 
- tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) @@ -848,134 +808,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is 
True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - 
- self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index e97ce3262a015..3f146d2c97008 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -531,3 +531,176 @@ def test_xlabel_ylabel_dataframe_subplots( axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) assert all(ax.get_ylabel() == str(new_label) for ax in axes) assert all(ax.get_xlabel() == str(new_label) for ax in axes) + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, 
width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, 
kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + 
p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + + From 304c4301debc370d4b7ca85d99d464ccdaa68d42 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:29:31 +0300 Subject: [PATCH 123/147] Transfer tests of test_frame.py to test_frame_groupby.py, test_frame_subplots.py, test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 211 +++++++++--------- .../tests/plotting/frame/test_frame_color.py | 100 +++++---- .../plotting/frame/test_frame_groupby.py | 17 +- .../plotting/frame/test_frame_subplots.py | 28 ++- 4 files changed, 193 insertions(+), 163 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 56ce87644bc39..9aab765dca96b 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,7 +6,6 @@ import 
warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -175,14 +174,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(randn(2, 2), columns=["a", "b"]) + df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -337,10 +336,9 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - def test_negative_log(self): df = -DataFrame( - rand(6, 4), + np.random.rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -359,15 +357,20 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} + { + "w": np.random.rand(6), + "x": np.random.rand(6), + "y": -np.random.rand(6), + "z": -np.random.rand(6), + } ) # each column has positive-negative mixed value mixed_df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -435,7 +438,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): - df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() 
xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -459,7 +462,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -477,7 +480,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -498,7 +501,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) width = 0.9 @@ -536,7 +539,7 @@ def test_bar_barwidth(self): @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -582,13 +585,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = pd.DataFrame( + df1 = DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = pd.DataFrame( + df2 = DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -612,7 +615,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -631,7 +634,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", 
periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -642,31 +645,30 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) + df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -677,16 +679,14 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -742,43 +742,10 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", 
ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -791,7 +758,9 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) + np.random.randn(10, 15), + index=list(string.ascii_letters[:10]), + columns=range(15), ) _check_plot_works(df.plot.bar) @@ -867,7 +836,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - randn(6, 4), + np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -889,7 +858,7 @@ def test_boxplot_return_type(self): @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -916,7 +885,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle - df = DataFrame(randn(100, 4)) + df = DataFrame(np.random.randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -954,7 +923,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1124,16 +1093,16 @@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(randn(100, 4)).cumsum() + df = 
DataFrame(np.random.randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1162,9 +1131,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1195,9 +1164,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = pd.DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -1220,7 +1187,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def test_legend_name(self): multi = DataFrame( - randn(4, 4), + np.random.randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), 
np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1229,7 +1196,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1246,7 +1213,7 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: @@ -1259,7 +1226,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(randn(100, 3)) + df = DataFrame(np.random.randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1281,6 +1248,23 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -1315,7 +1299,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(randn(10, 2), dtype=object) + df = DataFrame(np.random.randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in plotting.PlotAccessor._common_kinds: @@ -1326,14 +1310,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # 
area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(rand(10, 2), dtype=object) + df = DataFrame(np.random.rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1420,13 +1404,6 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -1997,11 +1974,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": randn(8), "b": randn(8)}) + df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -2012,21 +1989,21 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=cax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(rand(10)).plot(ax=ax) - Series(rand(10)).plot(ax=iax) + Series(np.random.rand(10)).plot(ax=ax) + Series(np.random.rand(10)).plot(ax=iax) 
@pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2042,7 +2019,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = pd.DataFrame( + df = DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2063,7 +2040,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2078,7 +2055,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) + df = DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2090,7 +2067,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( + df = DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2112,7 +2089,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): def test_plot_no_rows(self): # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) + df = DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2121,13 +2098,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df 
= pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2137,7 +2114,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2168,7 +2145,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2181,6 +2158,28 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_ylabel() == str(new_label) assert ax.get_xlabel() == str(new_label) + @pytest.mark.parametrize( + "xlabel, ylabel", + [ + (None, None), + ("X Label", None), + (None, "Y Label"), + ("X Label", "Y Label"), + ], + ) + @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) + def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): + # GH 37001 + xcol = "Type A" + ycol = "Type B" + df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) + + # default is the labels are column names + ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) + assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) + assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + + def _generate_4_axes_via_gridspec(): import matplotlib as mpl import matplotlib.gridspec diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py 
index 517e3c109fc5b..18f45cd34d812 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -6,7 +6,6 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -50,7 +49,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -78,7 +77,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) + df = DataFrame(np.random.randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -122,7 +121,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -155,7 +154,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +175,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +197,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +213,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -222,12 +221,45 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = DataFrame({"x": [1, 2, 
3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -270,7 +302,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -280,7 +312,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +381,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +424,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +461,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = DataFrame(np.random.rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 
+483,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +551,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -569,23 +601,6 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt @@ -593,14 +608,21 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(randn(5, 3)) + df = DataFrame(np.random.randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) + df = DataFrame(np.random.randn(3, 2), columns=["A", 
"B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -610,7 +632,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +640,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
- df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 1aa5d18c076b0..32087fda4802b 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,14 +1,25 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string +import warnings + import numpy as np -from numpy.random import rand +import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 3f146d2c97008..23b677ca1f8c2 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -6,17 +6,20 @@ import warnings import numpy as np -from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as 
tm -from pandas.tests.plotting.common import TestPlotBase +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting @td.skip_if_no_mpl @@ -197,7 +200,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = pd.DataFrame(data) + testdata = DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -491,7 +494,7 @@ def test_df_subplots_patterns_minorticks(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 - df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -516,10 +519,10 @@ def test_subplots_sharex_false(self): ) @pytest.mark.parametrize("kind", ["line", "area", "bar"]) def test_xlabel_ylabel_dataframe_subplots( - self, kind, index_name, old_label, new_label + self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -534,7 +537,7 @@ def test_xlabel_ylabel_dataframe_subplots( @pytest.mark.slow def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) self._check_bar_alignment( df, kind="bar", stacked=False, width=0.9, position=0.2 ) @@ -555,7 +558,7 @@ def test_bar_barwidth_position(self): @pytest.mark.slow def test_bar_barwidth_position_int(self): # GH 12979 - df = DataFrame(randn(5, 5)) + df = DataFrame(np.random.randn(5, 5)) for w in [1, 1.0]: ax = df.plot.bar(stacked=True, width=w) @@ -581,7 +584,6 @@ def test_bar_stacked_center(self): 
self._check_bar_alignment(df, kind="barh", stacked=True) self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - @pytest.mark.slow def test_bar_center(self): df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) @@ -592,7 +594,7 @@ def test_bar_center(self): @pytest.mark.slow def test_bar_align_single_column(self): - df = DataFrame(randn(5)) + df = DataFrame(np.random.randn(5)) self._check_bar_alignment(df, kind="bar", stacked=False) self._check_bar_alignment(df, kind="bar", stacked=True) self._check_bar_alignment(df, kind="barh", stacked=False) @@ -629,7 +631,6 @@ def test_bar_edge(self): df, kind="barh", subplots=True, width=0.9, align="edge" ) - def _check_bar_alignment( self, df, @@ -701,6 +702,3 @@ def _check_bar_alignment( raise ValueError return axes - - - From bbae667675079abeba07cf4028c2074942dd93f8 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:36:13 +0300 Subject: [PATCH 124/147] Changed class names --- pandas/tests/plotting/frame/test_frame_color.py | 2 +- pandas/tests/plotting/frame/test_frame_groupby.py | 2 +- pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 18f45cd34d812..24e879d9491b5 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class TestDataFrameColor(TestPlotBase): +class TestDataFramePlotsColor(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 32087fda4802b..968fa65e63e79 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class 
TestDataFrameGroupby(TestPlotBase): +class TestDataFramePlotsGroupby(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 23b677ca1f8c2..ef491e438dac1 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -23,7 +23,7 @@ @td.skip_if_no_mpl -class TestDataFrameSubplots(TestPlotBase): +class TestDataFramePlotsSubplots(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl From b84454369804e7370f40f82a23af6f193656d19d Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Thu, 5 Nov 2020 22:38:34 +0300 Subject: [PATCH 125/147] Removed unnecessary imports --- pandas/tests/plotting/frame/test_frame_color.py | 11 +---------- .../tests/plotting/frame/test_frame_groupby.py | 16 ++-------------- .../tests/plotting/frame/test_frame_subplots.py | 10 ++-------- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 24e879d9491b5..2d509e8f3b320 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -10,17 +7,11 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import 
pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFramePlotsColor(TestPlotBase): diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 968fa65e63e79..92ae025145595 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,25 +1,13 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string -import warnings - import numpy as np -import pytest import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works - -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting +from pandas.tests.plotting.common import TestPlotBase @td.skip_if_no_mpl diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index ef491e438dac1..4c86a570360b0 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -1,7 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools import string import warnings @@ -10,16 +8,12 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame, Series, date_range import pandas._testing as tm -from pandas.core.arrays import integer_array -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import TestPlotBase from 
pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting @td.skip_if_no_mpl From 21cde338f6eaabc7db4fb6bf16f27338215965a9 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Fri, 6 Nov 2020 08:43:54 +0300 Subject: [PATCH 126/147] Removed import --- pandas/tests/plotting/frame/test_frame_groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 92ae025145595..06ce0d5076d69 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -4,7 +4,6 @@ import pandas.util._test_decorators as td -import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase From 676bd040c85d619f76540a1640efdbe61b69724c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Nov 2020 13:21:19 -0800 Subject: [PATCH 127/147] TST/REF: collect indexing tests by method (#37590) --- pandas/tests/indexing/test_loc.py | 176 ++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 26c9e127bcc10..74b40bc274cfb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1559,6 +1559,182 @@ def test_loc_setitem_mask_and_label_with_datetimeindex(self): tm.assert_frame_equal(df, expected) +class TestLocSetitemWithExpansion: + @pytest.mark.slow + def test_loc_setitem_with_expansion_large_dataframe(self): + # GH#10692 + result = DataFrame({"x": range(10 ** 6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) + + +class TestLocCallable: + def test_frame_loc_getitem_callable(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see 
GH3635) + + # return bool indexer + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_getitem_callable_mixture(self): + # GH#11485 + df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, 
df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_getitem_callable_labels(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_setitem_callable(self): + # GH#11485 + df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 
+ exp = df.copy() + exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) + + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( From 59d0a56291fb0f92093b80837df39ddeb4f5c9e5 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Sat, 7 Nov 2020 21:43:52 +0700 Subject: [PATCH 128/147] TST: match matplotlib warning message (#37666) * TST: match matplotlib warning message * TST: match full message --- pandas/tests/plotting/frame/test_frame.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9aab765dca96b..f2d2203d25b6c 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1541,11 +1541,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output 
multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1622,11 +1622,11 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - with tm.assert_produces_warning(UserWarning): - # _check_plot_works creates subplots inside, - # which leads to warnings like this: - # UserWarning: To output multiple subplots, - # the figure containing the passed axes is being cleared + msg = ( + "To output multiple subplots, " + "the figure containing the passed axes is being cleared" + ) + with tm.assert_produces_warning(UserWarning, match=msg): # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) From cd96a7d2ecd3255de6109a7cf0215130509ded36 Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Sun, 8 Nov 2020 10:00:20 +0700 Subject: [PATCH 129/147] TST: fix warning for pie chart (#37669) --- pandas/tests/plotting/frame/test_frame.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index f2d2203d25b6c..0f256e623e42c 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1452,11 +1452,20 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): + import matplotlib as mpl + df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - 
df.plot.pie(subplots=True, ax=axes, legend=True) + + # GH 37668 + kwargs = {} + if mpl.__version__ >= "3.3": + kwargs = {"normalize": True} + + with tm.assert_produces_warning(None): + df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): From 9992d249a244af056a83e6d42609b3d70f62fc91 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 130/147] Transfer tests of test_frame.py to test_frame_color.py --- pandas/tests/plotting/frame/test_frame.py | 945 +++++++++++++++--- .../tests/plotting/frame/test_frame_color.py | 116 +-- 2 files changed, 878 insertions(+), 183 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 0f256e623e42c..4d339b93fd30d 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -6,6 +6,7 @@ import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td @@ -174,14 +175,14 @@ def test_nonnumeric_exclude(self): @pytest.mark.slow def test_implicit_label(self): - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) ax = df.plot(x="a", y="b") self._check_text_labels(ax.xaxis.get_label(), "a") @pytest.mark.slow def test_donot_overwrite_index_name(self): # GH 8494 - df = DataFrame(np.random.randn(2, 2), columns=["a", "b"]) + df = DataFrame(randn(2, 2), columns=["a", "b"]) df.index.name = "NAME" df.plot(y="b", label="LABEL") assert df.index.name == "NAME" @@ -336,9 +337,415 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = 
df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + 
self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, False, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + 
self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + ], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def 
test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + 
assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert 
returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in 
axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + def test_negative_log(self): df = -DataFrame( - np.random.rand(6, 4), + rand(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -357,20 +764,15 @@ def _compare_stacked_y_cood(self, normal_lines, stacked_lines): def 
test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(np.random.rand(6, 4), columns=["w", "x", "y", "z"]) + df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) neg_df = -df # each column has either positive or negative value sep_df = DataFrame( - { - "w": np.random.rand(6), - "x": np.random.rand(6), - "y": -np.random.rand(6), - "z": -np.random.rand(6), - } + {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} ) # each column has positive-negative mixed value mixed_df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["w", "x", "y", "z"], ) @@ -438,7 +840,7 @@ def test_line_area_nan_df(self): tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) def test_line_lim(self): - df = DataFrame(np.random.rand(6, 3), columns=["x", "y", "z"]) + df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) ax = df.plot() xmin, xmax = ax.get_xlim() lines = ax.get_lines() @@ -462,7 +864,7 @@ def test_line_lim(self): assert xmax >= lines[0].get_data()[0][-1] def test_area_lim(self): - df = DataFrame(np.random.rand(6, 4), columns=["x", "y", "z", "four"]) + df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) neg_df = -df for stacked in [True, False]: @@ -480,7 +882,7 @@ def test_area_lim(self): @pytest.mark.slow def test_bar_linewidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) # regular ax = df.plot.bar(linewidth=2) @@ -501,7 +903,7 @@ def test_bar_linewidth(self): @pytest.mark.slow def test_bar_barwidth(self): - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) width = 0.9 @@ -537,9 +939,49 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + 
self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, width=1) + @pytest.mark.slow def test_bar_bottom_left(self): - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.bar(stacked=False, bottom=1) result = [p.get_y() for p in ax.patches] assert result == [1] * 25 @@ -585,13 +1027,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = DataFrame( + df1 = pd.DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = DataFrame( + df2 = pd.DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -615,7 +1057,7 @@ def test_bar_categorical(self): @pytest.mark.slow def test_plot_scatter(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", 
"four"], ) @@ -634,7 +1076,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = DataFrame(np.random.randn(10), columns=["a"]) + df = pd.DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -645,30 +1087,31 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = DataFrame({"dates": dates, "vals": vals}) + df = pd.DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def test_scatterplot_object_data(self): # GH 18755 - df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) + @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -679,14 +1122,16 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) + df = pd.DataFrame( + {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} + ) _check_plot_works(df.plot.scatter, x=x, y=y) @pytest.mark.slow def test_plot_scatter_with_c(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["x", "y", "z", "four"], ) @@ -742,10 +1187,43 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", 
ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" + @pytest.mark.slow def test_plot_bar(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -758,9 +1236,7 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( - np.random.randn(10, 15), - index=list(string.ascii_letters[:10]), - columns=range(15), + randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) ) _check_plot_works(df.plot.bar) @@ -777,6 +1253,164 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == 
"barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + 
self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + 
tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -836,7 +1470,7 @@ def test_boxplot_vertical(self): @pytest.mark.slow def test_boxplot_return_type(self): df = DataFrame( - np.random.randn(6, 4), + randn(6, 4), index=list(string.ascii_letters[:6]), columns=["one", "two", "three", "four"], ) @@ -855,10 +1489,30 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind="kde") expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) @@ -885,7 +1539,7 @@ def test_kde_missing_vals(self): def test_hist_df(self): from matplotlib.patches import Rectangle - df = DataFrame(np.random.randn(100, 4)) + df = DataFrame(randn(100, 4)) series = df[0] ax = _check_plot_works(df.plot.hist) @@ -923,7 +1577,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1093,16 +1747,16 
@@ def test_hist_df_coord(self): @pytest.mark.slow def test_plot_int_columns(self): - df = DataFrame(np.random.randn(100, 4)).cumsum() + df = DataFrame(randn(100, 4)).cumsum() _check_plot_works(df.plot, legend=True) @pytest.mark.slow def test_df_legend_labels(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) - df2 = DataFrame(np.random.rand(3, 3), columns=["d", "e", "f"]) - df3 = DataFrame(np.random.rand(3, 3), columns=["g", "h", "i"]) - df4 = DataFrame(np.random.rand(3, 3), columns=["j", "k", "l"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) for kind in kinds: @@ -1131,9 +1785,9 @@ def test_df_legend_labels(self): # Time Series ind = date_range("1/1/2014", periods=3) - df = DataFrame(np.random.randn(3, 3), columns=["a", "b", "c"], index=ind) - df2 = DataFrame(np.random.randn(3, 3), columns=["d", "e", "f"], index=ind) - df3 = DataFrame(np.random.randn(3, 3), columns=["g", "h", "i"], index=ind) + df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) ax = df.plot(legend=True, secondary_y="b") self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) ax = df2.plot(legend=False, ax=ax) @@ -1164,7 +1818,9 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) + df = pd.DataFrame( + data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] + ) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) @@ -1187,7 +1843,7 @@ def test_missing_marker_multi_plots_on_same_ax(self): def 
test_legend_name(self): multi = DataFrame( - np.random.randn(4, 4), + randn(4, 4), columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], ) multi.columns.names = ["group", "individual"] @@ -1196,7 +1852,7 @@ def test_legend_name(self): leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(legend=True, ax=ax) leg_title = ax.legend_.get_title() self._check_text_labels(leg_title, "group,individual") @@ -1213,7 +1869,7 @@ def test_legend_name(self): @pytest.mark.slow def test_no_legend(self): kinds = ["line", "bar", "barh", "kde", "area", "hist"] - df = DataFrame(np.random.rand(3, 3), columns=["a", "b", "c"]) + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) for kind in kinds: @@ -1226,7 +1882,7 @@ def test_style_by_column(self): fig = plt.gcf() - df = DataFrame(np.random.randn(100, 3)) + df = DataFrame(randn(100, 3)) for markers in [ {0: "^", 1: "+", 2: "o"}, {0: "^", 1: "+"}, @@ -1248,23 +1904,6 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" - @pytest.mark.parametrize( - "props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -1299,7 +1938,7 @@ def test_all_invalid_plot_data(self): @pytest.mark.slow def test_partially_invalid_plot_data(self): with tm.RNGContext(42): - df = DataFrame(np.random.randn(10, 2), dtype=object) + df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in 
plotting.PlotAccessor._common_kinds: @@ -1310,14 +1949,14 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): # area plot doesn't support positive/negative mixed data kinds = ["area"] - df = DataFrame(np.random.rand(10, 2), dtype=object) + df = DataFrame(rand(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = "a" for kind in kinds: with pytest.raises(TypeError): df.plot(kind=kind) def test_invalid_kind(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(kind="aasdf") @@ -1404,6 +2043,13 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -1452,20 +2098,11 @@ def test_pie_df(self): self._check_colors(ax.patches, facecolors=color_args) def test_pie_df_nan(self): - import matplotlib as mpl - df = DataFrame(np.random.rand(4, 4)) for i in range(4): df.iloc[i, i] = np.nan fig, axes = self.plt.subplots(ncols=4) - - # GH 37668 - kwargs = {} - if mpl.__version__ >= "3.3": - kwargs = {"normalize": True} - - with tm.assert_produces_warning(None): - df.plot.pie(subplots=True, ax=axes, legend=True, **kwargs) + df.plot.pie(subplots=True, ax=axes, legend=True) base_expected = ["0", "1", "2", "3"] for i, ax in enumerate(axes): @@ -1550,11 +2187,11 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to 
warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works( df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind @@ -1631,11 +2268,11 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" - ) - with tm.assert_produces_warning(UserWarning, match=msg): + with tm.assert_produces_warning(UserWarning): + # _check_plot_works creates subplots inside, + # which leads to warnings like this: + # UserWarning: To output multiple subplots, + # the figure containing the passed axes is being cleared # Similar warnings were observed in GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) @@ -1852,6 +2489,53 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with tm.assert_produces_warning(UserWarning): + axes = 
df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -1983,11 +2667,11 @@ def test_plain_axes(self): # a plain Axes object (GH11556) fig, ax = self.plt.subplots() fig.add_axes([0.2, 0.2, 0.2, 0.2]) - Series(np.random.rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=ax) # supplied ax itself is a plain Axes, but because the cmap keyword # a new ax is created for the colorbar -> also multiples axes (GH11520) - df = DataFrame({"a": np.random.randn(8), "b": np.random.randn(8)}) + df = DataFrame({"a": randn(8), "b": randn(8)}) fig = self.plt.figure() ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") @@ -1998,21 +2682,21 @@ def test_plain_axes(self): divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=cax) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=cax) fig, ax = self.plt.subplots() from mpl_toolkits.axes_grid1.inset_locator import inset_axes iax = inset_axes(ax, width="30%", height=1.0, loc=3) - Series(np.random.rand(10)).plot(ax=ax) - Series(np.random.rand(10)).plot(ax=iax) + 
Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=iax) @pytest.mark.parametrize("method", ["line", "barh", "bar"]) def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - DataFrame(np.random.randn(15, 2), columns=list("AB")) + pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2028,7 +2712,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = DataFrame( + df = pd.DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2049,7 +2733,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2064,7 +2748,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = DataFrame([2, 4], index=[1, 2]) + df = pd.DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2076,7 +2760,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = DataFrame( + df = pd.DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2096,9 +2780,25 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) 
+ df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + def test_plot_no_rows(self): # GH 27758 - df = DataFrame(columns=["foo"], dtype=int) + df = pd.DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2107,13 +2807,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = DataFrame(["a", "b", "c"]) + df = pd.DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2123,7 +2823,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = DataFrame( + df = pd.DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2154,7 +2854,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2168,25 +2868,32 @@ def test_xlabel_ylabel_dataframe_single_plot( assert ax.get_xlabel() == str(new_label) @pytest.mark.parametrize( - "xlabel, ylabel", + "index_name, old_label, new_label", [ - (None, None), - ("X Label", None), - (None, "Y Label"), - ("X Label", "Y Label"), + (None, "", "new"), 
+ ("old", "old", "new"), + (None, "", ""), + (None, "", 1), + (None, "", [1, 2]), ], ) - @pytest.mark.parametrize("kind", ["scatter", "hexbin"]) - def test_xlabel_ylabel_dataframe_plane_plot(self, kind, xlabel, ylabel): - # GH 37001 - xcol = "Type A" - ycol = "Type B" - df = DataFrame([[1, 2], [2, 5]], columns=[xcol, ycol]) - - # default is the labels are column names - ax = df.plot(kind=kind, x=xcol, y=ycol, xlabel=xlabel, ylabel=ylabel) - assert ax.get_xlabel() == (xcol if xlabel is None else xlabel) - assert ax.get_ylabel() == (ycol if ylabel is None else ylabel) + @pytest.mark.parametrize("kind", ["line", "area", "bar"]) + def test_xlabel_ylabel_dataframe_subplots( + self, kind, index_name, old_label, new_label + ): + # GH 9093 + df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df.index.name = index_name + + # default is the ylabel is not shown and xlabel is index name + axes = df.plot(kind=kind, subplots=True) + assert all(ax.get_ylabel() == "" for ax in axes) + assert all(ax.get_xlabel() == old_label for ax in axes) + + # old xlabel will be overriden and assigned ylabel will be used as ylabel + axes = df.plot(kind=kind, ylabel=new_label, xlabel=new_label, subplots=True) + assert all(ax.get_ylabel() == str(new_label) for ax in axes) + assert all(ax.get_xlabel() == str(new_label) for ax in axes) def _generate_4_axes_via_gridspec(): diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 2d509e8f3b320..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,20 +1,30 @@ """ Test cases for DataFrame.plot """ +from datetime import date, datetime +import itertools +import string import warnings import numpy as np +from numpy.random import rand, randn import pytest import pandas.util._test_decorators as td +from pandas.core.dtypes.api import is_list_like + import pandas as pd -from pandas import DataFrame +from 
pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range import pandas._testing as tm +from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + @td.skip_if_no_mpl -class TestDataFramePlotsColor(TestPlotBase): +class TestDataFrameColor(TestPlotBase): def setup_method(self, method): TestPlotBase.setup_method(self, method) import matplotlib as mpl @@ -38,9 +48,10 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) + def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,7 +79,7 @@ def test_rgb_tuple_color(self): _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(np.random.randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") @@ -112,7 +123,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -145,7 +156,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = DataFrame( + df = pd.DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -166,7 +177,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline 
backend. random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -188,7 +199,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -204,7 +215,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( + df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -212,45 +223,12 @@ def test_scatter_with_c_column_name_with_colors(self, cmap): ax = df.plot.scatter(x=0, y=1, c="species", cmap=cmap) assert ax.collections[0].colorbar is None - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = 
DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -293,7 +271,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -303,7 +281,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -372,7 +350,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -415,7 +393,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -452,7 +430,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(np.random.rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), 
linecolors=custom_colors) @@ -474,7 +452,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -542,7 +520,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(np.random.randn(5, 5)) + df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -592,6 +570,23 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) + @pytest.mark.parametrize( + "props, expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt @@ -599,21 +594,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(np.random.randn(5, 3)) + df = DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - def test_invalid_colormap(self): - df = DataFrame(np.random.randn(3, 2), columns=["A", "B"]) + df = 
DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -623,7 +611,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -631,15 +619,15 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
- df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() + assert legend.get_color() == line.get_color() \ No newline at end of file From 9f63374fd2d20b2d44e5920e0cfee878eeaf824a Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sun, 8 Nov 2020 17:58:01 +0300 Subject: [PATCH 131/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From 488cb5a4136f86f23c05a7d5d3045e5b1c45ee2b Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 08:13:12 +0300 Subject: [PATCH 132/147] Fixes for linter --- pandas/tests/plotting/frame/test_frame_color.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 517e3c109fc5b..47a45193b2f52 100644 
--- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -155,7 +155,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else From 13f500d350278a2682978b39d026c6686641ef2f Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 16:46:12 +0300 Subject: [PATCH 133/147] =?UTF-8?q?=D0=A1hange=20pd.DateFrame=20to=20DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas/tests/plotting/frame/test_frame_color.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 47a45193b2f52..fefa342770c7f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -176,7 +176,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +198,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +214,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -270,7 +270,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -610,7 +610,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +618,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = 
pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): From 422619803a224f1f265c8d4992906db90f638351 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 2 Nov 2020 19:17:53 +0300 Subject: [PATCH 134/147] Transfer tests of test_frame.py to test_frame_color.py --- .../tests/plotting/frame/test_frame_color.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index fefa342770c7f..41c6578743bbf 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,6 +48,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) + def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -155,7 +156,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = DataFrame( + df = pd.DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +177,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. 
random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +199,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +215,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = DataFrame( + df = pd.DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -270,7 +271,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - DataFrame(np.random.rand(10, 2)).plot(color=colors) + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -610,7 +611,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,15 +619,15 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = 
DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. - df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = DataFrame({"a": [2, 4, 6]}) + df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = pd.DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() + assert legend.get_color() == line.get_color() \ No newline at end of file From d3ad0a64611291b71a90451ecddd4ed16bb39f0c Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Sun, 8 Nov 2020 17:58:01 +0300 Subject: [PATCH 135/147] PEP8 --- pandas/tests/plotting/frame/test_frame_color.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 41c6578743bbf..517e3c109fc5b 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -48,7 +48,6 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) @@ -630,4 +629,4 @@ def test_colors_of_columns_with_same_name(self): df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): - assert legend.get_color() == line.get_color() \ No newline at end of file + assert legend.get_color() == line.get_color() From be13a43e0cb4b36fccd5768b5761e048a70d12ea Mon Sep 17 00:00:00 
2001 From: dezmond22 Date: Mon, 9 Nov 2020 17:40:18 +0300 Subject: [PATCH 136/147] =?UTF-8?q?=D0=A1hange=20DateFrame=20to=20pd.DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/plotting/frame/test_frame_color.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 517e3c109fc5b..136ea43f2333f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -32,7 +32,7 @@ def setup_method(self, method): mpl.rcdefaults() self.tdf = tm.makeTimeDataFrame() - self.hexbin_df = DataFrame( + self.hexbin_df = pd.DataFrame( { "A": np.random.uniform(size=20), "B": np.random.uniform(size=20), @@ -50,7 +50,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = pd.DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,22 +68,22 @@ def test_mpl2_color_cycle_str(self): def test_color_single_series_list(self): # GH 3486 - df = DataFrame({"A": [1, 2, 3]}) + df = pd.DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=["red"]) def test_rgb_tuple_color(self): # GH 16695 - df = DataFrame({"x": [1, 2], "y": [3, 4]}) + df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = DataFrame(randn(10, 2)) + df = pd.DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") def test_color_and_style_arguments(self): - df = DataFrame({"x": [1, 2], "y": [3, 4]}) + 
df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) # passing both 'color' and 'style' arguments should be allowed # if there is no color symbol in the style strings: ax = df.plot(color=["red", "black"], style=["-", "--"]) @@ -107,7 +107,7 @@ def test_color_and_style_arguments(self): ) def test_color_and_marker(self, color, expected): # GH 21003 - df = DataFrame(np.random.random((7, 4))) + df = pd.DataFrame(np.random.random((7, 4))) ax = df.plot(color=color, style="d--") # check colors result = [i.get_color() for i in ax.lines] @@ -122,7 +122,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -227,7 +227,7 @@ def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -280,7 +280,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +349,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = DataFrame(rand(5, 5)) + df = pd.DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +392,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +429,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = 
DataFrame(rand(5, 5)) + df = pd.DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 +451,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +519,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = DataFrame(randn(5, 5)) + df = pd.DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -580,7 +580,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): ) def test_specified_props_kwd_plot_box(self, props, expected): # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) + df = pd.DataFrame({k: np.random.random(100) for k in "ABC"}) kwd = {props: dict(color="C1")} result = df.plot.box(return_type="dict", **kwd) @@ -593,14 +593,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = DataFrame(randn(5, 3)) + df = pd.DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) def test_invalid_colormap(self): - df = DataFrame(randn(3, 2), columns=["A", "B"]) + df = pd.DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") From 7acb39e482be309139310857305b7aa178a4fb7a Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 18:01:36 +0300 Subject: [PATCH 137/147] =?UTF-8?q?=D0=A1hange=20pd.DateFrame=20to=20DateF?= =?UTF-8?q?rame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
pandas/tests/plotting/frame/test_frame.py | 48 ++++++++--------- .../tests/plotting/frame/test_frame_color.py | 54 +++++++++---------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 4d339b93fd30d..03fb420517340 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -549,7 +549,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): pd.to_datetime("2017-08-02 00:00:00"), ], } - testdata = pd.DataFrame(data) + testdata = DataFrame(data) ax_period = testdata.plot(x="numeric", y="period") assert ( ax_period.get_lines()[0].get_data()[1] == testdata["period"].values @@ -1027,13 +1027,13 @@ def test_bar_nan(self): @pytest.mark.slow def test_bar_categorical(self): # GH 13019 - df1 = pd.DataFrame( + df1 = DataFrame( np.random.randn(6, 5), index=pd.Index(list("ABCDEF")), columns=pd.Index(list("abcde")), ) # categorical index must behave the same - df2 = pd.DataFrame( + df2 = DataFrame( np.random.randn(6, 5), index=pd.CategoricalIndex(list("ABCDEF")), columns=pd.CategoricalIndex(list("abcde")), @@ -1076,7 +1076,7 @@ def test_plot_scatter(self): def test_raise_error_on_datetime_time_data(self): # GH 8113, datetime.time type is not supported by matplotlib in scatter - df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df = DataFrame(np.random.randn(10), columns=["a"]) df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time msg = "must be a string or a number, not 'datetime.time'" @@ -1087,19 +1087,19 @@ def test_scatterplot_datetime_data(self): # GH 30391 dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") vals = np.random.normal(0, 1, len(dates)) - df = pd.DataFrame({"dates": dates, "vals": vals}) + df = DataFrame({"dates": dates, "vals": vals}) _check_plot_works(df.plot.scatter, x="dates", y="vals") _check_plot_works(df.plot.scatter, x=0, y=1) def 
test_scatterplot_object_data(self): # GH 18755 - df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + df = DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + df = DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) @@ -1111,7 +1111,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax = df.plot.hexbin("A label", "B label", gridsize=12) assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) @@ -1122,7 +1122,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = pd.DataFrame( + df = DataFrame( {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} ) @@ -1210,7 +1210,7 @@ def test_scatter_colorbar_different_cmap(self): # GH 33389 import matplotlib.pyplot as plt - df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) df["x2"] = df["x"] + 1 fig, ax = plt.subplots() @@ -1577,7 +1577,7 @@ def test_hist_df(self): def test_hist_weights(self, weights): # GH 33173 np.random.seed(0) - df = pd.DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) + df = DataFrame(dict(zip(["A", "B"], np.random.randn(2, 100)))) ax1 = _check_plot_works(df.plot, kind="hist", weights=weights) ax2 = _check_plot_works(df.plot, kind="hist") @@ -1818,7 +1818,7 @@ def test_df_legend_labels(self): def 
test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = pd.DataFrame( + df = DataFrame( data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] ) fig, ax = self.plt.subplots(nrows=1, ncols=3) @@ -2696,7 +2696,7 @@ def test_plain_axes(self): def test_secondary_axis_font_size(self, method): # GH: 12565 df = ( - pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + DataFrame(np.random.randn(15, 2), columns=list("AB")) .assign(C=lambda df: df.B.cumsum()) .assign(D=lambda df: df.C * 1.1) ) @@ -2712,7 +2712,7 @@ def test_secondary_axis_font_size(self, method): def test_x_string_values_ticks(self): # Test if string plot index have a fixed xtick position # GH: 7612, GH: 22334 - df = pd.DataFrame( + df = DataFrame( { "sales": [3, 2, 3], "visits": [20, 42, 28], @@ -2733,7 +2733,7 @@ def test_x_multiindex_values_ticks(self): # Test if multiindex plot index have a fixed xtick position # GH: 15912 index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) - df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + df = DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) ax = df.plot() ax.set_xlim(-1, 4) xticklabels = [t.get_text() for t in ax.get_xticklabels()] @@ -2748,7 +2748,7 @@ def test_x_multiindex_values_ticks(self): def test_xlim_plot_line(self, kind): # test if xlim is set correctly in plot.line and plot.area # GH 27686 - df = pd.DataFrame([2, 4], index=[1, 2]) + df = DataFrame([2, 4], index=[1, 2]) ax = df.plot(kind=kind) xlims = ax.get_xlim() assert xlims[0] < 1 @@ -2760,7 +2760,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): fig, ax = self.plt.subplots() indexes = ["k1", "k2", "k3", "k4"] - df = pd.DataFrame( + df = DataFrame( { "s1": [1000, 2000, 1500, 2000], "s2": [900, 1400, 2000, 3000], @@ -2783,7 +2783,7 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): def test_subplots_sharex_false(self): # test when sharex is set to False, two plots should have different # labels, GH 25160 
- df = pd.DataFrame(np.random.rand(10, 2)) + df = DataFrame(np.random.rand(10, 2)) df.iloc[5:, 1] = np.nan df.iloc[:5, 0] = np.nan @@ -2798,7 +2798,7 @@ def test_subplots_sharex_false(self): def test_plot_no_rows(self): # GH 27758 - df = pd.DataFrame(columns=["foo"], dtype=int) + df = DataFrame(columns=["foo"], dtype=int) assert df.empty ax = df.plot() assert len(ax.get_lines()) == 1 @@ -2807,13 +2807,13 @@ def test_plot_no_rows(self): assert len(line.get_ydata()) == 0 def test_plot_no_numeric_data(self): - df = pd.DataFrame(["a", "b", "c"]) + df = DataFrame(["a", "b", "c"]) with pytest.raises(TypeError): df.plot() def test_missing_markers_legend(self): # 14958 - df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) ax = df.plot(y=["A"], marker="x", linestyle="solid") df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) @@ -2823,7 +2823,7 @@ def test_missing_markers_legend(self): def test_missing_markers_legend_using_style(self): # 14563 - df = pd.DataFrame( + df = DataFrame( { "A": [1, 2, 3, 4, 5, 6], "B": [2, 4, 1, 3, 2, 4], @@ -2854,7 +2854,7 @@ def test_xlabel_ylabel_dataframe_single_plot( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name @@ -2882,7 +2882,7 @@ def test_xlabel_ylabel_dataframe_subplots( self, kind, index_name, old_label, new_label ): # GH 9093 - df = pd.DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) + df = DataFrame([[1, 2], [2, 5]], columns=["Type A", "Type B"]) df.index.name = index_name # default is the ylabel is not shown and xlabel is index name diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 
136ea43f2333f..fefa342770c7f 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -32,7 +32,7 @@ def setup_method(self, method): mpl.rcdefaults() self.tdf = tm.makeTimeDataFrame() - self.hexbin_df = pd.DataFrame( + self.hexbin_df = DataFrame( { "A": np.random.uniform(size=20), "B": np.random.uniform(size=20), @@ -50,7 +50,7 @@ def _assert_xtickslabels_visibility(self, axes, expected): def test_mpl2_color_cycle_str(self): # GH 15516 - df = pd.DataFrame(randn(10, 3), columns=["a", "b", "c"]) + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) colors = ["C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9"] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always", "MatplotlibDeprecationWarning") @@ -68,22 +68,22 @@ def test_mpl2_color_cycle_str(self): def test_color_single_series_list(self): # GH 3486 - df = pd.DataFrame({"A": [1, 2, 3]}) + df = DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=["red"]) def test_rgb_tuple_color(self): # GH 16695 - df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) + df = DataFrame({"x": [1, 2], "y": [3, 4]}) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) def test_color_empty_string(self): - df = pd.DataFrame(randn(10, 2)) + df = DataFrame(randn(10, 2)) with pytest.raises(ValueError): df.plot(color="") def test_color_and_style_arguments(self): - df = pd.DataFrame({"x": [1, 2], "y": [3, 4]}) + df = DataFrame({"x": [1, 2], "y": [3, 4]}) # passing both 'color' and 'style' arguments should be allowed # if there is no color symbol in the style strings: ax = df.plot(color=["red", "black"], style=["-", "--"]) @@ -107,7 +107,7 @@ def test_color_and_style_arguments(self): ) def test_color_and_marker(self, color, expected): # GH 21003 - df = pd.DataFrame(np.random.random((7, 4))) + df = DataFrame(np.random.random((7, 4))) ax = df.plot(color=color, style="d--") # check 
colors result = [i.get_color() for i in ax.lines] @@ -122,7 +122,7 @@ def test_bar_colors(self): default_colors = self._unpack_cycler(plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.bar() self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) tm.close() @@ -155,7 +155,7 @@ def test_bar_colors(self): tm.close() def test_bar_user_colors(self): - df = pd.DataFrame( + df = DataFrame( {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} ) # This should *only* work when `y` is specified, else @@ -176,7 +176,7 @@ def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): # interfere with x-axis label and ticklabels with # ipython inline backend. random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) ax1 = df.plot.scatter(x="A label", y="B label") ax2 = df.plot.scatter(x="A label", y="B label", c="C label") @@ -198,7 +198,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): import matplotlib.pyplot as plt random_array = np.random.random((1000, 3)) - df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + df = DataFrame(random_array, columns=["A label", "B label", "C label"]) fig, axes = plt.subplots(1, 2) df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) @@ -214,7 +214,7 @@ def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): @pytest.mark.parametrize("cmap", [None, "Greys"]) def test_scatter_with_c_column_name_with_colors(self, cmap): # https://github.com/pandas-dev/pandas/issues/34316 - df = pd.DataFrame( + df = DataFrame( [[5.1, 3.5], [4.9, 3.0], [7.0, 3.2], [6.4, 3.2], [5.9, 3.0]], columns=["length", "width"], ) @@ -227,7 +227,7 @@ def test_line_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = 
df.plot(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -270,7 +270,7 @@ def test_line_colors(self): @pytest.mark.slow def test_dont_modify_colors(self): colors = ["r", "g", "b"] - pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + DataFrame(np.random.rand(10, 2)).plot(color=colors) assert len(colors) == 3 @pytest.mark.slow @@ -280,7 +280,7 @@ def test_line_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -349,7 +349,7 @@ def test_area_colors(self): from matplotlib.collections import PolyCollection custom_colors = "rgcby" - df = pd.DataFrame(rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.area(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -392,7 +392,7 @@ def test_area_colors(self): def test_hist_colors(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) ax = df.plot.hist() self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) tm.close() @@ -429,7 +429,7 @@ def test_kde_colors(self): from matplotlib import cm custom_colors = "rgcby" - df = pd.DataFrame(rand(5, 5)) + df = DataFrame(rand(5, 5)) ax = df.plot.kde(color=custom_colors) self._check_colors(ax.get_lines(), linecolors=custom_colors) @@ -451,7 +451,7 @@ def test_kde_colors_and_styles_subplots(self): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) axes = df.plot(kind="kde", subplots=True) for ax, c in zip(axes, list(default_colors)): @@ -519,7 +519,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): default_colors = self._unpack_cycler(self.plt.rcParams) - df = pd.DataFrame(randn(5, 5)) + df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type="dict") 
_check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) tm.close() @@ -580,7 +580,7 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): ) def test_specified_props_kwd_plot_box(self, props, expected): # GH 30346 - df = pd.DataFrame({k: np.random.random(100) for k in "ABC"}) + df = DataFrame({k: np.random.random(100) for k in "ABC"}) kwd = {props: dict(color="C1")} result = df.plot.box(return_type="dict", **kwd) @@ -593,14 +593,14 @@ def test_default_color_cycle(self): colors = list("rgbk") plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) - df = pd.DataFrame(randn(5, 3)) + df = DataFrame(randn(5, 3)) ax = df.plot() expected = self._unpack_cycler(plt.rcParams)[:3] self._check_colors(ax.get_lines(), linecolors=expected) def test_invalid_colormap(self): - df = pd.DataFrame(randn(3, 2), columns=["A", "B"]) + df = DataFrame(randn(3, 2), columns=["A", "B"]) with pytest.raises(ValueError): df.plot(colormap="invalid_colormap") @@ -610,7 +610,7 @@ def test_passed_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_rcParams_bar_colors(self): @@ -618,14 +618,14 @@ def test_rcParams_bar_colors(self): color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + barplot = DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] def test_colors_of_columns_with_same_name(self): # ISSUE 11136 -> https://github.com/pandas-dev/pandas/issues/11136 # Creating a DataFrame with duplicate column labels and testing colors of them. 
- df = pd.DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) - df1 = pd.DataFrame({"a": [2, 4, 6]}) + df = DataFrame({"b": [0, 1, 0], "a": [1, 2, 3]}) + df1 = DataFrame({"a": [2, 4, 6]}) df_concat = pd.concat([df, df1], axis=1) result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): From 9898e1f1e079c44e906a65baf58fe25fb96136de Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:30:44 +0300 Subject: [PATCH 138/147] Removing imports --- pandas/tests/plotting/frame/test_frame_color.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index fefa342770c7f..386482ea82ef9 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -1,8 +1,5 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime -import itertools -import string import warnings import numpy as np @@ -11,17 +8,11 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.api import is_list_like - import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import DataFrame import pandas._testing as tm -from pandas.core.arrays import integer_array from pandas.tests.plotting.common import TestPlotBase, _check_plot_works -from pandas.io.formats.printing import pprint_thing -import pandas.plotting as plotting - @td.skip_if_no_mpl class TestDataFrameColor(TestPlotBase): From 0e3edef7571eca6aa7cb239f282349f663e87d84 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:31:18 +0300 Subject: [PATCH 139/147] Bug fixes --- pandas/tests/plotting/frame/test_frame.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 03fb420517340..cdda5e3d5ad88 100644 --- 
a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1122,9 +1122,7 @@ def test_if_hexbin_xaxis_label_is_visible(self): @pytest.mark.slow def test_plot_scatter_with_categorical_data(self, x, y): # after fixing GH 18755, should be able to plot categorical data - df = DataFrame( - {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} - ) + df = DataFrame({"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])}) _check_plot_works(df.plot.scatter, x=x, y=y) @@ -1193,7 +1191,6 @@ def test_scatter_colors(self): df.plot.scatter(x="a", y="b", c="c", color="green") default_colors = self._unpack_cycler(self.plt.rcParams) - ax = df.plot.scatter(x="a", y="b", c="c") tm.assert_numpy_array_equal( ax.collections[0].get_facecolor()[0], @@ -1818,9 +1815,7 @@ def test_df_legend_labels(self): def test_missing_marker_multi_plots_on_same_ax(self): # GH 18222 - df = DataFrame( - data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] - ) + df = DataFrame(data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"]) fig, ax = self.plt.subplots(nrows=1, ncols=3) # Left plot df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) From 50cdcd2ba2a083e7e24029fafb1ce19eb7207292 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 19:31:18 +0300 Subject: [PATCH 140/147] Bug fixes --- pandas/tests/plotting/frame/test_frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index cdda5e3d5ad88..9b01bf4adccb5 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1104,7 +1104,6 @@ def test_scatterplot_object_data(self): _check_plot_works(df.plot.scatter, x="a", y="b") _check_plot_works(df.plot.scatter, x=0, y=1) - @pytest.mark.slow def test_if_hexbin_xaxis_label_is_visible(self): # addressing issue #10678, to ensure colobar does not From 
bb5d913c8ff5619d2daffa697808887837362f49 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 21:47:00 +0300 Subject: [PATCH 141/147] Fix incorrect merge --- pandas/tests/plotting/frame/test_frame.py | 743 +----------------- .../tests/plotting/frame/test_frame_color.py | 17 - 2 files changed, 17 insertions(+), 743 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 9b01bf4adccb5..d2d4e2aad24c8 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -337,412 +337,6 @@ def test_unsorted_index_lims(self): assert xmin <= np.nanmin(lines[0].get_data()[0]) assert xmax >= np.nanmax(lines[0].get_data()[0]) - @pytest.mark.slow - def test_subplots(self): - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - for kind in ["bar", "barh", "line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - assert axes.shape == (3,) - - for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[pprint_thing(column)]) - - for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - if not (kind == "bar" and self.mpl_ge_3_1_0): - # change https://github.com/pandas-dev/pandas/issues/26714 - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, sharex=False) - for ax in axes: - self._check_visible(ax.xaxis) - 
self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - - axes = df.plot(kind=kind, subplots=True, legend=False) - for ax in axes: - assert ax.get_legend() is None - - def test_groupby_boxplot_sharey(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharey can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] - self._assert_ytickslabels_visibility(axes, expected) - - def test_groupby_boxplot_sharex(self): - # https://github.com/pandas-dev/pandas/issues/20968 - # sharex can now be switched check whether the right - # pair of axes is turned on or off - - df = DataFrame( - { - "a": [-1.43, -0.15, -3.70, -1.43, -0.14], - "b": [0.56, 0.84, 0.29, 0.56, 0.85], - "c": [0, 1, 2, 3, 1], - }, - index=[0, 1, 2, 3, 4], - ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, yticklabels should be visible - # only for bottom plots - axes = 
df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - @pytest.mark.slow - def test_subplots_timeseries(self): - idx = date_range(start="2014-07-01", freq="M", periods=10) - df = DataFrame(np.random.rand(10, 3), index=idx) - - for kind in ["line", "area"]: - axes = df.plot(kind=kind, subplots=True, sharex=True) - self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - - for ax in axes[:-2]: - # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid - self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) - self._check_visible(ax.xaxis.get_label(), visible=False) - self._check_visible(ax.get_yticklabels()) - - self._check_visible(axes[-1].xaxis) - self._check_visible(axes[-1].get_xticklabels()) - self._check_visible(axes[-1].get_xticklabels(minor=True)) - self._check_visible(axes[-1].xaxis.get_label()) - self._check_visible(axes[-1].get_yticklabels()) - self._check_ticks_props(axes, xrot=0) - - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) - for ax in axes: - self._check_visible(ax.xaxis) - self._check_visible(ax.get_xticklabels()) - self._check_visible(ax.get_xticklabels(minor=True)) - self._check_visible(ax.xaxis.get_label()) - self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) - - def test_subplots_timeseries_y_axis(self): - # GH16953 - data = { - "numeric": np.array([1, 2, 5]), - "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), - ], - "datetime_no_tz": [ - pd.to_datetime("2017-08-01 00:00:00"), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - "datetime_all_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00", utc=True), - pd.to_datetime("2017-08-02 00:00:00", utc=True), 
- ], - "text": ["This", "should", "fail"], - } - testdata = DataFrame(data) - - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() - - msg = "no numeric data to plot" - with pytest.raises(TypeError, match=msg): - testdata.plot(y="text") - - @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") - def test_subplots_timeseries_y_axis_not_supported(self): - """ - This test will fail for: - period: - since period isn't yet implemented in ``select_dtypes`` - and because it will need a custom value converter + - tick formatter (as was done for x-axis plots) - - categorical: - because it will need a custom value converter + - tick formatter (also doesn't work for x-axis, as of now) - - datetime_mixed_tz: - because of the way how pandas handles ``Series`` of - ``datetime`` objects with different timezone, - generally converting ``datetime`` objects in a tz-aware - form could help with this problem - """ - data = { - "numeric": np.array([1, 2, 5]), - "period": [ - pd.Period("2017-08-01 00:00:00", freq="H"), - pd.Period("2017-08-01 02:00", freq="H"), - pd.Period("2017-08-02 00:00:00", freq="H"), - ], - "categorical": pd.Categorical( - ["c", "b", "a"], categories=["a", "b", "c"], ordered=False - ), - "datetime_mixed_tz": [ - pd.to_datetime("2017-08-01 00:00:00", utc=True), - pd.to_datetime("2017-08-01 02:00:00"), - pd.to_datetime("2017-08-02 00:00:00"), - ], - } - testdata = DataFrame(data) - 
ax_period = testdata.plot(x="numeric", y="period") - assert ( - ax_period.get_lines()[0].get_data()[1] == testdata["period"].values - ).all() - ax_categorical = testdata.plot(x="numeric", y="categorical") - assert ( - ax_categorical.get_lines()[0].get_data()[1] - == testdata["categorical"].values - ).all() - ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") - assert ( - ax_datetime_mixed_tz.get_lines()[0].get_data()[1] - == testdata["datetime_mixed_tz"].values - ).all() - - @pytest.mark.slow - def test_subplots_layout(self): - # GH 6667 - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, layout=(2, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(-1, 2)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(2, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) - assert axes.shape == (2, 2) - - axes = df.plot(subplots=True, layout=(1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(-1, 4)) - self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) - assert axes.shape == (1, 4) - - axes = df.plot(subplots=True, layout=(4, -1)) - self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) - assert axes.shape == (4, 1) - - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(1, 1)) - with pytest.raises(ValueError): - df.plot(subplots=True, layout=(-1, -1)) - - # single column - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) - - @pytest.mark.slow 
- def test_subplots_warnings(self): - # GH 9464 - with tm.assert_produces_warning(None): - df = DataFrame(np.random.randn(100, 4)) - df.plot(subplots=True, layout=(3, 2)) - - df = DataFrame( - np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) - ) - df.plot(subplots=True, layout=(3, 2)) - - @pytest.mark.slow - def test_subplots_multiple_axes(self): - # GH 5353, 6970, GH 7069 - fig, axes = self.plt.subplots(2, 3) - df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) - self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) - assert returned.shape == (3,) - assert returned[0].figure is fig - self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) - tm.close() - - with pytest.raises(ValueError): - fig, axes = self.plt.subplots(2, 3) - # pass different number of axes from required - df.plot(subplots=True, ax=axes) - - # pass 2-dim axes and invalid layout - # invalid lauout should not affect to input and return value - # (show warning is tested in - # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes - fig, axes = self.plt.subplots(2, 2) - with warnings.catch_warnings(): - warnings.simplefilter("ignore", UserWarning) - df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, 1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - returned = df.plot( - subplots=True, ax=axes, 
layout=(-1, 2), sharex=False, sharey=False - ) - self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) - assert returned.shape == (4,) - - # single column - fig, axes = self.plt.subplots(1, 1) - df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) - - axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) - - def test_subplots_ts_share_axes(self): - # GH 3964 - fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) - self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame( - np.random.randn(10, 9), - index=date_range(start="2014-07-01", freq="M", periods=10), - ) - for i, ax in enumerate(axes.ravel()): - df[i].plot(ax=ax, fontsize=5) - - # Rows other than bottom should not be visible - for ax in axes[0:-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=False) - - # Bottom row should be visible - for ax in axes[-1].ravel(): - self._check_visible(ax.get_xticklabels(), visible=True) - - # First column should be visible - for ax in axes[[0, 1, 2], [0]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - # Other columns should not be visible - for ax in axes[[0, 1, 2], [1]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - for ax in axes[[0, 1, 2], [2]].ravel(): - self._check_visible(ax.get_yticklabels(), visible=False) - - def test_subplots_sharex_axes_existing_axes(self): - # GH 9158 - d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} - df = DataFrame(d, index=date_range("2014 10 11", "2014 10 14")) - - axes = df[["A", "B"]].plot(subplots=True) - df["C"].plot(ax=axes[0], secondary_y=True) - - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - for ax in axes.ravel(): - self._check_visible(ax.get_yticklabels(), visible=True) - - @pytest.mark.slow - 
def test_subplots_dup_columns(self): - # GH 10962 - df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) - axes = df.plot(subplots=True) - for ax in axes: - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - axes = df.plot(subplots=True, secondary_y="a") - for ax in axes: - # (right) is only attached when subplots=False - self._check_legend_labels(ax, labels=["a"]) - assert len(ax.lines) == 1 - tm.close() - - ax = df.plot(secondary_y="a") - self._check_legend_labels(ax, labels=["a (right)"] * 5) - assert len(ax.lines) == 0 - assert len(ax.right_ax.lines) == 5 - def test_negative_log(self): df = -DataFrame( rand(6, 4), @@ -939,46 +533,6 @@ def test_bar_barwidth(self): for r in ax.patches: assert r.get_height() == width - @pytest.mark.slow - def test_bar_barwidth_position(self): - df = DataFrame(randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) - - @pytest.mark.slow - def test_bar_barwidth_position_int(self): - # GH 12979 - df = DataFrame(randn(5, 5)) - - for w in [1, 1.0]: - ax = df.plot.bar(stacked=True, width=w) - ticks = ax.xaxis.get_ticklocs() - tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) - assert ax.get_xlim() == (-0.75, 4.75) - # check left-edge of bars - assert ax.patches[0].get_x() == -0.5 - assert ax.patches[-1].get_x() == 3.5 - - self._check_bar_alignment(df, kind="bar", stacked=True, width=1) - self._check_bar_alignment(df, kind="barh", stacked=False, width=1) - self._check_bar_alignment(df, kind="barh", 
stacked=True, width=1) - self._check_bar_alignment(df, kind="bar", subplots=True, width=1) - self._check_bar_alignment(df, kind="barh", subplots=True, width=1) - @pytest.mark.slow def test_bar_bottom_left(self): df = DataFrame(rand(5, 5)) @@ -1184,38 +738,6 @@ def test_plot_scatter_with_s(self): ax = df.plot.scatter(x="a", y="b", s="c") tm.assert_numpy_array_equal(df["c"].values, right=ax.collections[0].get_sizes()) - def test_scatter_colors(self): - df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) - with pytest.raises(TypeError): - df.plot.scatter(x="a", y="b", c="c", color="green") - - default_colors = self._unpack_cycler(self.plt.rcParams) - ax = df.plot.scatter(x="a", y="b", c="c") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array(self.colorconverter.to_rgba(default_colors[0])), - ) - - ax = df.plot.scatter(x="a", y="b", color="white") - tm.assert_numpy_array_equal( - ax.collections[0].get_facecolor()[0], - np.array([1, 1, 1, 1], dtype=np.float64), - ) - - def test_scatter_colorbar_different_cmap(self): - # GH 33389 - import matplotlib.pyplot as plt - - df = DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) - df["x2"] = df["x"] + 1 - - fig, ax = plt.subplots() - df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) - df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) - - assert ax.collections[0].cmap.name == "cividis" - assert ax.collections[1].cmap.name == "magma" - @pytest.mark.slow def test_plot_bar(self): df = DataFrame( @@ -1249,164 +771,6 @@ def test_plot_bar(self): ax = df.plot.barh(rot=55, fontsize=11) self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) - def _check_bar_alignment( - self, - df, - kind="bar", - stacked=False, - subplots=False, - align="center", - width=0.5, - position=0.5, - ): - - axes = df.plot( - kind=kind, - stacked=stacked, - subplots=subplots, - align=align, - width=width, - position=position, - grid=True, - ) - - axes = 
self._flatten_visible(axes) - - for ax in axes: - if kind == "bar": - axis = ax.xaxis - ax_min, ax_max = ax.get_xlim() - min_edge = min(p.get_x() for p in ax.patches) - max_edge = max(p.get_x() + p.get_width() for p in ax.patches) - elif kind == "barh": - axis = ax.yaxis - ax_min, ax_max = ax.get_ylim() - min_edge = min(p.get_y() for p in ax.patches) - max_edge = max(p.get_y() + p.get_height() for p in ax.patches) - else: - raise ValueError - - # GH 7498 - # compare margins between lim and bar edges - tm.assert_almost_equal(ax_min, min_edge - 0.25) - tm.assert_almost_equal(ax_max, max_edge + 0.25) - - p = ax.patches[0] - if kind == "bar" and (stacked is True or subplots is True): - edge = p.get_x() - center = edge + p.get_width() * position - elif kind == "bar" and stacked is False: - center = p.get_x() + p.get_width() * len(df.columns) * position - edge = p.get_x() - elif kind == "barh" and (stacked is True or subplots is True): - center = p.get_y() + p.get_height() * position - edge = p.get_y() - elif kind == "barh" and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position - edge = p.get_y() - else: - raise ValueError - - # Check the ticks locates on integer - assert (axis.get_ticklocs() == np.arange(len(df))).all() - - if align == "center": - # Check whether the bar locates on center - tm.assert_almost_equal(axis.get_ticklocs()[0], center) - elif align == "edge": - # Check whether the bar's edge starts from the tick - tm.assert_almost_equal(axis.get_ticklocs()[0], edge) - else: - raise ValueError - - return axes - - @pytest.mark.slow - def test_bar_stacked_center(self): - # GH2157 - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def 
test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) - - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - - @pytest.mark.slow - def test_bar_align_single_column(self): - df = DataFrame(randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, 
kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) - - @pytest.mark.slow - def test_bar_log_no_subplots(self): - # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 - # regressions in 1.2.1 - expected = np.array([0.1, 1.0, 10.0, 100]) - - # no subplots - df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) - ax = df.plot.bar(grid=True, log=True) - tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) - - @pytest.mark.slow - def test_bar_log_subplots(self): - expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) - - ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( - log=True, subplots=True - ) - - tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) - tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) - @pytest.mark.slow def test_boxplot(self): df = self.hist_df @@ -1485,26 +849,6 @@ def test_boxplot_return_type(self): result = df.plot.box(return_type="both") self._check_box_return_type(result, "both") - @pytest.mark.slow - def test_boxplot_subplots_return_type(self): - df = self.hist_df - - # normal style: return_type=None - result = df.plot.box(subplots=True) - assert isinstance(result, Series) - self._check_box_return_type( - result, None, expected_keys=["height", "weight", "category"] - ) - - for t in ["dict", "axes", "both"]: - returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type( - returned, - t, - expected_keys=["height", "weight", "category"], - check_ax_title=False, - ) - @pytest.mark.slow @td.skip_if_no_scipy def test_kde_df(self): @@ -1898,6 +1242,23 @@ def test_line_label_none(self): ax = s.plot(legend=True) assert ax.get_legend().get_texts()[0].get_text() == "None" + @pytest.mark.parametrize( + "props, 
expected", + [ + ("boxprops", "boxes"), + ("whiskerprops", "whiskers"), + ("capprops", "caps"), + ("medianprops", "medians"), + ], + ) + def test_specified_props_kwd_plot_box(self, props, expected): + # GH 30346 + df = DataFrame({k: np.random.random(100) for k in "ABC"}) + kwd = {props: dict(color="C1")} + result = df.plot.box(return_type="dict", **kwd) + + assert result[expected][0].get_color() == "C1" + def test_unordered_ts(self): df = DataFrame( np.array([3.0, 2.0, 1.0]), @@ -2037,13 +1398,6 @@ def test_hexbin_cmap(self): ax = df.plot.hexbin(x="A", y="B", colormap=cm) assert ax.collections[0].cmap.name == cm - @pytest.mark.slow - def test_no_color_bar(self): - df = self.hexbin_df - - ax = df.plot.hexbin(x="A", y="B", colorbar=None) - assert ax.collections[0].colorbar is None - @pytest.mark.slow def test_allow_cmap(self): df = self.hexbin_df @@ -2483,53 +1837,6 @@ def test_memory_leak(self): # need to actually access something to get an error results[key].lines - @pytest.mark.slow - def test_df_subplots_patterns_minorticks(self): - # GH 10657 - import matplotlib.pyplot as plt - - df = DataFrame( - np.random.randn(10, 2), - index=date_range("1/1/2000", periods=10), - columns=list("AB"), - ) - - # shared subplots - fig, axes = plt.subplots(2, 1, sharex=True) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - fig, axes = plt.subplots(2, 1) - with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=axes, sharex=True) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - # xaxis 
of 1st ax must be hidden - self._check_visible(axes[0].get_xticklabels(), visible=False) - self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) - self._check_visible(axes[1].get_xticklabels(), visible=True) - self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) - tm.close() - - # not shared - fig, axes = plt.subplots(2, 1) - axes = df.plot(subplots=True, ax=axes) - for ax in axes: - assert len(ax.lines) == 1 - self._check_visible(ax.get_yticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) - tm.close() - @pytest.mark.slow def test_df_gridspec_patterns(self): # GH 10819 @@ -2774,22 +2081,6 @@ def test_xlim_plot_line_correctly_in_mixed_plot_type(self): xticklabels = [t.get_text() for t in ax.get_xticklabels()] assert xticklabels == indexes - def test_subplots_sharex_false(self): - # test when sharex is set to False, two plots should have different - # labels, GH 25160 - df = DataFrame(np.random.rand(10, 2)) - df.iloc[5:, 1] = np.nan - df.iloc[:5, 0] = np.nan - - figs, axs = self.plt.subplots(2, 1) - df.plot.line(ax=axs, subplots=True, sharex=False) - - expected_ax1 = np.arange(4.5, 10, 0.5) - expected_ax2 = np.arange(-0.5, 5, 0.5) - - tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) - tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) - def test_plot_no_rows(self): # GH 27758 df = DataFrame(columns=["foo"], dtype=int) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 386482ea82ef9..a1c32b941a2f2 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -560,23 +560,6 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): # Color contains invalid key results in ValueError df.plot.box(color=dict(boxes="red", xxxx="blue")) - @pytest.mark.parametrize( - 
"props, expected", - [ - ("boxprops", "boxes"), - ("whiskerprops", "whiskers"), - ("capprops", "caps"), - ("medianprops", "medians"), - ], - ) - def test_specified_props_kwd_plot_box(self, props, expected): - # GH 30346 - df = DataFrame({k: np.random.random(100) for k in "ABC"}) - kwd = {props: dict(color="C1")} - result = df.plot.box(return_type="dict", **kwd) - - assert result[expected][0].get_color() == "C1" - def test_default_color_cycle(self): import cycler import matplotlib.pyplot as plt From d4d6f91d3cd785f281764b5754134b89ea41f1f7 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Mon, 9 Nov 2020 22:43:13 +0300 Subject: [PATCH 142/147] test_frame_color.py edit --- .../tests/plotting/frame/test_frame_color.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index a1c32b941a2f2..2cf327a85c6a7 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -604,3 +604,43 @@ def test_colors_of_columns_with_same_name(self): result = df_concat.plot() for legend, line in zip(result.get_legend().legendHandles, result.lines): assert legend.get_color() == line.get_color() + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 
1, 1, 1], dtype=np.float64), + ) + + def test_scatter_colorbar_different_cmap(self): + # GH 33389 + import matplotlib.pyplot as plt + + df = pd.DataFrame({"x": [1, 2, 3], "y": [1, 3, 2], "c": [1, 2, 3]}) + df["x2"] = df["x"] + 1 + + fig, ax = plt.subplots() + df.plot("x", "y", c="c", kind="scatter", cmap="cividis", ax=ax) + df.plot("x2", "y", c="c", kind="scatter", cmap="magma", ax=ax) + + assert ax.collections[0].cmap.name == "cividis" + assert ax.collections[1].cmap.name == "magma" From 77bda1adbf4dea6a89af51c48c5fa165eb3ca65a Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 01:50:16 +0300 Subject: [PATCH 143/147] Fix merge error --- pandas/tests/indexing/test_loc.py | 176 ------------------------------ 1 file changed, 176 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 74b40bc274cfb..26c9e127bcc10 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1559,182 +1559,6 @@ def test_loc_setitem_mask_and_label_with_datetimeindex(self): tm.assert_frame_equal(df, expected) -class TestLocSetitemWithExpansion: - @pytest.mark.slow - def test_loc_setitem_with_expansion_large_dataframe(self): - # GH#10692 - result = DataFrame({"x": range(10 ** 6)}, dtype="int64") - result.loc[len(result)] = len(result) + 1 - expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") - tm.assert_frame_equal(result, expected) - - -class TestLocCallable: - def test_frame_loc_getitem_callable(self): - # GH#11485 - df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - # iloc cannot use boolean Series (see GH3635) - - # return bool indexer - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - 
res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - # scalar - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - def test_frame_loc_getitem_callable_mixture(self): - # GH#11485 - df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - res = df.loc[3, lambda x: ["A", "B"]] - tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - res = df.loc[3, lambda x: ["A", "B"]] - 
tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - def test_frame_loc_getitem_callable_labels(self): - # GH#11485 - df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"], :] - tm.assert_frame_equal(res, df.loc[["A", "C"], :]) - - res = df.loc[lambda x: ["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - # mixture - res = df.loc[["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - res = df.loc[lambda x: ["A", "C"], "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - def test_frame_loc_setitem_callable(self): - # GH#11485 - df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.copy() - res.loc[lambda x: ["A", "C"]] = -20 - exp = df.copy() - exp.loc[["A", "C"]] = -20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], :] = 20 - exp = df.copy() - exp.loc[["A", "C"], :] = 20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -1 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = [5, 10] - tm.assert_frame_equal(res, exp) - - # mixture - res = df.copy() - res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) - exp = df.copy() - 
exp.loc[["A", "C"], "X"] = np.array([-1, -2]) - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[["A", "C"], lambda x: ["X"]] = 10 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = 10 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], "X"] = -2 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -2 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], ["X"]] = -4 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = -4 - tm.assert_frame_equal(res, exp) - - def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( From 79741531d4a79517461dcebf504b163157b857af Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 01:57:06 +0300 Subject: [PATCH 144/147] Fix merge error --- pandas/tests/indexing/test_loc.py | 176 ------------------------------ 1 file changed, 176 deletions(-) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 74b40bc274cfb..26c9e127bcc10 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1559,182 +1559,6 @@ def test_loc_setitem_mask_and_label_with_datetimeindex(self): tm.assert_frame_equal(df, expected) -class TestLocSetitemWithExpansion: - @pytest.mark.slow - def test_loc_setitem_with_expansion_large_dataframe(self): - # GH#10692 - result = DataFrame({"x": range(10 ** 6)}, dtype="int64") - result.loc[len(result)] = len(result) + 1 - expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") - tm.assert_frame_equal(result, expected) - - -class TestLocCallable: - def test_frame_loc_getitem_callable(self): - # GH#11485 - df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - # iloc cannot use boolean Series (see GH3635) - - # return bool indexer - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - 
tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.A > 2] - tm.assert_frame_equal(res, df.loc[df.A > 2]) - - res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.B == "b", :] - tm.assert_frame_equal(res, df.loc[df.B == "b", :]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] - tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: "B"] - tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) - - # scalar - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - res = df.loc[lambda x: 1, lambda x: "A"] - assert res == df.loc[1, "A"] - - def test_frame_loc_getitem_callable_mixture(self): - # GH#11485 - df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[lambda x: x.A > 2, ["A", "B"]] - tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - res = df.loc[[2, 3], lambda x: ["A", "B"]] - tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) - - 
res = df.loc[3, lambda x: ["A", "B"]] - tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - res = df.loc[3, lambda x: ["A", "B"]] - tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) - - def test_frame_loc_getitem_callable_labels(self): - # GH#11485 - df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"]] - tm.assert_frame_equal(res, df.loc[["A", "C"]]) - - res = df.loc[lambda x: ["A", "C"], :] - tm.assert_frame_equal(res, df.loc[["A", "C"], :]) - - res = df.loc[lambda x: ["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - # mixture - res = df.loc[["A", "C"], lambda x: "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[["A", "C"], lambda x: ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - res = df.loc[lambda x: ["A", "C"], "X"] - tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) - - res = df.loc[lambda x: ["A", "C"], ["X"]] - tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) - - def test_frame_loc_setitem_callable(self): - # GH#11485 - df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) - - # return label - res = df.copy() - res.loc[lambda x: ["A", "C"]] = -20 - exp = df.copy() - exp.loc[["A", "C"]] = -20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], :] = 20 - exp = df.copy() - exp.loc[["A", "C"], :] = 20 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -1 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = [5, 10] - 
tm.assert_frame_equal(res, exp) - - # mixture - res = df.copy() - res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) - exp = df.copy() - exp.loc[["A", "C"], "X"] = np.array([-1, -2]) - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[["A", "C"], lambda x: ["X"]] = 10 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = 10 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], "X"] = -2 - exp = df.copy() - exp.loc[["A", "C"], "X"] = -2 - tm.assert_frame_equal(res, exp) - - res = df.copy() - res.loc[lambda x: ["A", "C"], ["X"]] = -4 - exp = df.copy() - exp.loc[["A", "C"], ["X"]] = -4 - tm.assert_frame_equal(res, exp) - - def test_series_loc_getitem_label_list_missing_values(): # gh-11428 key = np.array( From 4ff3f559da82697d35ce48b104782d0c47a8bb64 Mon Sep 17 00:00:00 2001 From: dezmond22 Date: Tue, 10 Nov 2020 13:26:59 +0300 Subject: [PATCH 145/147] Removing unnecessary features --- pandas/tests/plotting/frame/test_frame.py | 8 -------- pandas/tests/plotting/frame/test_frame_color.py | 8 -------- pandas/tests/plotting/frame/test_frame_subplots.py | 8 -------- 3 files changed, 24 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index ee9e98fb7f3b8..f36ae47a4ce69 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -39,14 +39,6 @@ def setup_method(self, method): } ) - def _assert_ytickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_yticklabels(), visible=exp) - - def _assert_xtickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_xticklabels(), visible=exp) - @pytest.mark.slow def test_plot(self): from pandas.plotting._matplotlib.compat import mpl_ge_3_1_0 diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 
74eb87862b9d1..b9b50523fba25 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -30,14 +30,6 @@ def setup_method(self, method): } ) - def _assert_ytickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_yticklabels(), visible=exp) - - def _assert_xtickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_xticklabels(), visible=exp) - def test_mpl2_color_cycle_str(self): # GH 15516 df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 4a9f85d61ba2a..71a365eb80171 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -33,14 +33,6 @@ def setup_method(self, method): } ) - def _assert_ytickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_yticklabels(), visible=exp) - - def _assert_xtickslabels_visibility(self, axes, expected): - for ax, exp in zip(axes, expected): - self._check_visible(ax.get_xticklabels(), visible=exp) - @pytest.mark.slow def test_subplots(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) From f8dc0e13b5bb7651d86ce4f4e60bbd27cb00002d Mon Sep 17 00:00:00 2001 From: Mikhaylov-yv Date: Wed, 11 Nov 2020 12:03:42 +0300 Subject: [PATCH 146/147] Resolving Commit Conflicts daf999f 365d843 --- pandas/tests/plotting/frame/test_frame.py | 105 ++++++---- .../tests/plotting/frame/test_frame_color.py | 6 +- .../plotting/frame/test_frame_groupby.py | 60 +++--- .../plotting/frame/test_frame_subplots.py | 189 ++++++++---------- 4 files changed, 182 insertions(+), 178 deletions(-) diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 
f36ae47a4ce69..3c43e0b693a1b 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -45,17 +45,25 @@ def test_plot(self): df = self.tdf _check_plot_works(df.plot, grid=False) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True) + + # _check_plot_works adds an ax so use default_axes=True to avoid warning + axes = _check_plot_works(df.plot, default_axes=True, subplots=True) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True, layout=(-1, 2)) + axes = _check_plot_works( + df.plot, + default_axes=True, + subplots=True, + layout=(-1, 2), + ) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, subplots=True, use_index=False) + axes = _check_plot_works( + df.plot, + default_axes=True, + subplots=True, + use_index=False, + ) self._check_ticks_props(axes, xrot=0) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) @@ -76,8 +84,7 @@ def test_plot(self): _check_plot_works(df.plot, xticks=[1, 5, 10]) _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(df.plot, subplots=True, title="blah") + _check_plot_works(df.plot, default_axes=True, subplots=True, title="blah") # We have to redo it here because _check_plot_works does two plots, # once without an ax kwarg and once with an ax kwarg and the new sharex @@ -731,9 +738,7 @@ def test_plot_bar(self): _check_plot_works(df.plot.bar) _check_plot_works(df.plot.bar, legend=False) - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): - _check_plot_works(df.plot.bar, subplots=True) + _check_plot_works(df.plot.bar, default_axes=True, subplots=True) _check_plot_works(df.plot.bar, stacked=True) df = DataFrame( @@ -797,9 +802,13 @@ def test_boxplot_vertical(self): self._check_text_labels(ax.get_yticklabels(), labels) assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.box, subplots=True, vert=False, logx=True) + axes = _check_plot_works( + df.plot.box, + default_axes=True, + subplots=True, + vert=False, + logx=True, + ) self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) self._check_ax_scales(axes, xaxis="log") for ax, label in zip(axes, labels): @@ -846,8 +855,12 @@ def test_kde_df(self): ax = df.plot(kind="kde", rot=20, fontsize=5) self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot, kind="kde", subplots=True) + axes = _check_plot_works( + df.plot, + default_axes=True, + kind="kde", + subplots=True, + ) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) axes = df.plot(kind="kde", logy=True, subplots=True) @@ -871,8 +884,12 @@ def test_hist_df(self): expected = [pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.hist, subplots=True, logy=True) + axes = _check_plot_works( + df.plot.hist, + default_axes=True, + subplots=True, + logy=True, + ) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) self._check_ax_scales(axes, yaxis="log") @@ -1400,9 +1417,11 @@ def test_pie_df(self): ax = _check_plot_works(df.plot.pie, y=2) self._check_text_labels(ax.texts, df.index) - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works(df.plot.pie, subplots=True) + axes = _check_plot_works( + df.plot.pie, + default_axes=True, + subplots=True, + ) assert len(axes) == len(df.columns) for ax in axes: self._check_text_labels(ax.texts, df.index) @@ -1411,10 +1430,13 @@ def test_pie_df(self): labels = ["A", "B", "C", "D", "E"] color_args = ["r", "g", "b", "c", "m"] - with tm.assert_produces_warning(UserWarning): - axes = _check_plot_works( - df.plot.pie, subplots=True, labels=labels, colors=color_args - ) + axes = _check_plot_works( + df.plot.pie, + default_axes=True, + subplots=True, + labels=labels, + colors=color_args, + ) assert len(axes) == len(df.columns) for ax in axes: @@ -1521,16 +1543,15 @@ def test_errorbar_plot_different_kinds(self, kind): ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" + axes = _check_plot_works( + df.plot, + default_axes=True, + yerr=df_err, + xerr=df_err, + subplots=True, + kind=kind, ) - with tm.assert_produces_warning(UserWarning, match=msg): - # Similar warnings were observed in GH #13188 - axes = _check_plot_works( - df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind - ) - self._check_has_errorbars(axes, xerr=1, yerr=1) + self._check_has_errorbars(axes, xerr=1, yerr=1) @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError) @pytest.mark.slow @@ -1602,14 +1623,14 @@ def test_errorbar_timeseries(self, kind): ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - msg = ( - "To output multiple subplots, " - "the figure containing the passed axes is being cleared" + axes = _check_plot_works( + tdf.plot, + default_axes=True, + kind=kind, + yerr=tdf_err, + subplots=True, ) - with tm.assert_produces_warning(UserWarning, match=msg): - # Similar warnings 
were observed in GH #13188 - axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) + self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): np.random.seed(0) diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index b9b50523fba25..d9fe7363a15ad 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -53,11 +53,11 @@ def test_color_single_series_list(self): df = DataFrame({"A": [1, 2, 3]}) _check_plot_works(df.plot, color=["red"]) - def test_rgb_tuple_color(self): + @pytest.mark.parametrize("color", [(1, 0, 0), (1, 0, 0, 0.5)]) + def test_rgb_tuple_color(self, color): # GH 16695 df = DataFrame({"x": [1, 2], "y": [3, 4]}) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) - _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) + _check_plot_works(df.plot, x="x", y="y", color=color) def test_color_empty_string(self): df = DataFrame(np.random.randn(10, 2)) diff --git a/pandas/tests/plotting/frame/test_frame_groupby.py b/pandas/tests/plotting/frame/test_frame_groupby.py index 06ce0d5076d69..9c1676d6d97fb 100644 --- a/pandas/tests/plotting/frame/test_frame_groupby.py +++ b/pandas/tests/plotting/frame/test_frame_groupby.py @@ -1,6 +1,7 @@ """ Test cases for DataFrame.plot """ import numpy as np +import pytest import pandas.util._test_decorators as td @@ -34,11 +35,21 @@ def _assert_xtickslabels_visibility(self, axes, expected): for ax, exp in zip(axes, expected): self._check_visible(ax.get_xticklabels(), visible=exp) - def test_groupby_boxplot_sharey(self): + @pytest.mark.parametrize( + "kwargs, expected", + [ + # behavior without keyword + ({}, [True, False, True, False]), + # set sharey=True should be identical + ({"sharey": True}, [True, False, True, False]), + # sharey=False, all yticklabels should be visible + ({"sharey": False}, 
[True, True, True, True]), + ], + ) + def test_groupby_boxplot_sharey(self, kwargs, expected): # https://github.com/pandas-dev/pandas/issues/20968 # sharey can now be switched check whether the right # pair of axes is turned on or off - df = DataFrame( { "a": [-1.43, -0.15, -3.70, -1.43, -0.14], @@ -47,23 +58,22 @@ def test_groupby_boxplot_sharey(self): }, index=[0, 1, 2, 3, 4], ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # set sharey=True should be identical - axes = df.groupby("c").boxplot(sharey=True) - expected = [True, False, True, False] - self._assert_ytickslabels_visibility(axes, expected) - - # sharey=False, all yticklabels should be visible - axes = df.groupby("c").boxplot(sharey=False) - expected = [True, True, True, True] + axes = df.groupby("c").boxplot(**kwargs) self._assert_ytickslabels_visibility(axes, expected) - def test_groupby_boxplot_sharex(self): + @pytest.mark.parametrize( + "kwargs, expected", + [ + # behavior without keyword + ({}, [True, True, True, True]), + # set sharex=False should be identical + ({"sharex": False}, [True, True, True, True]), + # sharex=True, xticklabels should be visible + # only for bottom plots + ({"sharex": True}, [False, False, True, True]), + ], + ) + def test_groupby_boxplot_sharex(self, kwargs, expected): # https://github.com/pandas-dev/pandas/issues/20968 # sharex can now be switched check whether the right # pair of axes is turned on or off @@ -76,19 +86,5 @@ def test_groupby_boxplot_sharex(self): }, index=[0, 1, 2, 3, 4], ) - - # behavior without keyword - axes = df.groupby("c").boxplot() - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # set sharex=False should be identical - axes = df.groupby("c").boxplot(sharex=False) - expected = [True, True, True, True] - self._assert_xtickslabels_visibility(axes, expected) - - # sharex=True, 
yticklabels should be visible - # only for bottom plots - axes = df.groupby("c").boxplot(sharex=True) - expected = [False, False, True, True] + axes = df.groupby("c").boxplot(**kwargs) self._assert_xtickslabels_visibility(axes, expected) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 71a365eb80171..58f5d89ad15ca 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -128,24 +128,12 @@ def test_subplots_timeseries_y_axis(self): } testdata = DataFrame(data) - ax_numeric = testdata.plot(y="numeric") - assert ( - ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values - ).all() - ax_timedelta = testdata.plot(y="timedelta") - assert ( - ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values - ).all() - ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") - assert ( - ax_datetime_no_tz.get_lines()[0].get_data()[1] - == testdata["datetime_no_tz"].values - ).all() - ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") - assert ( - ax_datetime_all_tz.get_lines()[0].get_data()[1] - == testdata["datetime_all_tz"].values - ).all() + y_cols = ["numeric", "timedelta", "datetime_no_tz", "datetime_all_tz"] + for col in y_cols: + ax = testdata.plot(y=col) + result = ax.get_lines()[0].get_data()[1] + expected = testdata[col].values + assert (result == expected).all() msg = "no numeric data to plot" with pytest.raises(TypeError, match=msg): @@ -203,7 +191,7 @@ def test_subplots_timeseries_y_axis_not_supported(self): ).all() @pytest.mark.slow - def test_subplots_layout(self): + def test_subplots_layout_multi_column(self): # GH 6667 df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) @@ -236,15 +224,27 @@ def test_subplots_layout(self): with pytest.raises(ValueError): df.plot(subplots=True, layout=(-1, -1)) - # single column - df = DataFrame(np.random.rand(10, 1), 
index=list(string.ascii_letters[:10])) - axes = df.plot(subplots=True) - self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - assert axes.shape == (1,) + @pytest.mark.slow + @pytest.mark.parametrize( + "kwargs, expected_axes_num, expected_layout, expected_shape", + [ + ({}, 1, (1, 1), (1,)), + ({"layout": (3, 3)}, 1, (3, 3), (3, 3)), + ], + ) + def test_subplots_layout_single_column( + self, kwargs, expected_axes_num, expected_layout, expected_shape + ): - axes = df.plot(subplots=True, layout=(3, 3)) - self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) - assert axes.shape == (3, 3) + # GH 6667 + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True, **kwargs) + self._check_axes_shape( + axes, + axes_num=expected_axes_num, + layout=expected_layout, + ) + assert axes.shape == expected_shape @pytest.mark.slow def test_subplots_warnings(self): @@ -380,14 +380,6 @@ def test_subplots_dup_columns(self): assert len(ax.lines) == 0 assert len(ax.right_ax.lines) == 5 - @pytest.mark.slow - def test_bar_subplots_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) - self._check_bar_alignment(df, kind="barh", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) - @pytest.mark.slow def test_bar_log_no_subplots(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 @@ -522,80 +514,75 @@ def test_xlabel_ylabel_dataframe_subplots( assert all(ax.get_xlabel() == str(new_label) for ax in axes) @pytest.mark.slow - def test_bar_stacked_center(self): + @pytest.mark.parametrize( + "kwargs", + [ + # stacked center + dict(kind="bar", stacked=True), + dict(kind="bar", stacked=True, width=0.9), + dict(kind="barh", stacked=True), + dict(kind="barh", stacked=True, width=0.9), + # center + dict(kind="bar", stacked=False), + dict(kind="bar", 
stacked=False, width=0.9), + dict(kind="barh", stacked=False), + dict(kind="barh", stacked=False, width=0.9), + # subplots center + dict(kind="bar", subplots=True), + dict(kind="bar", subplots=True, width=0.9), + dict(kind="barh", subplots=True), + dict(kind="barh", subplots=True, width=0.9), + # align edge + dict(kind="bar", stacked=True, align="edge"), + dict(kind="bar", stacked=True, width=0.9, align="edge"), + dict(kind="barh", stacked=True, align="edge"), + dict(kind="barh", stacked=True, width=0.9, align="edge"), + dict(kind="bar", stacked=False, align="edge"), + dict(kind="bar", stacked=False, width=0.9, align="edge"), + dict(kind="barh", stacked=False, align="edge"), + dict(kind="barh", stacked=False, width=0.9, align="edge"), + dict(kind="bar", subplots=True, align="edge"), + dict(kind="bar", subplots=True, width=0.9, align="edge"), + dict(kind="barh", subplots=True, align="edge"), + dict(kind="barh", subplots=True, width=0.9, align="edge"), + ], + ) + def test_bar_align_multiple_columns(self, kwargs): # GH2157 df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) - - @pytest.mark.slow - def test_bar_center(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + self._check_bar_alignment(df, **kwargs) @pytest.mark.slow - def test_bar_align_single_column(self): + @pytest.mark.parametrize( + "kwargs", + [ + dict(kind="bar", stacked=False), + dict(kind="bar", stacked=True), + dict(kind="barh", stacked=False), + 
dict(kind="barh", stacked=True), + dict(kind="bar", subplots=True), + dict(kind="barh", subplots=True), + ], + ) + def test_bar_align_single_column(self, kwargs): df = DataFrame(np.random.randn(5)) - self._check_bar_alignment(df, kind="bar", stacked=False) - self._check_bar_alignment(df, kind="bar", stacked=True) - self._check_bar_alignment(df, kind="barh", stacked=False) - self._check_bar_alignment(df, kind="barh", stacked=True) - self._check_bar_alignment(df, kind="bar", subplots=True) - self._check_bar_alignment(df, kind="barh", subplots=True) - - @pytest.mark.slow - def test_bar_edge(self): - df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) - - self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") - self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, align="edge" - ) - - self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, align="edge" - ) - self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, align="edge" - ) + self._check_bar_alignment(df, **kwargs) @pytest.mark.slow - def test_bar_barwidth_position(self): + @pytest.mark.parametrize( + "kwargs", + [ + {"kind": "bar", "stacked": False}, + {"kind": "bar", "stacked": True}, + {"kind": "barh", "stacked": False}, + {"kind": "barh", "stacked": True}, + {"kind": "bar", "subplots": True}, + {"kind": "barh", 
"subplots": True}, + ], + ) + def test_bar_barwidth_position(self, kwargs): df = DataFrame(np.random.randn(5, 5)) - self._check_bar_alignment( - df, kind="bar", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) - self._check_bar_alignment( - df, kind="barh", stacked=False, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", stacked=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="bar", subplots=True, width=0.9, position=0.2 - ) - self._check_bar_alignment( - df, kind="barh", subplots=True, width=0.9, position=0.2 - ) + self._check_bar_alignment(df, width=0.9, position=0.2, **kwargs) @pytest.mark.slow def test_bar_barwidth_position_int(self): From 170683cbf1bd22ad097b8f97046271e667f6f8ca Mon Sep 17 00:00:00 2001 From: Mikhaylov-yv Date: Wed, 11 Nov 2020 13:46:46 +0300 Subject: [PATCH 147/147] black fix --- pandas/tests/plotting/frame/test_frame_subplots.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 58f5d89ad15ca..413c5b8a87dc7 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -233,7 +233,7 @@ def test_subplots_layout_multi_column(self): ], ) def test_subplots_layout_single_column( - self, kwargs, expected_axes_num, expected_layout, expected_shape + self, kwargs, expected_axes_num, expected_layout, expected_shape ): # GH 6667