diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index d3c907f4ce30f..6137cadc93125 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -352,7 +352,7 @@ def test_assigning_ops(self): df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) - def test_setitem_single_row_categorical(self): + def test_loc_setitem_single_row_categorical(self): # GH 25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index d9cdfa5ea45ec..079cc12389835 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -1,7 +1,16 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, period_range +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + DataFrame, + MultiIndex, + Timestamp, + get_dummies, + period_range, +) import pandas._testing as tm @@ -29,3 +38,99 @@ def test_getitem_periodindex(self): ts = df["1/1/2000"] tm.assert_series_equal(ts, df.iloc[:, 0]) + + def test_getitem_list_of_labels_categoricalindex_cols(self): + # GH#16115 + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + expected = DataFrame( + [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats + ) + dummies = get_dummies(cats) + result = dummies[list(dummies.columns)] + tm.assert_frame_equal(result, expected) + + +class TestGetitemCallable: + def test_getitem_callable(self, float_frame): + # GH#12533 + result = float_frame[lambda x: "A"] + expected = float_frame.loc[:, "A"] + tm.assert_series_equal(result, expected) + + result = float_frame[lambda x: ["A", "B"]] + expected = float_frame.loc[:, ["A", "B"]] + tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) + + df = float_frame[:3] + result = df[lambda x: [True, False, True]] + expected = float_frame.iloc[[0, 2], :] + tm.assert_frame_equal(result, expected) + + +class TestGetitemBooleanMask: + def test_getitem_bool_mask_categorical_index(self): + + df3 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=True), + name="B", + ), + ) + df4 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + }, + index=CategoricalIndex( + [1, 1, 2, 1, 3, 2], + dtype=CategoricalDtype([3, 2, 1], ordered=False), + name="B", + ), + ) + + result = df3[df3.index == "a"] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == "a"] + expected = df4.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index == 1] + expected = df3.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == 1] + expected = df4.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # since we have an ordered categorical + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=True, + # name='B') + result = df3[df3.index < 2] + expected = df3.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index > 1] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name='B') + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + df4[df4.index < 2] + with pytest.raises(TypeError, match=msg): + df4[df4.index > 1] diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4214ac14cba49..ff9646d45c0ac 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -110,21 +110,6 @@ def test_getitem_listlike(self, idx_type, levels, float_frame): with pytest.raises(KeyError, match="not in index"): frame[idx] - def test_getitem_callable(self, float_frame): - # GH 12533 - result = float_frame[lambda x: "A"] - expected = float_frame.loc[:, "A"] - tm.assert_series_equal(result, expected) - - result = float_frame[lambda x: ["A", "B"]] - expected = float_frame.loc[:, ["A", "B"]] - tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) - - df = float_frame[:3] - result = df[lambda x: [True, False, True]] - expected = float_frame.iloc[[0, 2], :] - tm.assert_frame_equal(result, expected) - def test_setitem_list(self, float_frame): float_frame["E"] = "foo" diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 2e06d8c73d7d1..46299fadf7789 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame +from pandas import DataFrame, Series import pandas._testing as tm @@ -39,3 +39,72 @@ def test_at_with_duplicate_axes_requires_scalar_lookup(self): df.at[1, ["A"]] = 1 with pytest.raises(ValueError, match=msg): df.at[:, "A"] = 1 + + +class TestAtErrors: + # TODO: De-duplicate/parametrize + # test_at_series_raises_key_error, test_at_frame_raises_key_error, + # test_at_series_raises_key_error2, test_at_frame_raises_key_error2 + + def test_at_series_raises_key_error(self): + # GH#31724 .at should match .loc + + ser = Series([1, 2, 3], index=[3, 2, 1]) + result = ser.at[1] + assert result == 3 + result = ser.loc[1] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + ser.at["a"] + with pytest.raises(KeyError, match="a"): + # .at should match .loc + ser.loc["a"] + + def test_at_frame_raises_key_error(self): + # GH#31724 .at should match .loc + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + + result = df.at[1, 0] + assert result == 3 + result = df.loc[1, 0] + assert result == 3 + + with pytest.raises(KeyError, match="a"): + df.at["a", 0] + with pytest.raises(KeyError, match="a"): + df.loc["a", 0] + + with pytest.raises(KeyError, match="a"): + df.at[1, "a"] + with pytest.raises(KeyError, match="a"): + df.loc[1, "a"] + + def test_at_series_raises_key_error2(self): + # at should not fallback + # GH#7814 + # GH#31724 .at should match .loc + ser = Series([1, 2, 3], index=list("abc")) + result = ser.at["a"] + assert result == 1 + result = ser.loc["a"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + ser.at[0] + with pytest.raises(KeyError, match="^0$"): + ser.loc[0] + + def test_at_frame_raises_key_error2(self): + # GH#31724 .at should match .loc + df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) + result = df.at["a", "A"] + assert result == 1 + result = df.loc["a", "A"] + assert result == 1 + + with pytest.raises(KeyError, match="^0$"): + df.at["a", 0] + with pytest.raises(KeyError, match="^0$"): + df.loc["a", 0] diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 6cdd73d37aec8..9885765bf53e4 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -2,7 +2,6 @@ import pytest from pandas.core.dtypes.common import is_categorical_dtype -from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd from pandas import ( @@ -276,27 +275,6 @@ def test_slicing_doc_examples(self): ) tm.assert_frame_equal(result, expected) - def test_getitem_category_type(self): - # GH 14580 - # test iloc() on Series with Categorical data - - s = Series([1, 2, 3]).astype("category") - - # get slice - result = s.iloc[0:2] - expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - - # get list of indexes - result = s.iloc[[0, 1]] - expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - - # get boolean array - result = s.iloc[[True, False, False]] - expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) - tm.assert_series_equal(result, expected) - def test_loc_listlike(self): # list of labels @@ -413,17 +391,6 @@ def test_loc_listlike_dtypes(self): with pytest.raises(KeyError, match=msg): df.loc[["a", "x"]] - def test_getitem_with_listlike(self): - # GH 16115 - cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) - - expected = DataFrame( - [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats - ) - dummies = pd.get_dummies(cats) - result = dummies[list(dummies.columns)] - tm.assert_frame_equal(result, expected) - def test_ix_categorical_index(self): # GH 12531 df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) @@ -512,68 +479,6 @@ def test_loc_and_at_with_categorical_index(self): assert df.loc["B", 1] == 4 assert df.at["B", 1] == 4 - def test_getitem_bool_mask_categorical_index(self): - - df3 = DataFrame( - { - "A": np.arange(6, dtype="int64"), - }, - index=CategoricalIndex( - [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=True), name="B" - ), - ) - df4 = DataFrame( - { - "A": np.arange(6, dtype="int64"), - }, - index=CategoricalIndex( - [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=False), name="B" - ), - ) - - result = df3[df3.index == "a"] - expected = df3.iloc[[]] - tm.assert_frame_equal(result, expected) - - result = df4[df4.index == "a"] - expected = df4.iloc[[]] - tm.assert_frame_equal(result, expected) - - result = df3[df3.index == 1] - expected = df3.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - result = df4[df4.index == 1] - expected = df4.iloc[[0, 1, 3]] - tm.assert_frame_equal(result, expected) - - # since we have an ordered categorical - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=True, - # name='B') - result = df3[df3.index < 2] - expected = df3.iloc[[4]] - tm.assert_frame_equal(result, expected) - - result = df3[df3.index > 1] - expected = df3.iloc[[]] - tm.assert_frame_equal(result, expected) - - # unordered - # cannot be compared - - # CategoricalIndex([1, 1, 2, 1, 3, 2], - # categories=[3, 2, 1], - # ordered=False, - # name='B') - msg = "Unordered Categoricals can only compare equality or not" - with pytest.raises(TypeError, match=msg): - df4[df4.index < 2] - with pytest.raises(TypeError, match=msg): - df4[df4.index > 1] - def test_indexing_with_category(self): # https://github.com/pandas-dev/pandas/issues/12564 diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index fad3478499929..e7bf186ae6456 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -7,33 +7,6 @@ class TestDatetimeIndex: - def test_setitem_with_datetime_tz(self): - # 16889 - # support .loc with alignment and tz-aware DatetimeIndex - mask = np.array([True, False, True, False]) - - idx = date_range("20010101", periods=4, tz="UTC") - df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") - - result = df.copy() - result.loc[mask, :] = df.loc[mask, :] - tm.assert_frame_equal(result, df) - - result = df.copy() - result.loc[mask] = df.loc[mask] - tm.assert_frame_equal(result, df) - - idx = date_range("20010101", periods=4) - df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") - - result = df.copy() - result.loc[mask, :] = df.loc[mask, :] - tm.assert_frame_equal(result, df) - - result = df.copy() - result.loc[mask] = df.loc[mask] - tm.assert_frame_equal(result, df) - def test_indexing_with_datetime_tz(self): # GH#8260 @@ -187,22 +160,6 @@ def test_indexing_with_datetimeindex_tz(self): expected = Series([0, 5], index=index) tm.assert_series_equal(result, expected) - def test_partial_setting_with_datetimelike_dtype(self): - - # GH9478 - # a datetimeindex alignment issue with partial setting - df = DataFrame( - np.arange(6.0).reshape(3, 2), - columns=list("AB"), - index=date_range("1/1/2000", periods=3, freq="1H"), - ) - expected = df.copy() - expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] - - mask = df.A < 1 - df.loc[mask, "C"] = df.loc[mask].index - tm.assert_frame_equal(df, expected) - def test_series_partial_set_datetime(self): # GH 11497 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6c80354610a78..f8dfda3dab486 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, concat, date_range, isna +from pandas import CategoricalDtype, DataFrame, Series, concat, date_range, isna import pandas._testing as tm from pandas.api.types import is_scalar from pandas.core.indexing import IndexingError @@ -748,6 +748,27 @@ def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): tm.assert_series_equal(result, expected) + def test_iloc_getitem_categorical_values(self): + # GH#14580 + # test iloc() on Series with Categorical data + + ser = Series([1, 2, 3]).astype("category") + + # get slice + result = ser.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = ser.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = ser.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 0faa784634fd2..6939b280a988b 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1501,6 +1501,62 @@ def test_loc_getitem_slice_floats_inexact(self): s1 = df.loc[52195.1:52198.9] assert len(s1) == 3 + def test_loc_getitem_float_slice_float64index(self): + ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + idx = np.arange(10, 20, dtype=float) + idx[2] = 12.2 + ser.index = idx + assert len(ser.loc[12.0:]) == 8 + assert len(ser.loc[12.5:]) == 7 + + +class TestLocBooleanMask: + def test_loc_setitem_mask_with_datetimeindex_tz(self): + # GH#16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range("20010101", periods=4, tz="UTC") + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + idx = date_range("20010101", periods=4) + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + def test_loc_setitem_mask_and_label_with_datetimeindex(self): + # GH#9478 + # a datetimeindex alignment issue with partial setting + df = DataFrame( + np.arange(6.0).reshape(3, 2), + columns=list("AB"), + index=date_range("1/1/2000", periods=3, freq="1H"), + ) + expected = df.copy() + expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, "C"] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index 72296bb222a5a..127d00c217a15 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -146,69 +146,6 @@ def test_frame_at_with_duplicate_axes(self): expected = Series([2.0, 2.0], index=["A", "A"], name=1) tm.assert_series_equal(df.iloc[1], expected) - def test_series_at_raises_type_error(self): - # at should not fallback - # GH 7814 - # GH#31724 .at should match .loc - ser = Series([1, 2, 3], index=list("abc")) - result = ser.at["a"] - assert result == 1 - result = ser.loc["a"] - assert result == 1 - - with pytest.raises(KeyError, match="^0$"): - ser.at[0] - with pytest.raises(KeyError, match="^0$"): - ser.loc[0] - - def test_frame_raises_key_error(self): - # GH#31724 .at should match .loc - df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) - result = df.at["a", "A"] - assert result == 1 - result = df.loc["a", "A"] - assert result == 1 - - with pytest.raises(KeyError, match="^0$"): - df.at["a", 0] - with pytest.raises(KeyError, match="^0$"): - df.loc["a", 0] - - def test_series_at_raises_key_error(self): - # GH#31724 .at should match .loc - - ser = Series([1, 2, 3], index=[3, 2, 1]) - result = ser.at[1] - assert result == 3 - result = ser.loc[1] - assert result == 3 - - with pytest.raises(KeyError, match="a"): - ser.at["a"] - with pytest.raises(KeyError, match="a"): - # .at should match .loc - ser.loc["a"] - - def test_frame_at_raises_key_error(self): - # GH#31724 .at should match .loc - - df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) - - result = df.at[1, 0] - assert result == 3 - result = df.loc[1, 0] - assert result == 3 - - with pytest.raises(KeyError, match="a"): - df.at["a", 0] - with pytest.raises(KeyError, match="a"): - df.loc["a", 0] - - with pytest.raises(KeyError, match="a"): - df.at[1, "a"] - with pytest.raises(KeyError, match="a"): - df.loc[1, "a"] - # TODO: belongs somewhere else? def test_getitem_list_missing_key(self): # GH 13822, incorrect error string with non-unique columns when missing diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index b2fc2e2d0619d..44fb8dc519322 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -57,23 +57,6 @@ def test_fancy_setitem(): assert (s[48:54] == -3).all() -def test_dti_reset_index_round_trip(): - dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) - d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) - d2 = d1.reset_index() - assert d2.dtypes[0] == np.dtype("M8[ns]") - d3 = d2.set_index("index") - tm.assert_frame_equal(d1, d3, check_names=False) - - # #2329 - stamp = datetime(2012, 11, 22) - df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) - df = df.set_index("Date") - - assert df.index[0] == stamp - assert df.reset_index()["Date"][0] == stamp - - def test_slicing_datetimes(): # GH 7523 diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 86af29eac1bae..2ad21d8221e25 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -86,16 +86,3 @@ def test_slice_float_get_set(datetime_series): datetime_series[4.5:10.0] with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): datetime_series[4.5:10.0] = 0 - - -def test_slice_floats2(): - s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) - - assert len(s.loc[12.0:]) == 8 - assert len(s.loc[12.5:]) == 7 - - i = np.arange(10, 20, dtype=float) - i[2] = 12.2 - s.index = i - assert len(s.loc[12.0:]) == 8 - assert len(s.loc[12.5:]) == 7 diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 8044b590b3463..3cd9d52f8e754 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -18,6 +18,7 @@ Series, Timedelta, Timestamp, + cut, date_range, ) import pandas._testing as tm @@ -76,6 +77,35 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_astype_empty_constructor_equality(self, dtype): + # see GH#15524 + + if dtype not in ( + "S", + "V", # poor support (if any) currently + "M", + "m", # Generic timestamps raise a ValueError. Already tested. + ): + init_empty = Series([], dtype=dtype) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + @pytest.mark.parametrize("dtype", [str, np.str_]) + @pytest.mark.parametrize( + "series", + [ + Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), + ], + ) + def test_astype_str_map(self, dtype, series): + # see GH#4405 + result = series.astype(dtype) + expected = series.map(str) + tm.assert_series_equal(result, expected) + def test_astype_float_to_period(self): result = Series([np.nan]).astype("period[D]") expected = Series([NaT], dtype="period[D]") @@ -309,6 +339,21 @@ def test_astype_unicode(self): class TestAstypeCategorical: + def test_astype_categorical_invalid_conversions(self): + # invalid conversion (these are NOT a dtype) + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.randint(0, 10000, 100)).sort_values() + ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) + + msg = ( + "dtype '' " + "not understood" + ) + with pytest.raises(TypeError, match=msg): + ser.astype(Categorical) + with pytest.raises(TypeError, match=msg): + ser.astype("object").astype(Categorical) + def test_astype_categoricaldtype(self): s = Series(["a", "b", "a"]) result = s.astype(CategoricalDtype(["a", "b"], ordered=True)) diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py new file mode 100644 index 0000000000000..a7ddc0c22dcf4 --- /dev/null +++ b/pandas/tests/series/methods/test_item.py @@ -0,0 +1,49 @@ +import pytest + +from pandas import Series, Timedelta, Timestamp, date_range + + +class TestItem: + def test_item(self): + ser = Series([1]) + result = ser.item() + assert result == 1 + assert result == ser.iloc[0] + assert isinstance(result, int) # i.e. not np.int64 + + ser = Series([0.5], index=[3]) + result = ser.item() + assert isinstance(result, float) + assert result == 0.5 + + ser = Series([1, 2]) + msg = "can only convert an array of size 1" + with pytest.raises(ValueError, match=msg): + ser.item() + + dti = date_range("2016-01-01", periods=2) + with pytest.raises(ValueError, match=msg): + dti.item() + with pytest.raises(ValueError, match=msg): + Series(dti).item() + + val = dti[:1].item() + assert isinstance(val, Timestamp) + val = Series(dti)[:1].item() + assert isinstance(val, Timestamp) + + tdi = dti - dti + with pytest.raises(ValueError, match=msg): + tdi.item() + with pytest.raises(ValueError, match=msg): + Series(tdi).item() + + val = tdi[:1].item() + assert isinstance(val, Timedelta) + val = Series(tdi)[:1].item() + assert isinstance(val, Timedelta) + + # Case where ser[0] would not work + ser = Series(dti, index=[5, 6]) + val = ser[:1].item() + assert val == dti[0] diff --git a/pandas/tests/series/methods/test_reset_index.py b/pandas/tests/series/methods/test_reset_index.py index 13d6a3b1447a1..40e567a8c33ca 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -1,12 +1,30 @@ +from datetime import datetime + import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series, date_range import pandas._testing as tm class TestResetIndex: + def test_reset_index_dti_round_trip(self): + dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None) + d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + assert d2.dtypes[0] == np.dtype("M8[ns]") + d3 = d2.set_index("index") + tm.assert_frame_equal(d1, d3, check_names=False) + + # GH#2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) + df = df.set_index("Date") + + assert df.index[0] == stamp + assert df.reset_index()["Date"][0] == stamp + def test_reset_index(self): df = tm.makeDataFrame()[:5] ser = df.stack() diff --git a/pandas/tests/series/methods/test_values.py b/pandas/tests/series/methods/test_values.py index e28a714ea656d..2982dcd52991d 100644 --- a/pandas/tests/series/methods/test_values.py +++ b/pandas/tests/series/methods/test_values.py @@ -18,3 +18,8 @@ def test_values_object_extension_dtypes(self, data): result = Series(data).values expected = np.array(data.astype(object)) tm.assert_numpy_array_equal(result, expected) + + def test_values(self, datetime_series): + tm.assert_almost_equal( + datetime_series.values, datetime_series, check_dtype=False + ) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index beace074894a8..ea0e1203e22ed 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, Timedelta, Timestamp, date_range +from pandas import DataFrame, Index, Series, date_range import pandas._testing as tm @@ -112,11 +112,6 @@ def test_not_hashable(self): def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) - def test_values(self, datetime_series): - tm.assert_almost_equal( - datetime_series.values, datetime_series, check_dtype=False - ) - def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" @@ -135,50 +130,6 @@ def test_class_axis(self): # no exception and no empty docstring assert pydoc.getdoc(Series.index) - def test_item(self): - s = Series([1]) - result = s.item() - assert result == 1 - assert result == s.iloc[0] - assert isinstance(result, int) # i.e. not np.int64 - - ser = Series([0.5], index=[3]) - result = ser.item() - assert isinstance(result, float) - assert result == 0.5 - - ser = Series([1, 2]) - msg = "can only convert an array of size 1" - with pytest.raises(ValueError, match=msg): - ser.item() - - dti = pd.date_range("2016-01-01", periods=2) - with pytest.raises(ValueError, match=msg): - dti.item() - with pytest.raises(ValueError, match=msg): - Series(dti).item() - - val = dti[:1].item() - assert isinstance(val, Timestamp) - val = Series(dti)[:1].item() - assert isinstance(val, Timestamp) - - tdi = dti - dti - with pytest.raises(ValueError, match=msg): - tdi.item() - with pytest.raises(ValueError, match=msg): - Series(tdi).item() - - val = tdi[:1].item() - assert isinstance(val, Timedelta) - val = Series(tdi)[:1].item() - assert isinstance(val, Timedelta) - - # Case where ser[0] would not work - ser = Series(dti, index=[5, 6]) - val = ser[:1].item() - assert val == dti[0] - def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 2fbed92567f71..f5c3623fb9986 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,5 +1,3 @@ -import string - import numpy as np import pytest @@ -16,20 +14,6 @@ def test_dtype(self, datetime_series): assert datetime_series.dtype == np.dtype("float64") assert datetime_series.dtypes == np.dtype("float64") - @pytest.mark.parametrize("dtype", [str, np.str_]) - @pytest.mark.parametrize( - "series", - [ - Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), - Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), - ], - ) - def test_astype_str_map(self, dtype, series): - # see gh-4405 - result = series.astype(dtype) - expected = series.map(str) - tm.assert_series_equal(result, expected) - def test_astype_from_categorical(self): items = ["a", "b", "c", "a"] s = Series(items) @@ -120,36 +104,6 @@ def cmp(a, b): s.astype("object").astype(CategoricalDtype()), roundtrip_expected ) - def test_invalid_conversions(self): - # invalid conversion (these are NOT a dtype) - cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.randint(0, 10000, 100)).sort_values() - ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) - - msg = ( - "dtype '' " - "not understood" - ) - with pytest.raises(TypeError, match=msg): - ser.astype(Categorical) - with pytest.raises(TypeError, match=msg): - ser.astype("object").astype(Categorical) - - @pytest.mark.parametrize("dtype", np.typecodes["All"]) - def test_astype_empty_constructor_equality(self, dtype): - # see gh-15524 - - if dtype not in ( - "S", - "V", # poor support (if any) currently - "M", - "m", # Generic timestamps raise a ValueError. Already tested. - ): - init_empty = Series([], dtype=dtype) - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): - as_type_empty = Series([]).astype(dtype) - tm.assert_series_equal(init_empty, as_type_empty) - def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical series = Series(["a", "b", "c"])