diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 45e0d503f30e7..5b0004a395334 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -59,30 +59,6 @@ def test_isin_cats(): tm.assert_numpy_array_equal(expected, result) -@pytest.mark.parametrize( - "to_replace, value, result, expected_error_msg", - [ - ("b", "c", ["a", "c"], "Categorical.categories are different"), - ("c", "d", ["a", "b"], None), - # https://github.com/pandas-dev/pandas/issues/33288 - ("a", "a", ["a", "b"], None), - ("b", None, ["a", None], "Categorical.categories length are different"), - ], -) -def test_replace(to_replace, value, result, expected_error_msg): - # GH 26988 - cat = pd.Categorical(["a", "b"]) - expected = pd.Categorical(result) - result = cat.replace(to_replace, value) - tm.assert_categorical_equal(result, expected) - if to_replace == "b": # the "c" test is supposed to be unchanged - with pytest.raises(AssertionError, match=expected_error_msg): - # ensure non-inplace call does not affect original - tm.assert_categorical_equal(cat, expected) - cat.replace(to_replace, value, inplace=True) - tm.assert_categorical_equal(cat, expected) - - @pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])]) def test_isin_empty(empty): s = pd.Categorical(["a", "b"]) @@ -105,94 +81,3 @@ def test_diff(): result = df.diff() tm.assert_frame_equal(result, expected) - - -class TestTake: - # https://github.com/pandas-dev/pandas/issues/20664 - - def test_take_default_allow_fill(self): - cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(None): - result = cat.take([0, -1]) - - assert result.equals(cat) - - def test_take_positive_no_warning(self): - cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(None): - cat.take([0, 0]) - - def test_take_bounds(self, allow_fill): - # https://github.com/pandas-dev/pandas/issues/20664 - cat = pd.Categorical(["a", "b", "a"]) - if allow_fill: - msg = "indices are out-of-bounds" - else: - msg = "index 4 is out of bounds for( axis 0 with)? size 3" - with pytest.raises(IndexError, match=msg): - cat.take([4, 5], allow_fill=allow_fill) - - def test_take_empty(self, allow_fill): - # https://github.com/pandas-dev/pandas/issues/20664 - cat = pd.Categorical([], categories=["a", "b"]) - if allow_fill: - msg = "indices are out-of-bounds" - else: - msg = "cannot do a non-empty take from an empty axes" - with pytest.raises(IndexError, match=msg): - cat.take([0], allow_fill=allow_fill) - - def test_positional_take(self, ordered): - cat = pd.Categorical( - ["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered - ) - result = cat.take([0, 1, 2], allow_fill=False) - expected = pd.Categorical( - ["a", "a", "b"], categories=cat.categories, ordered=ordered - ) - tm.assert_categorical_equal(result, expected) - - def test_positional_take_unobserved(self, ordered): - cat = pd.Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) - result = cat.take([1, 0], allow_fill=False) - expected = pd.Categorical( - ["b", "a"], categories=cat.categories, ordered=ordered - ) - tm.assert_categorical_equal(result, expected) - - def test_take_allow_fill(self): - # https://github.com/pandas-dev/pandas/issues/23296 - cat = pd.Categorical(["a", "a", "b"]) - result = cat.take([0, -1, -1], allow_fill=True) - expected = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b"]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_with_negative_one(self): - # -1 was a category - cat = pd.Categorical([-1, 0, 1]) - result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) - expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_value(self): - # https://github.com/pandas-dev/pandas/issues/23296 - cat = pd.Categorical(["a", "b", "c"]) - result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) - expected = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]) - tm.assert_categorical_equal(result, expected) - - def test_take_fill_value_new_raises(self): - # https://github.com/pandas-dev/pandas/issues/23296 - cat = pd.Categorical(["a", "b", "c"]) - xpr = r"'fill_value=d' is not present in this Categorical's categories" - with pytest.raises(ValueError, match=xpr): - cat.take([0, 1, -1], fill_value="d", allow_fill=True) - - def test_take_nd_deprecated(self): - cat = pd.Categorical(["a", "b", "c"]) - with tm.assert_produces_warning(FutureWarning): - cat.take_nd([0, 1]) - - ci = pd.Index(cat) - with tm.assert_produces_warning(FutureWarning): - ci.take_nd([0, 1]) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 4bf9b4b40d0b6..98dcdd1692117 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -359,10 +359,3 @@ def test_validate_inplace_raises(self, value): with pytest.raises(ValueError, match=msg): cat.sort_values(inplace=value) - - def test_isna(self): - exp = np.array([False, False, True]) - c = Categorical(["a", "b", np.nan]) - res = c.isna() - - tm.assert_numpy_array_equal(res, exp) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index bf0b5289b5df1..6068166cb8618 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -1,7 +1,17 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Interval, + IntervalIndex, + PeriodIndex, + Series, + Timedelta, + Timestamp, +) import pandas._testing as tm import pandas.core.common as com from pandas.tests.arrays.categorical.common import TestCategorical @@ -256,6 +266,55 @@ def test_where_ordered_differs_rasies(self): ser.where([True, False, True], other) +class TestContains: + def test_contains(self): + # GH#21508 + c = Categorical(list("aabbca"), categories=list("cab")) + + assert "b" in c + assert "z" not in c + assert np.nan not in c + with pytest.raises(TypeError, match="unhashable type: 'list'"): + assert [1] in c + + # assert codes NOT in index + assert 0 not in c + assert 1 not in c + + c = Categorical(list("aabbca") + [np.nan], categories=list("cab")) + assert np.nan in c + + @pytest.mark.parametrize( + "item, expected", + [ + (Interval(0, 1), True), + (1.5, True), + (Interval(0.5, 1.5), False), + ("a", False), + (Timestamp(1), False), + (Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH#23705 + cat = Categorical(IntervalIndex.from_breaks(range(3))) + result = item in cat + assert result is expected + + def test_contains_list(self): + # GH#21729 + cat = Categorical([1, 2, 3]) + + assert "a" not in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in cat + + @pytest.mark.parametrize("index", [True, False]) def test_mask_with_boolean(index): s = Series(range(3)) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 364c290edc46c..cb0ba128c1fb7 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -11,6 +11,13 @@ class TestCategoricalMissing: + def test_isna(self): + exp = np.array([False, False, True]) + cat = Categorical(["a", "b", np.nan]) + res = cat.isna() + + tm.assert_numpy_array_equal(res, exp) + def test_na_flags_int_categories(self): # #1457 diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 51dc66c18a3e6..328b5771e617c 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -395,50 +395,3 @@ def test_numeric_like_ops(self): msg = "Object with dtype category cannot perform the numpy op log" with pytest.raises(TypeError, match=msg): np.log(s) - - def test_contains(self): - # GH21508 - c = Categorical(list("aabbca"), categories=list("cab")) - - assert "b" in c - assert "z" not in c - assert np.nan not in c - with pytest.raises(TypeError, match="unhashable type: 'list'"): - assert [1] in c - - # assert codes NOT in index - assert 0 not in c - assert 1 not in c - - c = Categorical(list("aabbca") + [np.nan], categories=list("cab")) - assert np.nan in c - - @pytest.mark.parametrize( - "item, expected", - [ - (pd.Interval(0, 1), True), - (1.5, True), - (pd.Interval(0.5, 1.5), False), - ("a", False), - (pd.Timestamp(1), False), - (pd.Timedelta(1), False), - ], - ids=str, - ) - def test_contains_interval(self, item, expected): - # GH 23705 - cat = Categorical(pd.IntervalIndex.from_breaks(range(3))) - result = item in cat - assert result is expected - - def test_contains_list(self): - # GH#21729 - cat = Categorical([1, 2, 3]) - - assert "a" not in cat - - with pytest.raises(TypeError, match="unhashable type"): - ["a"] in cat - - with pytest.raises(TypeError, match="unhashable type"): - ["a", "b"] in cat diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py index 8b784fde1d3c5..5889195ad68db 100644 --- a/pandas/tests/arrays/categorical/test_replace.py +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +from pandas import Categorical import pandas._testing as tm @@ -45,3 +46,28 @@ def test_replace(to_replace, value, expected, flip_categories): tm.assert_series_equal(expected, result, check_category_order=False) tm.assert_series_equal(expected, s, check_category_order=False) + + +@pytest.mark.parametrize( + "to_replace, value, result, expected_error_msg", + [ + ("b", "c", ["a", "c"], "Categorical.categories are different"), + ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), + ("b", None, ["a", None], "Categorical.categories length are different"), + ], +) +def test_replace2(to_replace, value, result, expected_error_msg): + # TODO: better name + # GH#26988 + cat = Categorical(["a", "b"]) + expected = Categorical(result) + result = cat.replace(to_replace, value) + tm.assert_categorical_equal(result, expected) + if to_replace == "b": # the "c" test is supposed to be unchanged + with pytest.raises(AssertionError, match=expected_error_msg): + # ensure non-inplace call does not affect original + tm.assert_categorical_equal(cat, expected) + cat.replace(to_replace, value, inplace=True) + tm.assert_categorical_equal(cat, expected) diff --git a/pandas/tests/arrays/categorical/test_take.py b/pandas/tests/arrays/categorical/test_take.py new file mode 100644 index 0000000000000..7a27f5c3e73ad --- /dev/null +++ b/pandas/tests/arrays/categorical/test_take.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest + +from pandas import Categorical, Index +import pandas._testing as tm + + +class TestTake: + # https://github.com/pandas-dev/pandas/issues/20664 + + def test_take_default_allow_fill(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) + + def test_take_positive_no_warning(self): + cat = Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + cat.take([0, 0]) + + def test_take_bounds(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical(["a", "b", "a"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "index 4 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + cat.take([4, 5], allow_fill=allow_fill) + + def test_take_empty(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = Categorical([], categories=["a", "b"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "cannot do a non-empty take from an empty axes" + with pytest.raises(IndexError, match=msg): + cat.take([0], allow_fill=allow_fill) + + def test_positional_take(self, ordered): + cat = Categorical(["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered) + result = cat.take([0, 1, 2], allow_fill=False) + expected = Categorical( + ["a", "a", "b"], categories=cat.categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + + def test_positional_take_unobserved(self, ordered): + cat = Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered) + result = cat.take([1, 0], allow_fill=False) + expected = Categorical(["b", "a"], categories=cat.categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_take_allow_fill(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "a", "b"]) + result = cat.take([0, -1, -1], allow_fill=True) + expected = Categorical(["a", np.nan, np.nan], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_with_negative_one(self): + # -1 was a category + cat = Categorical([-1, 0, 1]) + result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) + expected = Categorical([-1, -1, 0], categories=[-1, 0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) + expected = Categorical(["a", "b", "a"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value_new_raises(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = Categorical(["a", "b", "c"]) + xpr = r"'fill_value=d' is not present in this Categorical's categories" + with pytest.raises(ValueError, match=xpr): + cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) + + ci = Index(cat) + with tm.assert_produces_warning(FutureWarning): + ci.take_nd([0, 1])