Skip to content

Commit

Permalink
REF: simplify extension reduction tests (pandas-dev#54394)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Aug 4, 2023
1 parent 23b3ac8 commit 7bc2000
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 104 deletions.
1 change: 1 addition & 0 deletions pandas/tests/extension/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class TestMyDtype(BaseDtypeTests):
BaseBooleanReduceTests,
BaseNoReduceTests,
BaseNumericReduceTests,
BaseReduceTests,
)
from pandas.tests.extension.base.reshaping import BaseReshapingTests # noqa: F401
from pandas.tests.extension.base.setitem import BaseSetitemTests # noqa: F401
83 changes: 49 additions & 34 deletions pandas/tests/extension/base/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class BaseReduceTests(BaseExtensionTests):
make sense for numeric/boolean operations.
"""

def _supports_reduction(self, obj, op_name: str) -> bool:
# Specify if we expect this reduction to succeed.
return False

def check_reduce(self, s, op_name, skipna):
# We perform the same operation on the np.float64 data and check
# that the results match. Override if you need to cast to something
Expand Down Expand Up @@ -66,47 +70,42 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):

tm.assert_extension_array_equal(result1, expected)


class BaseNoReduceTests(BaseReduceTests):
"""we don't define any reductions"""

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)

msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
op_name = all_boolean_reductions
s = pd.Series(data)

msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)
if not self._supports_reduction(s, op_name):
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)
with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

else:
self.check_reduce(s, op_name, skipna)

class BaseNumericReduceTests(BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_numeric_reductions, skipna):
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
s = pd.Series(data)

# min/max with empty produce numpy warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
self.check_reduce(s, op_name, skipna)
if not self._supports_reduction(s, op_name):
msg = (
"[Cc]annot perform|Categorical is not ordered for operation|"
"does not support reduction|"
)

with pytest.raises(TypeError, match=msg):
getattr(s, op_name)(skipna=skipna)

else:
# min/max with empty produce numpy warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
self.check_reduce(s, op_name, skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
Expand All @@ -118,12 +117,28 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
if op_name in ["count", "kurt", "sem"]:
pytest.skip(f"{op_name} not an array method")

if not self._supports_reduction(s, op_name):
pytest.skip(f"Reduction {op_name} not supported for this dtype")

self.check_reduce_frame(s, op_name, skipna)


# TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests
class BaseNoReduceTests(BaseReduceTests):
"""we don't define any reductions"""


class BaseNumericReduceTests(BaseReduceTests):
# For backward compatibility only, this only runs the numeric reductions
def _supports_reduction(self, obj, op_name: str) -> bool:
if op_name in ["any", "all"]:
pytest.skip("These are tested in BaseBooleanReduceTests")
return True


class BaseBooleanReduceTests(BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_boolean_reductions, skipna):
op_name = all_boolean_reductions
s = pd.Series(data)
self.check_reduce(s, op_name, skipna)
# For backward compatibility only, this only runs the boolean reductions
def _supports_reduction(self, obj, op_name: str) -> bool:
if op_name not in ["any", "all"]:
pytest.skip("These are tested in BaseNumericReduceTests")
return True
9 changes: 4 additions & 5 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def test_fillna_series_method(self, data_missing, fillna_method):


class Reduce:
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, s, op_name, skipna):
if op_name in ["median", "skew", "kurt", "sem"]:
msg = r"decimal does not support the .* operation"
Expand Down Expand Up @@ -185,7 +188,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
tm.assert_series_equal(result, expected)


class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
class TestReduce(Reduce, base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand All @@ -196,10 +199,6 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
return super().test_reduce_frame(data, all_numeric_reductions, skipna)


class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
pass


class TestMethods(base.BaseMethodsTests):
def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
warn = FutureWarning if not using_copy_on_write else None
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_fillna_frame(self):
unhashable = pytest.mark.xfail(reason="Unhashable")


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
pass


Expand Down
77 changes: 39 additions & 38 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,10 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
self.check_accumulate(ser, op_name, skipna)


class TestBaseNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, ser, op_name, skipna):
pa_dtype = ser.dtype.pyarrow_dtype
if op_name == "count":
Expand All @@ -437,7 +440,7 @@ def check_reduce(self, ser, op_name, skipna):
tm.assert_almost_equal(result, expected)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
pa_dtype = data.dtype.pyarrow_dtype
opname = all_numeric_reductions

Expand Down Expand Up @@ -505,44 +508,10 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
"median",
}:
request.node.add_marker(xfail_mark)
super().test_reduce_series(data, all_numeric_reductions, skipna)

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
if op_name not in ["median", "var", "std"]:
cmp_dtype = arr.dtype
else:
cmp_dtype = "float64[pyarrow]"
elif op_name in ["median", "var", "std", "mean", "skew"]:
cmp_dtype = "float64[pyarrow]"
else:
cmp_dtype = {
"i": "int64[pyarrow]",
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
return cmp_dtype
super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
if op_name == "skew":
assert not hasattr(data, op_name)
return
return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
# GH 52679
result = pd.Series([1, 2], dtype=f"{typ}[pyarrow]").median()
assert result == 1.5


class TestBaseBooleanReduce(base.BaseBooleanReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(
def test_reduce_series_boolean(
self, data, all_boolean_reductions, skipna, na_value, request
):
pa_dtype = data.dtype.pyarrow_dtype
Expand Down Expand Up @@ -574,6 +543,38 @@ def test_reduce_series(
result = getattr(ser, op_name)(skipna=skipna)
assert result is (op_name == "any")

def _get_expected_reduction_dtype(self, arr, op_name: str):
if op_name in ["max", "min"]:
cmp_dtype = arr.dtype
elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
if op_name not in ["median", "var", "std"]:
cmp_dtype = arr.dtype
else:
cmp_dtype = "float64[pyarrow]"
elif op_name in ["median", "var", "std", "mean", "skew"]:
cmp_dtype = "float64[pyarrow]"
else:
cmp_dtype = {
"i": "int64[pyarrow]",
"u": "uint64[pyarrow]",
"f": "float64[pyarrow]",
}[arr.dtype.kind]
return cmp_dtype

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
if op_name == "skew":
assert not hasattr(data, op_name)
return
return super().test_reduce_frame(data, all_numeric_reductions, skipna)

@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
def test_median_not_approximate(self, typ):
# GH 52679
result = pd.Series([1, 2], dtype=f"{typ}[pyarrow]").median()
assert result == 1.5


class TestBaseGroupby(base.BaseGroupbyTests):
def test_in_numeric_groupby(self, data_for_grouping):
Expand Down
9 changes: 4 additions & 5 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,10 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
tm.assert_frame_equal(result, expected)


class TestNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
return True

def check_reduce(self, s, op_name, skipna):
if op_name == "count":
result = getattr(s, op_name)()
Expand Down Expand Up @@ -236,10 +239,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
return cmp_dtype


class TestBooleanReduce(base.BaseBooleanReduceTests):
pass


class TestPrinting(base.BasePrintingTests):
pass

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class TestMissing(base.BaseMissingTests):
pass


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
pass


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests):
pass


class TestReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/extension/test_masked_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,12 @@ class TestGroupby(base.BaseGroupbyTests):
pass


class TestNumericReduce(base.BaseNumericReduceTests):
class TestReduce(base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
if op_name in ["any", "all"]:
pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
return True

def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
# overwrite to ensure pd.NA is tested instead of np.nan
# https://github.com/pandas-dev/pandas/issues/30958
Expand Down Expand Up @@ -265,11 +270,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
return cmp_dtype


@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
class TestBooleanReduce(base.BaseBooleanReduceTests):
pass


class TestAccumulation(base.BaseAccumulateTests):
def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
return True
Expand Down
26 changes: 14 additions & 12 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,27 +306,29 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
pass


class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
class TestReduce(BaseNumPyTests, base.BaseReduceTests):
def _supports_reduction(self, obj, op_name: str) -> bool:
if tm.get_dtype(obj).kind == "O":
return op_name in ["sum", "min", "max", "any", "all"]
return True

def check_reduce(self, s, op_name, skipna):
result = getattr(s, op_name)(skipna=skipna)
res_op = getattr(s, op_name)
# avoid coercing int -> float. Just cast to the actual numpy type.
expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna)
exp_op = getattr(s.astype(s.dtype._dtype), op_name)
if op_name == "count":
result = res_op()
expected = exp_op()
else:
result = res_op(skipna=skipna)
expected = exp_op(skipna=skipna)
tm.assert_almost_equal(result, expected)

@pytest.mark.skip("tests not written yet")
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_frame(self, data, all_numeric_reductions, skipna):
pass

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series(self, data, all_boolean_reductions, skipna):
super().test_reduce_series(data, all_boolean_reductions, skipna)


@skip_nested
class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
pass


class TestMissing(BaseNumPyTests, base.BaseMissingTests):
@skip_nested
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_fillna_no_op_returns_copy(self, data):
tm.assert_extension_array_equal(result, data)


class TestNoReduce(base.BaseNoReduceTests):
class TestReduce(base.BaseReduceTests):
@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
op_name = all_numeric_reductions
Expand Down

0 comments on commit 7bc2000

Please sign in to comment.