Skip to content

Commit

Permalink
API: ExtensionDtype._is_numeric (pandas-dev#22345)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and jorisvandenbossche committed Aug 20, 2018
1 parent b6e35ff commit 513c02c
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ ExtensionType Changes
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def is_signed_integer(self):
def is_unsigned_integer(self):
    """Return True when this dtype's ``kind`` code is ``'u'``."""
    kind_code = self.kind
    return kind_code == 'u'

@property
def _is_numeric(self):
    """Report this dtype as numeric.

    Returns
    -------
    bool
        Always ``True``.
    """
    return True

@cache_readonly
def numpy_dtype(self):
""" Return an instance of our numpy dtype """
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ def is_dtype(cls, dtype):
except TypeError:
return False

@property
def _is_numeric(self):
    # type: () -> bool
    """
    Flag telling pandas whether columns of this dtype count as numeric.

    The default answer is ``False``: an ExtensionDtype is assumed to be
    non-numeric unless this property is overridden. Non-numeric columns
    are left out of operations that exclude them, such as (groupby)
    reductions and plotting.
    """
    return False


class ExtensionDtype(_DtypeOpsMixin):
"""A custom data type, to be paired with an ExtensionArray.
Expand All @@ -109,6 +121,11 @@ class ExtensionDtype(_DtypeOpsMixin):
* name
* construct_from_string
The following attributes influence the behavior of the dtype in
pandas operations
* _is_numeric
Optionally one can override construct_array_type for construction
with the name of this dtype via the Registry
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
pass

newb = make_block(values, placement=self.mgr_locs,
klass=klass)
klass=klass, ndim=self.ndim)
except:
if errors == 'raise':
raise
Expand Down Expand Up @@ -1950,6 +1950,10 @@ def is_view(self):
"""Extension arrays are never treated as views."""
return False

@property
def is_numeric(self):
    """Whether this block is numeric, as declared by its values' dtype.

    The answer is delegated to the ``_is_numeric`` attribute of
    ``self.values.dtype``.
    """
    dtype = self.values.dtype
    return dtype._is_numeric

def setitem(self, indexer, value, mgr=None):
"""Set the value inplace, returning a same-typed block.
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,16 @@ def test_groupby_extension_apply(self, data_for_grouping, op):
df.groupby("B").A.apply(op)
df.groupby("A").apply(op)
df.groupby("A").B.apply(op)

def test_in_numeric_groupby(self, data_for_grouping):
    """Check that ``_is_numeric`` decides whether the extension column
    appears in the result of a groupby ``sum``.
    """
    frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
                          "B": data_for_grouping,
                          "C": [1, 1, 1, 1, 1, 1, 1, 1]})
    summed_columns = frame.groupby("A").sum().columns

    # "C" is expected either way; "B" (the extension column) only when
    # its dtype declares itself numeric.
    kept = ['B', 'C'] if data_for_grouping.dtype._is_numeric else ['C']

    tm.assert_index_equal(summed_columns, pd.Index(kept))
4 changes: 4 additions & 0 deletions pandas/tests/extension/base/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,7 @@ def test_no_values_attribute(self, data):
# code, disallowing this for now until solved
assert not hasattr(data, 'values')
assert not hasattr(data, '_values')

def test_is_numeric_honored(self, data):
    """The first block backing a Series built from ``data`` must report
    the same ``is_numeric`` value as the dtype's ``_is_numeric``.
    """
    series = pd.Series(data)
    first_block = series._data.blocks[0]
    assert first_block.is_numeric is data.dtype._is_numeric
4 changes: 4 additions & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def construct_from_string(cls, string):
raise TypeError("Cannot construct a '{}' from "
"'{}'".format(cls, string))

@property
def _is_numeric(self):
    """Report this dtype as numeric.

    Returns
    -------
    bool
        Always ``True``.
    """
    return True


class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/extension/integer/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,22 @@ def test_cross_type_arithmetic():
tm.assert_series_equal(result, expected)


def test_groupby_mean_included():
    """An ``Int64`` extension column is kept in a groupby ``sum`` result."""
    # NOTE(review): the test name says "mean" but the aggregation
    # exercised below is ``sum`` -- confirm which one is intended.
    frame = pd.DataFrame({
        "A": ['a', 'b', 'b'],
        "B": [1, None, 3],
        "C": IntegerArray([1, None, 3], dtype='Int64'),
    })
    group_index = pd.Index(['a', 'b'], name='A')

    # TODO(#22346): preserve Int64 dtype
    expected = pd.DataFrame({
        "B": np.array([1.0, 3.0]),
        "C": np.array([1, 3], dtype="int64")
    }, index=group_index)

    result = frame.groupby("A").sum()
    tm.assert_frame_equal(result, expected)


def test_astype_nansafe():
# https://github.com/pandas-dev/pandas/pull/22343
arr = IntegerArray([np.nan, 1, 2], dtype="Int8")
Expand Down
14 changes: 13 additions & 1 deletion pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import numpy as np

from pandas import (DataFrame, Series, Timestamp, date_range, compat,
option_context)
option_context, Categorical)
from pandas.core.arrays import IntegerArray, IntervalArray
from pandas.compat import StringIO
import pandas as pd

Expand Down Expand Up @@ -436,6 +437,17 @@ def test_get_numeric_data(self):
expected = df
assert_frame_equal(result, expected)

def test_get_numeric_data_extension_dtype(self):
    """``_get_numeric_data`` keeps the two IntegerArray columns and drops
    the Categorical and IntervalArray ones.
    """
    # GH 22290
    columns = {
        'A': IntegerArray([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
        'B': Categorical(list('abcabc')),
        'C': IntegerArray([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
        'D': IntervalArray.from_breaks(range(7))}
    df = DataFrame(columns)

    numeric_only = df._get_numeric_data()
    integer_columns = df.loc[:, ['A', 'C']]
    assert_frame_equal(numeric_only, integer_columns)

def test_convert_objects(self):

oops = self.mixed_frame.T.T
Expand Down

0 comments on commit 513c02c

Please sign in to comment.