Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: standardize values coercion in Blocks #37009

Merged
merged 1 commit into from
Oct 9, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,28 @@ def _simple_new(
def __init__(self, values, placement, ndim=None):
self.ndim = self._check_ndim(values, ndim)
self.mgr_locs = placement
self.values = values
self.values = self._maybe_coerce_values(values)

if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
raise ValueError(
f"Wrong number of items passed {len(self.values)}, "
f"placement implies {len(self.mgr_locs)}"
)

def _maybe_coerce_values(self, values):
"""
Ensure we have correctly-typed values.
Parameters
----------
values : np.ndarray, ExtensionArray, Index
Returns
-------
np.ndarray or ExtensionArray
"""
return values

def _check_ndim(self, values, ndim):
"""
ndim inference and validation.
Expand Down Expand Up @@ -1614,7 +1628,6 @@ def __init__(self, values, placement, ndim=None):
This will continue to call __init__ for the other base
classes mixed in with this Mixin.
"""
values = self._maybe_coerce_values(values)

# Placement must be converted to BlockPlacement so that we can check
# its length
Expand Down Expand Up @@ -2109,10 +2122,6 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block):
__slots__ = ()
is_datetime = True

def __init__(self, values, placement, ndim=None):
values = self._maybe_coerce_values(values)
super().__init__(values, placement=placement, ndim=ndim)

@property
def _can_hold_na(self):
return True
Expand Down Expand Up @@ -2366,14 +2375,14 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
is_numeric = False
fill_value = np.timedelta64("NaT", "ns")

def __init__(self, values, placement, ndim=None):
def _maybe_coerce_values(self, values):
if values.dtype != TD64NS_DTYPE:
# e.g. non-nano or int64
values = TimedeltaArray._from_sequence(values)._data
if isinstance(values, TimedeltaArray):
values = values._data
assert isinstance(values, np.ndarray), type(values)
super().__init__(values, placement=placement, ndim=ndim)
return values

@property
def _holder(self):
Expand Down Expand Up @@ -2426,11 +2435,10 @@ class ObjectBlock(Block):
is_object = True
_can_hold_na = True

def __init__(self, values, placement=None, ndim=2):
def _maybe_coerce_values(self, values):
if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

super().__init__(values, ndim=ndim, placement=placement)
return values

@property
def is_bool(self):
Expand Down