Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: de-kludge Block.quantile #27627

Merged
merged 2 commits into from
Jul 31, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 14 additions & 25 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,18 +1526,7 @@ def quantile(self, qs, interpolation="linear", axis=0):
# We should always have ndim == 2 because Series dispatches to DataFrame
assert self.ndim == 2

if self.is_datetimetz:
# TODO: cleanup this special case.
# We need to operate on i8 values for datetimetz
# but `Block.get_values()` returns an ndarray of objects
# right now. We need an API for "values to do numeric-like ops on"
values = self.values.view("M8[ns]")

# TODO: NonConsolidatableMixin shape
# Usual shape inconsistencies for ExtensionBlocks
values = values[None, :]
else:
values = self.get_values()
values = self.get_values()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

grr, I really strongly dislike get_values()


is_empty = values.shape[axis] == 0
orig_scalar = not is_list_like(qs)
Expand Down Expand Up @@ -1576,7 +1565,6 @@ def quantile(self, qs, interpolation="linear", axis=0):
result = lib.item_from_zerodim(result)

ndim = getattr(result, "ndim", None) or 0
result = self._try_coerce_result(result)
return make_block(result, placement=np.arange(len(result)), ndim=ndim)

def _replace_coerce(
Expand Down Expand Up @@ -2477,21 +2465,9 @@ def _try_coerce_result(self, result):
result = self._holder._from_sequence(
result.astype(np.int64), freq=None, dtype=self.values.dtype
)
elif result.dtype == "M8[ns]":
# otherwise we get here via quantile and already have M8[ns]
result = self._holder._simple_new(
result, freq=None, dtype=self.values.dtype
)

elif isinstance(result, np.datetime64):
# also for post-quantile
result = self._box_func(result)
return result

@property
def _box_func(self):
return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz)

def diff(self, n, axis=0):
"""1st discrete difference

Expand Down Expand Up @@ -2564,6 +2540,19 @@ def equals(self, other):
return False
return (self.values.view("i8") == other.values.view("i8")).all()

def quantile(self, qs, interpolation="linear", axis=0):
naive = self.values.view("M8[ns]")

# kludge for 2D block with 1D values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"kludge" - am I to assume that you're planning on cleaning this up or splitting more cleanly in the future? One-liners like these don't seem "super-kludgy" IMO

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, once we allow 2D EAs this entire method will be unnecessary.

naive = naive.reshape(self.shape)

blk = self.make_block(naive)
res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis)

# ravel is kludge for 2D block with 1D values, assumes column-like
aware = self._holder(res_blk.values.ravel(), dtype=self.dtype)
return self.make_block_same_class(aware, ndim=res_blk.ndim)


class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
__slots__ = ()
Expand Down