From d4618d33e75b6ef0dce2aa8c4062c8caf3a82b0f Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 6 Jun 2017 20:57:47 +0200 Subject: [PATCH 1/8] BUG: Fix/test SparseSeries/SparseDataFrame stack/unstack --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/reshape/reshape.py | 42 +++++++++++++++++++++++---------- pandas/tests/test_multilevel.py | 23 ++++++++++++++++++ 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 261e12b824509..8d6dc95f094af 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -588,7 +588,7 @@ Sparse - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) - Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) - +- Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bff09be6149f3..7e5ef86a07a9c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.common import ( _ensure_platform_int, is_list_like, is_bool_dtype, - needs_i8_conversion) + needs_i8_conversion, is_sparse) from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.missing import notna import pandas.core.dtypes.concat as _concat @@ -75,10 +75,15 @@ def __init__(self, values, index, level=-1, value_columns=None, fill_value=None): self.is_categorical = None + self.is_sparse = is_sparse(values) if values.ndim == 1: if isinstance(values, Categorical): self.is_categorical = values values = np.array(values) + elif self.is_sparse: + # XXX: Makes SparseArray *dense*, but it's supposedly + # a single column at a time, so it's "doable" + values = values.values values = values[:, np.newaxis] self.values = values 
self.value_columns = value_columns @@ -177,7 +182,8 @@ def get_result(self): ordered=ordered) for i in range(values.shape[-1])] - return DataFrame(values, index=index, columns=columns) + klass = SparseDataFrame if self.is_sparse else DataFrame + return klass(values, index=index, columns=columns) def get_new_values(self): values = self.values @@ -472,7 +478,7 @@ def _unstack_frame(obj, level, fill_value=None): from pandas.core.internals import BlockManager, make_block if obj._is_mixed_type: - unstacker = _Unstacker(np.empty(obj.shape, dtype=bool), # dummy + unstacker = _Unstacker(np.empty((0, 0)), # dummy obj.index, level=level, value_columns=obj.columns) new_columns = unstacker.get_new_columns() @@ -480,7 +486,7 @@ def _unstack_frame(obj, level, fill_value=None): new_axes = [new_columns, new_index] new_blocks = [] - mask_blocks = [] + mask_blocks = np.zeros_like(new_columns, dtype=bool) for blk in obj._data.blocks: blk_items = obj._data.items[blk.mgr_locs.indexer] bunstacker = _Unstacker(blk.values.T, obj.index, level=level, @@ -490,15 +496,25 @@ def _unstack_frame(obj, level, fill_value=None): new_placement = new_columns.get_indexer(new_items) new_values, mask = bunstacker.get_new_values() - mblk = make_block(mask.T, placement=new_placement) - mask_blocks.append(mblk) + mask_blocks[new_placement] = mask.any(0) - newb = make_block(new_values.T, placement=new_placement) - new_blocks.append(newb) + # BlockManager can't handle SparseBlocks with multiple items, + # so lets make one block for each item + if is_sparse(blk.values): + new_placement = [[i] for i in new_placement] + new_values = new_values.T + else: + new_placement = [new_placement] + new_values = [new_values.T] + + for cols, placement in zip(new_values, new_placement): + newb = blk.make_block_same_class(cols, placement=placement) + new_blocks.append(newb) - result = DataFrame(BlockManager(new_blocks, new_axes)) - mask_frame = DataFrame(BlockManager(mask_blocks, new_axes)) - return result.loc[:, 
mask_frame.sum(0) > 0] + klass = type(obj) + assert klass in (SparseDataFrame, DataFrame), klass + result = klass(BlockManager(new_blocks, new_axes)) + return result.loc[:, mask_blocks] else: unstacker = _Unstacker(obj.values, obj.index, level=level, value_columns=obj.columns, @@ -559,7 +575,9 @@ def factorize(index): mask = notna(new_values) new_values = new_values[mask] new_index = new_index[mask] - return Series(new_values, index=new_index) + + klass = type(frame)._constructor_sliced + return klass(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 6976fe162c5d5..70310388158b6 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2388,6 +2388,29 @@ def test_iloc_mi(self): tm.assert_frame_equal(result, expected) +class TestSparse(object): + def setup_method(self, method): + self.sdf = pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + self.mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + def test_sparse_frame_stack(self): + ss = self.sdf.stack() + expected = pd.SparseSeries(np.ones(3), index=self.mi) + tm.assert_sp_series_equal(ss, expected) + + def test_sparse_frame_unstack(self): + mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sdf = self.sdf + sdf.index = mi + df = pd.DataFrame(np.eye(3), index=mi).replace(0, np.nan) + + tm.assert_numpy_array_equal(df.unstack().values, sdf.unstack().values) + + def test_sparse_series_unstack(self): + frame = pd.SparseSeries(np.ones(3), index=self.mi).unstack() + tm.assert_sp_frame_equal(frame, self.sdf) + + class TestSorted(Base): """ everthing you wanted to test about sorting """ From 9ae1c10ce4919c8f7954b38eeead82c347a15cb6 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 6 Jun 2017 22:36:37 +0200 Subject: [PATCH 2/8] fix test_reshape.TestDataFrameReshape.test_unstack_preserve_dtypes --- pandas/core/reshape/reshape.py | 6 ++++-- 1 file changed, 4 
insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7e5ef86a07a9c..d45c80c2e716c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -475,7 +475,7 @@ def unstack(obj, level, fill_value=None): def _unstack_frame(obj, level, fill_value=None): - from pandas.core.internals import BlockManager, make_block + from pandas.core.internals import BlockManager, make_block as _make_block if obj._is_mixed_type: unstacker = _Unstacker(np.empty((0, 0)), # dummy @@ -503,12 +503,14 @@ def _unstack_frame(obj, level, fill_value=None): if is_sparse(blk.values): new_placement = [[i] for i in new_placement] new_values = new_values.T + make_block = blk.make_block_same_class else: new_placement = [new_placement] new_values = [new_values.T] + make_block = _make_block for cols, placement in zip(new_values, new_placement): - newb = blk.make_block_same_class(cols, placement=placement) + newb = make_block(cols, placement=placement) new_blocks.append(newb) klass = type(obj) From f621ea6d9bebb63f0169c23331f78e3b9a983e37 Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 11 Jul 2017 17:43:09 +0200 Subject: [PATCH 3/8] Move .unstack() logic onto BlockManager and Block --- pandas/core/categorical.py | 2 ++ pandas/core/internals.py | 31 +++++++++++++++++++++++ pandas/core/reshape/reshape.py | 45 ++++------------------------------ 3 files changed, 38 insertions(+), 40 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 98d6d7a68017a..a8989b65da06e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -128,6 +128,8 @@ def maybe_to_categorical(array): """ coerce to a categorical if a series is given """ if isinstance(array, (ABCSeries, ABCCategoricalIndex)): return array._values + elif isinstance(array, np.ndarray): + return Categorical(array) return array diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2046bae759b9a..4d928215f4ff6 
100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1484,6 +1484,10 @@ def equals(self, other): return False return array_equivalent(self.values, other.values) + def _unstack(self, new_values, new_placement): + """Return a list of unstacked blocks of self""" + return [make_block(new_values, placement=new_placement)] + def quantile(self, qs, interpolation='linear', axis=0, mgr=None): """ compute the quantiles of the @@ -1712,6 +1716,12 @@ def _slice(self, slicer): def _try_cast_result(self, result, dtype=None): return result + def _unstack(self, new_values, new_placement): + # NonConsolidatable blocks can have a single item only, so we return + # one block per item + return [self.make_block_same_class(vals, [place]) + for vals, place in zip(new_values, new_placement)] + class NumericBlock(Block): __slots__ = () @@ -4167,6 +4177,27 @@ def canonicalize(block): return all(block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)) + def unstack(self, unstacker): + """Return blockmanager with all blocks unstacked""" + dummy = unstacker(np.empty((0, 0)), value_columns=self.items) + new_columns = dummy.get_new_columns() + new_index = dummy.get_new_index() + new_blocks = [] + mask_columns = np.zeros_like(new_columns, dtype=bool) + + for blk in self.blocks: + bunstacker = unstacker( + blk.values.T, value_columns=self.items[blk.mgr_locs.indexer]) + new_items = bunstacker.get_new_columns() + new_values, mask = bunstacker.get_new_values() + new_placement = new_columns.get_indexer(new_items) + mask_columns[new_placement] = mask.any(0) + new_blocks.extend(blk._unstack(new_values.T, new_placement)) + + bm = BlockManager(new_blocks, [new_columns, new_index]) + bm = bm.take(mask_columns.nonzero()[0], axis=0) + return bm + class SingleBlockManager(BlockManager): """ manage a single block with """ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d45c80c2e716c..d280c4f3f73d7 100644 --- 
a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -2,6 +2,7 @@ # pylint: disable=W0703,W0622,W0613,W0201 from pandas.compat import range, text_type, zip from pandas import compat +from functools import partial import itertools import re @@ -475,48 +476,12 @@ def unstack(obj, level, fill_value=None): def _unstack_frame(obj, level, fill_value=None): - from pandas.core.internals import BlockManager, make_block as _make_block - if obj._is_mixed_type: - unstacker = _Unstacker(np.empty((0, 0)), # dummy - obj.index, level=level, - value_columns=obj.columns) - new_columns = unstacker.get_new_columns() - new_index = unstacker.get_new_index() - new_axes = [new_columns, new_index] - - new_blocks = [] - mask_blocks = np.zeros_like(new_columns, dtype=bool) - for blk in obj._data.blocks: - blk_items = obj._data.items[blk.mgr_locs.indexer] - bunstacker = _Unstacker(blk.values.T, obj.index, level=level, - value_columns=blk_items, - fill_value=fill_value) - new_items = bunstacker.get_new_columns() - new_placement = new_columns.get_indexer(new_items) - new_values, mask = bunstacker.get_new_values() - - mask_blocks[new_placement] = mask.any(0) - - # BlockManager can't handle SparseBlocks with multiple items, - # so lets make one block for each item - if is_sparse(blk.values): - new_placement = [[i] for i in new_placement] - new_values = new_values.T - make_block = blk.make_block_same_class - else: - new_placement = [new_placement] - new_values = [new_values.T] - make_block = _make_block - - for cols, placement in zip(new_values, new_placement): - newb = make_block(cols, placement=placement) - new_blocks.append(newb) - + unstacker = partial(_Unstacker, index=obj.index, + level=level, fill_value=fill_value) + blocks = obj._data.unstack(unstacker) klass = type(obj) - assert klass in (SparseDataFrame, DataFrame), klass - result = klass(BlockManager(new_blocks, new_axes)) - return result.loc[:, mask_blocks] + return klass(blocks) else: unstacker = 
_Unstacker(obj.values, obj.index, level=level, value_columns=obj.columns, From 369e315c23109149dc1e30abcd3859c3d1b0454e Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 11 Jul 2017 18:02:27 +0200 Subject: [PATCH 4/8] replace tests with pytest style --- pandas/tests/test_multilevel.py | 44 ++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 70310388158b6..00a69ebd673fc 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2388,27 +2388,37 @@ def test_iloc_mi(self): tm.assert_frame_equal(result, expected) -class TestSparse(object): - def setup_method(self, method): - self.sdf = pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye - self.mi = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) +@pytest.fixture +def sparse_df(): + return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + + +@pytest.fixture +def multi_index3(): + return MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + +def test_sparse_frame_stack(sparse_df, multi_index3): + ss = sparse_df.stack() + expected = pd.SparseSeries(np.ones(3), index=multi_index3) + tm.assert_sp_series_equal(ss, expected) + - def test_sparse_frame_stack(self): - ss = self.sdf.stack() - expected = pd.SparseSeries(np.ones(3), index=self.mi) - tm.assert_sp_series_equal(ss, expected) +def test_sparse_frame_unstack(sparse_df): + mi = MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sparse_df.index = mi + arr = np.array([[1, np.nan, np.nan], + [np.nan, 1, np.nan], + [np.nan, np.nan, 1]]) + unstacked_df = pd.DataFrame(arr, index=mi).unstack() + unstacked_sdf = sparse_df.unstack() - def test_sparse_frame_unstack(self): - mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) - sdf = self.sdf - sdf.index = mi - df = pd.DataFrame(np.eye(3), index=mi).replace(0, np.nan) + tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) - 
tm.assert_numpy_array_equal(df.unstack().values, sdf.unstack().values) - def test_sparse_series_unstack(self): - frame = pd.SparseSeries(np.ones(3), index=self.mi).unstack() - tm.assert_sp_frame_equal(frame, self.sdf) +def test_sparse_series_unstack(sparse_df, multi_index3): + frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() + tm.assert_sp_frame_equal(frame, sparse_df) class TestSorted(Base): From a27c04727aa6ffdcfd83a81be88cda4c6af40a75 Mon Sep 17 00:00:00 2001 From: Kernc Date: Sat, 22 Jul 2017 01:24:15 +0200 Subject: [PATCH 5/8] TST: move sparse stack/unstacking tests to sparse/test_reshape --- pandas/tests/sparse/test_reshape.py | 38 +++++++++++++++++++++++++++++ pandas/tests/test_multilevel.py | 33 ------------------------- 2 files changed, 38 insertions(+), 33 deletions(-) create mode 100644 pandas/tests/sparse/test_reshape.py diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py new file mode 100644 index 0000000000000..b492c47375bcf --- /dev/null +++ b/pandas/tests/sparse/test_reshape.py @@ -0,0 +1,38 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +@pytest.fixture +def sparse_df(): + return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + + +@pytest.fixture +def multi_index3(): + return pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + +def test_sparse_frame_stack(sparse_df, multi_index3): + ss = sparse_df.stack() + expected = pd.SparseSeries(np.ones(3), index=multi_index3) + tm.assert_sp_series_equal(ss, expected) + + +def test_sparse_frame_unstack(sparse_df): + mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sparse_df.index = mi + arr = np.array([[1, np.nan, np.nan], + [np.nan, 1, np.nan], + [np.nan, np.nan, 1]]) + unstacked_df = pd.DataFrame(arr, index=mi).unstack() + unstacked_sdf = sparse_df.unstack() + + tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) + + +def test_sparse_series_unstack(sparse_df, 
multi_index3): + frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() + tm.assert_sp_frame_equal(frame, sparse_df) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 00a69ebd673fc..6976fe162c5d5 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2388,39 +2388,6 @@ def test_iloc_mi(self): tm.assert_frame_equal(result, expected) -@pytest.fixture -def sparse_df(): - return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye - - -@pytest.fixture -def multi_index3(): - return MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) - - -def test_sparse_frame_stack(sparse_df, multi_index3): - ss = sparse_df.stack() - expected = pd.SparseSeries(np.ones(3), index=multi_index3) - tm.assert_sp_series_equal(ss, expected) - - -def test_sparse_frame_unstack(sparse_df): - mi = MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) - sparse_df.index = mi - arr = np.array([[1, np.nan, np.nan], - [np.nan, 1, np.nan], - [np.nan, np.nan, 1]]) - unstacked_df = pd.DataFrame(arr, index=mi).unstack() - unstacked_sdf = sparse_df.unstack() - - tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) - - -def test_sparse_series_unstack(sparse_df, multi_index3): - frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() - tm.assert_sp_frame_equal(frame, sparse_df) - - class TestSorted(Base): """ everthing you wanted to test about sorting """ From 0acb6f3fbc990d133c47691ddd306ecb86cc1577 Mon Sep 17 00:00:00 2001 From: Kernc Date: Sat, 22 Jul 2017 03:44:09 +0200 Subject: [PATCH 6/8] Move more code from BlockManager.unstack to Block._unstack --- pandas/core/internals.py | 61 +++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 4d928215f4ff6..2e8962946190b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1484,9 +1484,19 @@ def equals(self, other): return False return 
array_equivalent(self.values, other.values) - def _unstack(self, new_values, new_placement): + def _unstack(self, unstacker_t, new_columns): """Return a list of unstacked blocks of self""" - return [make_block(new_values, placement=new_placement)] + unstacker = unstacker_t(self.values.T) + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + blocks = [make_block(new_values, placement=new_placement)] + return blocks, mask def quantile(self, qs, interpolation='linear', axis=0, mgr=None): """ @@ -1716,11 +1726,21 @@ def _slice(self, slicer): def _try_cast_result(self, result, dtype=None): return result - def _unstack(self, new_values, new_placement): + def _unstack(self, unstacker_t, new_columns): # NonConsolidatable blocks can have a single item only, so we return # one block per item - return [self.make_block_same_class(vals, [place]) - for vals, place in zip(new_values, new_placement)] + unstacker = unstacker_t(self.values.T) + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + blocks = [self.make_block_same_class(vals, [place]) + for vals, place in zip(new_values, new_placement)] + return blocks, mask class NumericBlock(Block): @@ -4177,25 +4197,32 @@ def canonicalize(block): return all(block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)) - def unstack(self, unstacker): - """Return blockmanager with all blocks unstacked""" - dummy = unstacker(np.empty((0, 0)), value_columns=self.items) + def unstack(self, unstacker_t): + """Return a blockmanager with all blocks unstacked. 
+ + Parameters + ---------- + unstacker_t : type + A (partially-applied) ``pd.core.reshape._Unstacker`` class. + """ + dummy = unstacker_t(np.empty((0, 0)), value_columns=self.items) new_columns = dummy.get_new_columns() new_index = dummy.get_new_index() new_blocks = [] - mask_columns = np.zeros_like(new_columns, dtype=bool) + columns_mask = [] for blk in self.blocks: - bunstacker = unstacker( - blk.values.T, value_columns=self.items[blk.mgr_locs.indexer]) - new_items = bunstacker.get_new_columns() - new_values, mask = bunstacker.get_new_values() - new_placement = new_columns.get_indexer(new_items) - mask_columns[new_placement] = mask.any(0) - new_blocks.extend(blk._unstack(new_values.T, new_placement)) + blocks, mask = blk._unstack( + partial(unstacker_t, + value_columns=self.items[blk.mgr_locs.indexer]), + new_columns) + + new_blocks.extend(blocks) + columns_mask.extend(mask) + + new_columns = new_columns[columns_mask] bm = BlockManager(new_blocks, [new_columns, new_index]) - bm = bm.take(mask_columns.nonzero()[0], axis=0) return bm From 8aad74cd3f011f257d4de5f6dba4daefd9503f68 Mon Sep 17 00:00:00 2001 From: Kernc Date: Fri, 11 Aug 2017 15:26:50 +0200 Subject: [PATCH 7/8] Extend docstrings --- pandas/core/internals.py | 53 +++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2e8962946190b..78252ebbbb85f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1484,9 +1484,24 @@ def equals(self, other): return False return array_equivalent(self.values, other.values) - def _unstack(self, unstacker_t, new_columns): - """Return a list of unstacked blocks of self""" - unstacker = unstacker_t(self.values.T) + def _unstack(self, unstacker_func, new_columns): + """Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker_func : callable + Partially applied unstacker. 
+ new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array_like of bool + The mask of columns of `blocks` we should keep. + """ + unstacker = unstacker_func(self.values.T) new_items = unstacker.get_new_columns() new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values() @@ -1726,10 +1741,26 @@ def _slice(self, slicer): def _try_cast_result(self, result, dtype=None): return result - def _unstack(self, unstacker_t, new_columns): + def _unstack(self, unstacker_func, new_columns): + """Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker_func : callable + Partially applied unstacker. + new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array_like of bool + The mask of columns of `blocks` we should keep. + """ # NonConsolidatable blocks can have a single item only, so we return # one block per item - unstacker = unstacker_t(self.values.T) + unstacker = unstacker_func(self.values.T) new_items = unstacker.get_new_columns() new_placement = new_columns.get_indexer(new_items) new_values, mask = unstacker.get_new_values() @@ -4197,15 +4228,19 @@ def canonicalize(block): return all(block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)) - def unstack(self, unstacker_t): + def unstack(self, unstacker_func): """Return a blockmanager with all blocks unstacked. Parameters ---------- - unstacker_t : type + unstacker_func : callable A (partially-applied) ``pd.core.reshape._Unstacker`` class. 
+ + Returns + ------- + unstacked : BlockManager """ - dummy = unstacker_t(np.empty((0, 0)), value_columns=self.items) + dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items) new_columns = dummy.get_new_columns() new_index = dummy.get_new_index() new_blocks = [] @@ -4213,7 +4248,7 @@ def unstack(self, unstacker_t): for blk in self.blocks: blocks, mask = blk._unstack( - partial(unstacker_t, + partial(unstacker_func, value_columns=self.items[blk.mgr_locs.indexer]), new_columns) From cef5f476e13e33a29e7fc64a3fe483175c98f7be Mon Sep 17 00:00:00 2001 From: Kernc Date: Tue, 26 Sep 2017 15:50:54 +0200 Subject: [PATCH 8/8] Mark _maybe_to_categorical() as internal --- pandas/core/categorical.py | 8 ++++++-- pandas/core/internals.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a8989b65da06e..43164e9e150b8 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -124,8 +124,12 @@ def f(self, other): return f -def maybe_to_categorical(array): - """ coerce to a categorical if a series is given """ +def _maybe_to_categorical(array): + """ + Coerce to a categorical if a Series, CategoricalIndex or ndarray is given. + + Internal use ONLY. 
+ """ if isinstance(array, (ABCSeries, ABCCategoricalIndex)): return array._values elif isinstance(array, np.ndarray): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 78252ebbbb85f..a39154f581def 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -56,7 +56,7 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.categorical import Categorical, maybe_to_categorical +from pandas.core.categorical import Categorical, _maybe_to_categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing @@ -2288,7 +2288,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): def __init__(self, values, placement, fastpath=False, **kwargs): # coerce to categorical if we can - super(CategoricalBlock, self).__init__(maybe_to_categorical(values), + super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), fastpath=True, placement=placement, **kwargs)