Skip to content

Commit

Permalink
[ArrayManager] TST: run (+fix/skip) pandas/tests/frame/indexing tests (
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Mar 12, 2021
1 parent 2b2e9b6 commit c331ba8
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 46 deletions.
9 changes: 1 addition & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,7 @@ jobs:
run: |
source activate pandas-dev
pytest pandas/tests/frame/methods
pytest pandas/tests/frame/test_constructors.py
pytest pandas/tests/frame/test_*
pytest pandas/tests/frame/test_reductions.py
pytest pandas/tests/frame/
pytest pandas/tests/reductions/
pytest pandas/tests/generic/test_generic.py
pytest pandas/tests/arithmetic/
Expand All @@ -170,10 +167,6 @@ jobs:
pytest pandas/tests/series/test_*
# indexing subset (temporary since other tests don't pass yet)
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
pytest pandas/tests/frame/indexing/test_where.py
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,7 +861,7 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
# DataFrame into 1D array when loc is an integer
if isinstance(value, np.ndarray) and value.ndim == 2:
assert value.shape[1] == 1
value = value[0, :]
value = value[:, 0]

# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
# but we should avoid that and pass directly the proper array
Expand Down Expand Up @@ -1163,10 +1163,6 @@ def axes(self):
def index(self) -> Index:
return self._axes[0]

@property
def array(self):
return self.arrays[0]

@property
def dtype(self):
return self.array.dtype
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def isna(self: T, func) -> T:
class SingleDataManager(DataManager):
ndim = 1

@property
def array(self):
"""
Quick access to the backing array of the Block or SingleArrayManager.
"""
return self.arrays[0] # type: ignore[attr-defined]


def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
"""
Expand Down
63 changes: 46 additions & 17 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from pandas._libs import iNaT
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer

Expand Down Expand Up @@ -534,6 +535,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self):
with pytest.raises(KeyError, match=r"^3$"):
df2.loc[3:11] = 0

@td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view
def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
sliced = float_string_frame.iloc[:, -3:]
assert sliced["D"].dtype == np.float64
Expand Down Expand Up @@ -592,6 +594,7 @@ def test_getitem_fancy_scalar(self, float_frame):
for idx in f.index[::5]:
assert ix[idx, col] == ts[idx]

@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
def test_setitem_fancy_scalar(self, float_frame):
f = float_frame
expected = float_frame.copy()
Expand Down Expand Up @@ -631,6 +634,7 @@ def test_getitem_fancy_boolean(self, float_frame):
expected = f.reindex(index=f.index[boolvec], columns=["C", "D"])
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
def test_setitem_fancy_boolean(self, float_frame):
# from 2d, set with booleans
frame = float_frame.copy()
Expand Down Expand Up @@ -990,21 +994,29 @@ def test_iloc_row(self):
expected = df.loc[8:14]
tm.assert_frame_equal(result, expected)

# list of integers
result = df.iloc[[1, 2, 4, 6]]
expected = df.reindex(df.index[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_row_slice_view(self, using_array_manager):
df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
original = df.copy()

# verify slice is view
# setting it makes it raise/warn
subset = df.iloc[slice(4, 8)]

msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[2] = 0.0
subset[2] = 0.0

exp_col = df[2].copy()
exp_col[4:8] = 0.0
exp_col = original[2].copy()
# TODO(ArrayManager) verify it is expected that the original didn't change
if not using_array_manager:
exp_col[4:8] = 0.0
tm.assert_series_equal(df[2], exp_col)

# list of integers
result = df.iloc[[1, 2, 4, 6]]
expected = df.reindex(df.index[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_col(self):

df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
Expand All @@ -1022,19 +1034,32 @@ def test_iloc_col(self):
expected = df.loc[:, 8:14]
tm.assert_frame_equal(result, expected)

# verify slice is view
# and that we are setting a copy
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[8] = 0.0

assert (df[8] == 0).all()

# list of integers
result = df.iloc[:, [1, 2, 4, 6]]
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_col_slice_view(self, using_array_manager):
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
original = df.copy()
subset = df.iloc[:, slice(4, 8)]

if not using_array_manager:
# verify slice is view
# and that we are setting a copy
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
subset[8] = 0.0

assert (df[8] == 0).all()
else:
# TODO(ArrayManager) verify this is the desired behaviour
subset[8] = 0.0
# subset changed
assert (subset[8] == 0).all()
# but df itself did not change (setitem replaces full column)
tm.assert_frame_equal(df, original)

def test_loc_duplicates(self):
# gh-17105

Expand Down Expand Up @@ -1218,7 +1243,7 @@ def test_setitem(self, uint64_frame):
)


def test_object_casting_indexing_wraps_datetimelike():
def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
# GH#31649, check the indexing methods all the way down the stack
df = DataFrame(
{
Expand All @@ -1240,6 +1265,10 @@ def test_object_casting_indexing_wraps_datetimelike():
assert isinstance(ser.values[1], Timestamp)
assert isinstance(ser.values[2], pd.Timedelta)

if using_array_manager:
# remainder of the test checking BlockManager internals
return

mgr = df._mgr
mgr._rebuild_blknos_and_blklocs()
arr = mgr.fast_xs(0)
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,17 @@ def test_insert_with_columns_dups(self):
)
tm.assert_frame_equal(df, exp)

def test_insert_item_cache(self):
def test_insert_item_cache(self, using_array_manager):
df = DataFrame(np.random.randn(4, 3))
ser = df[0]

with tm.assert_produces_warning(PerformanceWarning):
if using_array_manager:
expected_warning = None
else:
# with BlockManager warn about high fragmentation of single dtype
expected_warning = PerformanceWarning

with tm.assert_produces_warning(expected_warning):
for n in range(100):
df[n + 3] = df[1] * n

Expand Down
26 changes: 18 additions & 8 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.base import registry as ea_registry
from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -298,12 +300,12 @@ def test_setitem_dt64tz(self, timezone_frame):

# assert that A & C are not sharing the same base (e.g. they
# are copies)
b1 = df._mgr.blocks[1]
b2 = df._mgr.blocks[2]
tm.assert_extension_array_equal(b1.values, b2.values)
b1base = b1.values._data.base
b2base = b2.values._data.base
assert b1base is None or (id(b1base) != id(b2base))
v1 = df._mgr.arrays[1]
v2 = df._mgr.arrays[2]
tm.assert_extension_array_equal(v1, v2)
v1base = v1._data.base
v2base = v2._data.base
assert v1base is None or (id(v1base) != id(v2base))

# with nan
df2 = df.copy()
Expand Down Expand Up @@ -366,7 +368,7 @@ def test_setitem_frame_length_0_str_key(self, indexer):
expected["A"] = expected["A"].astype("object")
tm.assert_frame_equal(df, expected)

def test_setitem_frame_duplicate_columns(self):
def test_setitem_frame_duplicate_columns(self, using_array_manager):
# GH#15695
cols = ["A", "B", "C"] * 2
df = DataFrame(index=range(3), columns=cols)
Expand All @@ -382,6 +384,11 @@ def test_setitem_frame_duplicate_columns(self):
columns=cols,
dtype="object",
)
if using_array_manager:
# setitem replaces column so changes dtype
expected["C"] = expected["C"].astype("int64")
# TODO(ArrayManager) .loc still overwrites
expected["B"] = expected["B"].astype("int64")
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
Expand Down Expand Up @@ -628,6 +635,8 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):


class TestDataFrameSetItemWithExpansion:
# TODO(ArrayManager) update parent (_maybe_update_cacher)
@td.skip_array_manager_not_yet_implemented
def test_setitem_listlike_views(self):
# GH#38148
df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})
Expand Down Expand Up @@ -699,7 +708,7 @@ def test_setitem_with_expansion_categorical_dtype(self):

result1 = df["D"]
result2 = df["E"]
tm.assert_categorical_equal(result1._mgr._block.values, cat)
tm.assert_categorical_equal(result1._mgr.array, cat)

# sorting
ser.name = "E"
Expand Down Expand Up @@ -767,6 +776,7 @@ def inc(x):


class TestDataFrameSetItemBooleanMask:
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
Expand Down
36 changes: 30 additions & 6 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,22 @@ def test_xs_keep_level(self):
result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
tm.assert_frame_equal(result, expected)

def test_xs_view(self):
def test_xs_view(self, using_array_manager):
# in 0.14 this will return a view if possible a copy otherwise, but
# this is numpy dependent

dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))

dm.xs(2)[:] = 10
assert (dm.xs(2) == 10).all()
if using_array_manager:
# INFO(ArrayManager) with ArrayManager getting a row as a view is
# not possible
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
dm.xs(2)[:] = 20
assert not (dm.xs(2) == 20).any()
else:
dm.xs(2)[:] = 20
assert (dm.xs(2) == 20).all()


class TestXSWithMultiIndex:
Expand Down Expand Up @@ -327,10 +335,26 @@ def test_xs_droplevel_false(self):
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

def test_xs_droplevel_false_view(self):
def test_xs_droplevel_false_view(self, using_array_manager):
# GH#37832
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
df.values[0, 0] = 2
expected = DataFrame({"a": [2]})
# check that result still views the same data as df
assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
# modifying original df also modifies result when having a single block
df.iloc[0, 0] = 2
if not using_array_manager:
expected = DataFrame({"a": [2]})
else:
# TODO(ArrayManager) iloc does not update the array inplace using
# "split" path
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

# with mixed dataframe, modifying the parent doesn't modify result
# TODO the "split" path behaves differently here as with single block
df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
df.iloc[0, 0] = 2
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

0 comments on commit c331ba8

Please sign in to comment.