Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ArrayManager] TST: run (+fix/skip) pandas/tests/frame/indexing tests #40323

Merged
merged 11 commits into from
Mar 12, 2021
Merged
9 changes: 1 addition & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,7 @@ jobs:
run: |
source activate pandas-dev

pytest pandas/tests/frame/methods
pytest pandas/tests/frame/test_constructors.py
pytest pandas/tests/frame/test_*
pytest pandas/tests/frame/test_reductions.py
pytest pandas/tests/frame/
pytest pandas/tests/reductions/
pytest pandas/tests/generic/test_generic.py
pytest pandas/tests/arithmetic/
Expand All @@ -170,10 +167,6 @@ jobs:
pytest pandas/tests/series/test_*

# indexing subset (temporary since other tests don't pass yet)
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
pytest pandas/tests/frame/indexing/test_where.py
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column

Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
# DataFrame into 1D array when loc is an integer
if isinstance(value, np.ndarray) and value.ndim == 2:
assert value.shape[1] == 1
value = value[0, :]
value = value[:, 0]

# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
# but we should avoid that and pass directly the proper array
Expand Down Expand Up @@ -1178,10 +1178,6 @@ def axes(self):
def index(self) -> Index:
return self._axes[0]

@property
def array(self):
return self.arrays[0]

@property
def dtype(self):
return self.array.dtype
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/internals/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,10 @@ def equals(self, other: object) -> bool:

class SingleDataManager(DataManager):
ndim = 1

@property
def array(self):
"""
Quick access to the backing array of the Block or SingleArrayManager.
"""
return self.arrays[0] # type: ignore[attr-defined]
63 changes: 46 additions & 17 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from pandas._libs import iNaT
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer

Expand Down Expand Up @@ -534,6 +535,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self):
with pytest.raises(KeyError, match=r"^3$"):
df2.loc[3:11] = 0

@td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view
def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
sliced = float_string_frame.iloc[:, -3:]
assert sliced["D"].dtype == np.float64
Expand Down Expand Up @@ -592,6 +594,7 @@ def test_getitem_fancy_scalar(self, float_frame):
for idx in f.index[::5]:
assert ix[idx, col] == ts[idx]

@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
def test_setitem_fancy_scalar(self, float_frame):
f = float_frame
expected = float_frame.copy()
Expand Down Expand Up @@ -631,6 +634,7 @@ def test_getitem_fancy_boolean(self, float_frame):
expected = f.reindex(index=f.index[boolvec], columns=["C", "D"])
tm.assert_frame_equal(result, expected)

@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
def test_setitem_fancy_boolean(self, float_frame):
# from 2d, set with booleans
frame = float_frame.copy()
Expand Down Expand Up @@ -990,21 +994,29 @@ def test_iloc_row(self):
expected = df.loc[8:14]
tm.assert_frame_equal(result, expected)

# list of integers
result = df.iloc[[1, 2, 4, 6]]
expected = df.reindex(df.index[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_row_slice_view(self, using_array_manager):
df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
original = df.copy()

# verify slice is view
# setting it makes it raise/warn
subset = df.iloc[slice(4, 8)]

msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[2] = 0.0
subset[2] = 0.0

exp_col = df[2].copy()
exp_col[4:8] = 0.0
exp_col = original[2].copy()
# TODO(ArrayManager) verify it is expected that the original didn't change
if not using_array_manager:
exp_col[4:8] = 0.0
tm.assert_series_equal(df[2], exp_col)

# list of integers
result = df.iloc[[1, 2, 4, 6]]
expected = df.reindex(df.index[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_col(self):

df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
Expand All @@ -1022,19 +1034,32 @@ def test_iloc_col(self):
expected = df.loc[:, 8:14]
tm.assert_frame_equal(result, expected)

# verify slice is view
# and that we are setting a copy
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
result[8] = 0.0

assert (df[8] == 0).all()

# list of integers
result = df.iloc[:, [1, 2, 4, 6]]
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
tm.assert_frame_equal(result, expected)

def test_iloc_col_slice_view(self, using_array_manager):
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
original = df.copy()
subset = df.iloc[:, slice(4, 8)]

if not using_array_manager:
# verify slice is view
# and that we are setting a copy
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
subset[8] = 0.0

assert (df[8] == 0).all()
else:
# TODO(ArrayManager) verify this is the desired behaviour
subset[8] = 0.0
# subset changed
assert (subset[8] == 0).all()
# but df itself did not change (setitem replaces full column)
tm.assert_frame_equal(df, original)

def test_loc_duplicates(self):
# gh-17105

Expand Down Expand Up @@ -1218,7 +1243,7 @@ def test_setitem(self, uint64_frame):
)


def test_object_casting_indexing_wraps_datetimelike():
def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
# GH#31649, check the indexing methods all the way down the stack
df = DataFrame(
{
Expand All @@ -1240,6 +1265,10 @@ def test_object_casting_indexing_wraps_datetimelike():
assert isinstance(ser.values[1], Timestamp)
assert isinstance(ser.values[2], pd.Timedelta)

if using_array_manager:
# remainder of the test checking BlockManager internals
return

mgr = df._mgr
mgr._rebuild_blknos_and_blklocs()
arr = mgr.fast_xs(0)
Expand Down
10 changes: 8 additions & 2 deletions pandas/tests/frame/indexing/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,17 @@ def test_insert_with_columns_dups(self):
)
tm.assert_frame_equal(df, exp)

def test_insert_item_cache(self):
def test_insert_item_cache(self, using_array_manager):
df = DataFrame(np.random.randn(4, 3))
ser = df[0]

with tm.assert_produces_warning(PerformanceWarning):
if using_array_manager:
expected_warning = None
else:
# with BlockManager warn about high fragmentation of single dtype
expected_warning = PerformanceWarning

with tm.assert_produces_warning(expected_warning):
for n in range(100):
df[n + 3] = df[1] * n

Expand Down
20 changes: 17 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.base import registry as ea_registry
from pandas.core.dtypes.common import (
is_categorical_dtype,
Expand Down Expand Up @@ -282,7 +284,7 @@ def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
df["dates"] = vals
assert (df["dates"].values == ex_vals).all()

def test_setitem_dt64tz(self, timezone_frame):
def test_setitem_dt64tz(self, timezone_frame, using_array_manager):

df = timezone_frame
idx = df["B"].rename("foo")
Expand All @@ -296,6 +298,10 @@ def test_setitem_dt64tz(self, timezone_frame):
tm.assert_series_equal(df["D"], Series(idx, name="D"))
del df["D"]

if using_array_manager:
# TODO(ArrayManager) rewrite test for ArrayManager
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xfail/skip?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first part of the test is still useful to run, so I would rather not skip it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you skip/xfail at this point, then the part up to here will still run

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the end was easy to rewrite using .arrays so it works for both


# assert that A & C are not sharing the same base (e.g. they
# are copies)
b1 = df._mgr.blocks[1]
Expand Down Expand Up @@ -366,7 +372,7 @@ def test_setitem_frame_length_0_str_key(self, indexer):
expected["A"] = expected["A"].astype("object")
tm.assert_frame_equal(df, expected)

def test_setitem_frame_duplicate_columns(self):
def test_setitem_frame_duplicate_columns(self, using_array_manager):
# GH#15695
cols = ["A", "B", "C"] * 2
df = DataFrame(index=range(3), columns=cols)
Expand All @@ -382,6 +388,11 @@ def test_setitem_frame_duplicate_columns(self):
columns=cols,
dtype="object",
)
if using_array_manager:
# setitem replaces column so changes dtype
expected["C"] = expected["C"].astype("int64")
# TODO(ArrayManager) .loc still overwrites
expected["B"] = expected["B"].astype("int64")
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
Expand Down Expand Up @@ -628,6 +639,8 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):


class TestDataFrameSetItemWithExpansion:
# TODO(ArrayManager) update parent (_maybe_update_cacher)
@td.skip_array_manager_not_yet_implemented
def test_setitem_listlike_views(self):
# GH#38148
df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})
Expand Down Expand Up @@ -699,7 +712,7 @@ def test_setitem_with_expansion_categorical_dtype(self):

result1 = df["D"]
result2 = df["E"]
tm.assert_categorical_equal(result1._mgr._block.values, cat)
tm.assert_categorical_equal(result1._mgr.array, cat)

# sorting
ser.name = "E"
Expand Down Expand Up @@ -767,6 +780,7 @@ def inc(x):


class TestDataFrameSetItemBooleanMask:
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
Expand Down
20 changes: 16 additions & 4 deletions pandas/tests/frame/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -109,14 +111,22 @@ def test_xs_keep_level(self):
result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
tm.assert_frame_equal(result, expected)

def test_xs_view(self):
def test_xs_view(self, using_array_manager):
# in 0.14 this will return a view if possible a copy otherwise, but
# this is numpy dependent

dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))

dm.xs(2)[:] = 10
assert (dm.xs(2) == 10).all()
if using_array_manager:
# INFO(ArrayManager) with ArrayManager getting a row as a view is
# not possible
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
dm.xs(2)[:] = 20
assert not (dm.xs(2) == 20).any()
else:
dm.xs(2)[:] = 20
assert (dm.xs(2) == 20).all()


class TestXSWithMultiIndex:
Expand Down Expand Up @@ -327,10 +337,12 @@ def test_xs_droplevel_false(self):
expected = DataFrame({"a": [1]})
tm.assert_frame_equal(result, expected)

# TODO(ArrayManager) xs with axis=1 can return view
@td.skip_array_manager_not_yet_implemented
def test_xs_droplevel_false_view(self):
# GH#37832
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
result = df.xs("a", axis=1, drop_level=False)
df.values[0, 0] = 2
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this version feels much clearer as to whats going on

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a suggestion for how to then do it? (we can't rely on .values to mutate)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess what we really care about here is np.shares_memory(result._values, df.iloc[:, 0])

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the np.shares_memory check. And turned out that also with ArrayManager this actually gives a view. But it's the iloc modifying the parent that doesn't result in modifying the child result, when using ArrayManager, but also when using a mixed DataFrame. So added that as an explicit test case as well.

df.iloc[0, 0] = 2
expected = DataFrame({"a": [2]})
tm.assert_frame_equal(result, expected)