pandas-dev · jorisvandenbossche · Feb 3, 2021 · Feb 4, 2021 · Feb 5, 2021 · Feb 12, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -158,3 +158,4 @@ jobs:
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
         pytest pandas/tests/frame/indexing --array-manager
+        pytest pandas/tests/indexing --array-manager
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -382,11 +382,14 @@ def __len__(self):
 # Indices
 # ----------------------------------------------------------------
 @pytest.fixture
-def multiindex_year_month_day_dataframe_random_data():
+def multiindex_year_month_day_dataframe_random_data(using_array_manager):
     """
     DataFrame with 3 level MultiIndex (year, month, day) covering
     first 100 business days from 2000-01-01 with random data
     """
+    if using_array_manager:
+        # TODO(ArrayManager) groupby
+        pytest.skip("Not yet implemented for ArrayManager")
     tdf = tm.makeTimeDataFrame(100)
     ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
     # use Int64Index, to make sure things work

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -324,8 +324,8 @@ def length_of_indexer(indexer, target=None) -> int:
             start, stop = stop + 1, start + 1
             step = -step
         return (stop - start + step - 1) // step
-    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
-        if isinstance(indexer, list):
+    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list, range)):
+        if isinstance(indexer, (list, range)):
             indexer = np.array(indexer)
 
         if indexer.dtype == bool:

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -642,7 +642,14 @@ def fast_xs(self, loc: int) -> ArrayLike:
         else:
             temp_dtype = dtype
 
-        result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype)
+        if dtype == "object":
+            # TODO properly test this, check
+            # pandas/tests/indexing/test_chaining_and_caching.py::TestChaining
+            # ::test_chained_getitem_with_lists
+            result = np.empty(self.shape_proper[1], dtype=dtype)
+            result[:] = [arr[loc] for arr in self.arrays]
+        else:
+            result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype)
         if isinstance(dtype, ExtensionDtype):
             result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)
         return result

diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas import DataFrame, MultiIndex, Series
 import pandas._testing as tm
 import pandas.core.common as com
@@ -28,6 +30,7 @@ def test_detect_chained_assignment():
         zed["eyes"]["right"].fillna(value=555, inplace=True)
 
 
+@td.skip_array_manager_invalid_test  # with ArrayManager df.loc[0] is not a view
 def test_cache_updating():
     # 5216
     # make sure that we don't try to set a dead cache

diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna
 import pandas._testing as tm
@@ -116,6 +118,9 @@ def check(target, indexers, value, compare_fn, expected=None):
                 expected=copy,
             )
 
+    # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
+    # all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
+    @td.skip_array_manager_not_yet_implemented
     def test_multiindex_setitem(self):
 
         # GH 3738
@@ -170,7 +175,7 @@ def test_multiindex_setitem(self):
         df.loc[idx[:, :, "Stock"], "price"] *= 2
         tm.assert_frame_equal(df, expected)
 
-    def test_multiindex_assignment(self):
+    def test_multiindex_assignment(self, using_array_manager):
 
         # GH3777 part 2
 
@@ -195,11 +200,15 @@ def test_multiindex_assignment(self):
 
         df.loc[4, "c"] = arr
         exp = Series(arr, index=[8, 10], name="c", dtype="float64")
+        if using_array_manager:
+            exp = exp.astype("int64")
         tm.assert_series_equal(df.loc[4, "c"], exp)
 
         # scalar ok
         df.loc[4, "c"] = 10
         exp = Series(10, index=[8, 10], name="c", dtype="float64")
+        if using_array_manager:
+            exp = exp.astype("int64")
         tm.assert_series_equal(df.loc[4, "c"], exp)
 
         # invalid assignments
@@ -312,6 +321,8 @@ def test_frame_getitem_setitem_multislice(self):
         df.loc[:, :] = 10
         tm.assert_frame_equal(df, result)
 
+    # TODO(ArrayManager) iset with multiple elements not yet implemented
+    @td.skip_array_manager_not_yet_implemented
     def test_frame_setitem_multi_column(self):
         df = DataFrame(
             np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]]
@@ -417,6 +428,8 @@ def test_nonunique_assignment_1750(self):
 
         assert (df.xs((1, 1))["C"] == "_").all()
 
+    # TODO(ArrayManager) iset with multiple elements not yet implemented
+    @td.skip_array_manager_not_yet_implemented
     def test_astype_assignment_with_dups(self):
 
         # GH 4686
@@ -439,6 +452,8 @@ def test_setitem_nonmonotonic(self):
         tm.assert_frame_equal(df, expected)
 
 
+@td.skip_array_manager_invalid_test  # df["foo"] select multiple columns -> .values
+# is not a view
 def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
     # this works because we are modifying the underlying array
     # really a no-no

diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py
@@ -7,14 +7,19 @@
 import pandas._testing as tm
 
 
-def test_at_timezone():
+def test_at_timezone(using_array_manager):
     # https://github.com/pandas-dev/pandas/issues/33544
     result = DataFrame({"foo": [datetime(2000, 1, 1)]})
-    result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
-    expected = DataFrame(
-        {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
-    )
-    tm.assert_frame_equal(result, expected)
+    if using_array_manager:
+        # TODO(ArrayManager) this should give a better error message
+        with pytest.raises(TypeError, match="tz-naive and tz-aware"):
+            result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
+    else:
+        result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
+        expected = DataFrame(
+            {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
+        )
+        tm.assert_frame_equal(result, expected)
 
 
 class TestAtSetItem:

diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py
@@ -133,7 +133,7 @@ def test_setitem_chained_setfault(self):
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.arm_slow
-    def test_detect_chained_assignment(self):
+    def test_detect_chained_assignment(self, using_array_manager):
 
         pd.set_option("chained_assignment", "raise")
 
@@ -150,19 +150,25 @@ def test_detect_chained_assignment(self):
         df = DataFrame(
             {
                 "A": Series(range(2), dtype="int64"),
-                "B": np.array(np.arange(2, 4), dtype=np.float64),
+                "B": np.array(np.arange(1, 4, 2), dtype=np.float64),
             }
         )
         assert df._is_copy is None
 
         msg = "A value is trying to be set on a copy of a slice from a DataFrame"
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df["A"][0] = -5
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][0] = -5
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df["A"][1] = np.nan
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][1] = np.nan
 
-        assert df["A"]._is_copy is None
+            assert df["A"]._is_copy is None
+        else:
+            df["A"][0] = -5
+            df["A"][1] = -6
+            expected["B"] = expected["B"].astype("float64")
+            tm.assert_frame_equal(df, expected)
 
         # Using a copy (the chain), fails
         df = DataFrame(
@@ -191,13 +197,17 @@ def test_detect_chained_assignment(self):
         expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
         df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["A"][0] = 111
+
+            df.loc[0, "A"] = 111
+        else:
             df["A"][0] = 111
 
         with pytest.raises(com.SettingWithCopyError, match=msg):
             df.loc[0]["A"] = 111
 
-        df.loc[0, "A"] = 111
         tm.assert_frame_equal(df, expected)
 
         # gh-5475: Make sure that is_copy is picked up reconstruction
@@ -256,10 +266,12 @@ def random_text(nobs=100):
         df.loc[:, "letters"] = df["letters"].apply(str.lower)
 
         # Should be ok even though it's a copy!
-        assert df._is_copy is None
+        if not using_array_manager:
+            assert df._is_copy is None
 
         df["letters"] = df["letters"].apply(str.lower)
-        assert df._is_copy is None
+        if not using_array_manager:
+            assert df._is_copy is None
 
         df = random_text(100000)
         indexer = df.letters.apply(lambda x: len(x) > 10)
@@ -313,8 +325,9 @@ def random_text(nobs=100):
         with pytest.raises(com.SettingWithCopyError, match=msg):
             df.loc[2]["C"] = "foo"
 
-        with pytest.raises(com.SettingWithCopyError, match=msg):
-            df["C"][2] = "foo"
+        if not using_array_manager:
+            with pytest.raises(com.SettingWithCopyError, match=msg):
+                df["C"][2] = "foo"
 
     def test_setting_with_copy_bug(self):
 

diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py
@@ -7,7 +7,7 @@
 
 
 class TestDatetimeIndex:
-    def test_indexing_with_datetime_tz(self):
+    def test_indexing_with_datetime_tz(self, using_array_manager):
 
         # GH#8260
         # support datetime64 with tz
@@ -65,10 +65,14 @@ def test_indexing_with_datetime_tz(self):
         # trying to set a single element on a part of a different timezone
         # this converts to object
         df2 = df.copy()
-        df2.loc[df2.new_col == "new", "time"] = v
-
-        expected = Series([v[0], df.loc[1, "time"]], name="time")
-        tm.assert_series_equal(df2.time, expected)
+        if using_array_manager:
+            with pytest.raises(ValueError, match="Timezones don't match"):
+                df2.loc[df2.new_col == "new", "time"] = v
+        else:
+            df2.loc[df2.new_col == "new", "time"] = v
+
+            expected = Series([v[0], df.loc[1, "time"]], name="time")
+            tm.assert_series_equal(df2.time, expected)
 
         v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s")
         df.loc[df.new_col == "new", "time"] = v