pandas-dev · jorisvandenbossche · Feb 3, 2021 · Feb 4, 2021 · Feb 5, 2021 · Feb 12, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -153,3 +153,5 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/frame/indexing --array-manager
+        pytest pandas/tests/indexing --array-manager
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -382,11 +382,14 @@ def __len__(self):
 # Indices
 # ----------------------------------------------------------------
 @pytest.fixture
-def multiindex_year_month_day_dataframe_random_data():
+def multiindex_year_month_day_dataframe_random_data(using_array_manager):
     """
     DataFrame with 3 level MultiIndex (year, month, day) covering
     first 100 business days from 2000-01-01 with random data
     """
+    if using_array_manager:
+        # TODO(ArrayManager) groupby
+        pytest.skip("Not yet implemented for ArrayManager")
     tdf = tm.makeTimeDataFrame(100)
     ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
     # use Int64Index, to make sure things work

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -621,6 +621,10 @@ def _as_manager(self, typ: str) -> DataFrame:
         # fastpath of passing a manager doesn't check the option/manager class
         return DataFrame(new_mgr)
 
+    @property
+    def _has_array_manager(self) -> bool:
+        return isinstance(self._mgr, ArrayManager)
+
     # ----------------------------------------------------------------------
 
     @property
@@ -3231,7 +3235,9 @@ def _setitem_array(self, key, value):
                     key, axis=1, raise_missing=False
                 )[1]
                 self._check_setitem_copy()
-                self.iloc[:, indexer] = value
+                self.iloc._setitem_with_indexer(
+                    (slice(None), indexer), value, name="setitem"
+                )
 
     def _setitem_frame(self, key, value):
         # support boolean setting with DataFrame input, e.g.

diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
@@ -324,8 +324,8 @@ def length_of_indexer(indexer, target=None) -> int:
             start, stop = stop + 1, start + 1
             step = -step
         return (stop - start + step - 1) // step
-    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
-        if isinstance(indexer, list):
+    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list, range)):
+        if isinstance(indexer, (list, range)):
             indexer = np.array(indexer)
 
         if indexer.dtype == bool:

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1684,7 +1684,9 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
             elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
                 # We are setting multiple rows in a single column.
-                self._setitem_single_column(ilocs[0], value, pi)
+                self._setitem_single_column(
+                    ilocs[0], value, pi, overwrite=name == "setitem"
+                )
 
             elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
                 # We are trying to set N values into M entries of a single
@@ -1708,7 +1710,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
             elif len(ilocs) == len(value):
                 # We are setting multiple columns in a single row.
                 for loc, v in zip(ilocs, value):
-                    self._setitem_single_column(loc, v, pi)
+                    self._setitem_single_column(loc, v, pi, overwrite=name == "setitem")
 
             elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
                 # This is a setitem-with-expansion, see
@@ -1728,7 +1730,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
             # scalar value
             for loc in ilocs:
-                self._setitem_single_column(loc, value, pi)
+                self._setitem_single_column(loc, value, pi, overwrite=name == "setitem")
 
     def _setitem_with_indexer_2d_value(self, indexer, value):
         # We get here with np.ndim(value) == 2, excluding DataFrame,
@@ -1797,7 +1799,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str
 
                 self._setitem_single_column(loc, val, pi)
 
-    def _setitem_single_column(self, loc: int, value, plane_indexer):
+    def _setitem_single_column(self, loc: int, value, plane_indexer, overwrite=True):
         """
 
         Parameters
@@ -1806,7 +1808,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
             Indexer for column position
         plane_indexer : int, slice, listlike[int]
             The indexer we use for setitem along axis=0.
+        overwrite : bool, default True
+            Whether to overwrite the original column, or update the existing
+            column in place
         """
+        if not overwrite and self.obj._has_array_manager:
+            self.obj._mgr.setitem(plane_indexer, value, loc)
+            return
+
         pi = plane_indexer
 
         ser = self.obj._ixs(loc, axis=1)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -14,9 +14,11 @@
 from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
     is_bool_dtype,
+    is_datetime64_dtype,
     is_dtype_equal,
     is_extension_array_dtype,
     is_numeric_dtype,
+    is_scalar,
 )
 from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
@@ -339,9 +341,24 @@ def where(self, other, cond, align: bool, errors: str, axis: int) -> ArrayManage
             axis=axis,
         )
 
-    # TODO what is this used for?
-    # def setitem(self, indexer, value) -> ArrayManager:
-    #     return self.apply_with_block("setitem", indexer=indexer, value=value)
+    def setitem(self, indexer, value, column_idx) -> None:
+        """
+        Set value in place for a single column (position ``column_idx``) and a
+        given row indexer, e.g. from ``df.loc[indexer] = value``. Returns None.
+        """
+        arr = self.arrays[column_idx]
+
+        # TODO this special case can be removed once we only store EAs
+        # special case: a scalar NA (e.g. np.nan) set into a datetime64 ndarray -> NaT
+        if (
+            isinstance(arr, np.ndarray)
+            and is_datetime64_dtype(arr.dtype)
+            and is_scalar(value)
+            and isna(value)
+        ):
+            value = np.datetime64("NaT", "ns")
+
+        arr[indexer] = value
 
     def putmask(self, mask, new, align: bool = True):
 
@@ -454,7 +471,7 @@ def is_mixed_type(self) -> bool:
 
     @property
     def is_numeric_mixed_type(self) -> bool:
-        return False
+        return all(is_numeric_dtype(t) for t in self.get_dtypes())
 
     @property
     def any_extension_types(self) -> bool:
@@ -625,7 +642,14 @@ def fast_xs(self, loc: int) -> ArrayLike:
         else:
             temp_dtype = dtype
 
-        result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype)
+        if dtype == "object":
+            # TODO properly test this, check
+            # pandas/tests/indexing/test_chaining_and_caching.py::TestChaining
+            # ::test_chained_getitem_with_lists
+            result = np.empty(self.shape_proper[1], dtype=dtype)
+            result[:] = [arr[loc] for arr in self.arrays]
+        else:
+            result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype)
         if isinstance(dtype, ExtensionDtype):
             result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)
         return result

diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py
@@ -243,23 +243,32 @@ def test_setitem_mask_categorical(self, exp_multi_row):
         # category c is kept in .categories
         tm.assert_frame_equal(df, exp_fancy)
 
-    def test_loc_setitem_categorical_values_partial_column_slice(self):
+    def test_loc_setitem_categorical_values_partial_column_slice(
+        self, using_array_manager
+    ):
         # Assigning a Category to parts of a int/... column uses the values of
         # the Categorical
         df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
         exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
-        df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
-        df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
-        tm.assert_frame_equal(df, exp)
-
-    def test_loc_setitem_single_row_categorical(self):
+        if using_array_manager:
+            with pytest.raises(ValueError, match=""):
+                df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
+        else:
+            df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
+            df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
+            tm.assert_frame_equal(df, exp)
+
+    def test_loc_setitem_single_row_categorical(self, using_array_manager):
         # GH 25495
         df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
         categories = Categorical(df["Alpha"], categories=["a", "b", "c"])
         df.loc[:, "Alpha"] = categories
 
         result = df["Alpha"]
         expected = Series(categories, index=df.index, name="Alpha")
+        if using_array_manager:
+            # with ArrayManager the object dtype is preserved
+            expected = expected.astype(object)
         tm.assert_series_equal(result, expected)
 
     def test_loc_indexing_preserves_index_category_dtype(self):