diff --git a/databricks/koalas/indexing.py b/databricks/koalas/indexing.py index 90e4fdf6e..4a204071c 100644 --- a/databricks/koalas/indexing.py +++ b/databricks/koalas/indexing.py @@ -1696,6 +1696,25 @@ def _select_cols_else( ) def __setitem__(self, key, value): + if is_list_like(value) and not isinstance(value, spark.Column): + iloc_item = self[key] + if not is_list_like(key) or not is_list_like(iloc_item): + raise ValueError("setting an array element with a sequence.") + else: + shape_iloc_item = iloc_item.shape + len_iloc_item = shape_iloc_item[0] + len_value = len(value) + if len_iloc_item != len_value: + if self._is_series: + raise ValueError( + "cannot set using a list-like indexer with a different length than " + "the value" + ) + else: + raise ValueError( + "shape mismatch: value array of shape ({},) could not be broadcast " + "to indexing result of shape {}".format(len_value, shape_iloc_item) + ) super().__setitem__(key, value) # Update again with resolved_copy to drop extra columns. self._kdf._update_internal_frame( diff --git a/databricks/koalas/tests/test_indexing.py b/databricks/koalas/tests/test_indexing.py index 4353e7faa..b243baa81 100644 --- a/databricks/koalas/tests/test_indexing.py +++ b/databricks/koalas/tests/test_indexing.py @@ -1088,7 +1088,7 @@ def test_frame_iloc_setitem(self): kdf.iloc[0, 1] = 50 self.assert_eq(kdf, pdf) - with self.assertRaisesRegex(ValueError, "Incompatible indexer with Series"): + with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."): kdf.iloc[0, 0] = -kdf.max_speed with self.assertRaisesRegex(ValueError, "shape mismatch"): kdf.iloc[:, [1, 0]] = -kdf.max_speed @@ -1226,14 +1226,13 @@ def test_series_iloc_setitem(self): self.assert_eq(kser, pser) self.assert_eq(kdf, pdf) - # TODO: matching the behavior with pandas 1.2 and uncomment below test. - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kser.iloc[[1]] = -kdf.b + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kser.iloc[[1]] = -kdf.b - with self.assertRaisesRegex(ValueError, "Incompatible indexer with DataFrame"): + with self.assertRaisesRegex(ValueError, "setting an array element with a sequence."): kser.iloc[1] = kdf[["b"]] def test_iloc_raises(self): diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index e6a11caff..34c27df2c 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1149,25 +1149,17 @@ def test_frame_iloc_setitem(self): pdf.iloc[[0, 1, 2], 1] = -pdf.max_speed self.assert_eq(kdf, pdf) - # TODO: matching the behavior with pandas 1.2 and uncomment below test - # with self.assertRaisesRegex( - # ValueError, - # "shape mismatch: value array of shape (3,) could not be broadcast to indexing " - # "result of shape (2,1)", - # ): - # kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed + with self.assertRaisesRegex( + ValueError, "shape mismatch", + ): + kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed kdf.iloc[[0, 1, 2], 1] = 10 * another_kdf.max_speed pdf.iloc[[0, 1, 2], 1] = 10 * pdf.max_speed self.assert_eq(kdf, pdf) - # TODO: matching the behavior with pandas 1.2 and uncomment below test - # with self.assertRaisesRegex( - # ValueError, - # "shape mismatch: value array of shape (3,) could not be broadcast to indexing " - # "result of shape (1,)", - # ): - # kdf.iloc[[0], 1] = 10 * another_kdf.max_speed + with self.assertRaisesRegex(ValueError, "shape mismatch"): + kdf.iloc[[0], 1] = 10 * another_kdf.max_speed def test_series_loc_setitem(self): pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"]) @@ -1267,12 +1259,11 @@ def test_series_iloc_setitem(self): self.assert_eq(kdf, pdf) self.assert_eq(ksery, psery) - # TODO: matching the behavior with pandas 1.2 and uncomment below test. - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kser.iloc[[1, 2]] = -kser_another + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kser.iloc[[1, 2]] = -kser_another kser.iloc[[0, 1, 2]] = 10 * kser_another pser.iloc[[0, 1, 2]] = 10 * pser_another @@ -1280,11 +1271,11 @@ def test_series_iloc_setitem(self): self.assert_eq(kdf, pdf) self.assert_eq(ksery, psery) - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kser.iloc[[0]] = 10 * kser_another + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kser.iloc[[0]] = 10 * kser_another kser1.iloc[[0, 1, 2]] = -kser_another pser1.iloc[[0, 1, 2]] = -pser_another @@ -1292,11 +1283,11 @@ def test_series_iloc_setitem(self): self.assert_eq(kdf, pdf) self.assert_eq(ksery, psery) - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kser1.iloc[[1, 2]] = -kser_another + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kser1.iloc[[1, 2]] = -kser_another pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"]) kdf = ks.from_pandas(pdf) @@ -1315,12 +1306,11 @@ def test_series_iloc_setitem(self): self.assert_eq(kdf, pdf) self.assert_eq(ksery, psery) - # TODO: matching the behavior with pandas 1.2 and uncomment below test. - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kiloc[[1, 2]] = -kser_another + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kiloc[[1, 2]] = -kser_another kiloc[[0, 1, 2]] = 10 * kser_another piloc[[0, 1, 2]] = 10 * pser_another @@ -1328,11 +1318,11 @@ def test_series_iloc_setitem(self): self.assert_eq(kdf, pdf) self.assert_eq(ksery, psery) - # with self.assertRaisesRegex( - # ValueError, - # "cannot set using a list-like indexer with a different length than the value", - # ): - # kiloc[[0]] = 10 * kser_another + with self.assertRaisesRegex( + ValueError, + "cannot set using a list-like indexer with a different length than the value", + ): + kiloc[[0]] = 10 * kser_another def test_update(self): pdf = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]}) @@ -1861,7 +1851,7 @@ def test_frame_iloc_setitem(self): another_kdf = ks.DataFrame(pdf) with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): - kdf.iloc[[1, 2], [1]] = another_kdf.max_speed + kdf.iloc[[1, 2], [1]] = another_kdf.max_speed.iloc[[1, 2]] def test_series_loc_setitem(self): pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"]) @@ -1887,7 +1877,7 @@ def test_series_iloc_setitem(self): kser_another = ks.from_pandas(pser_another) with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): - kser.iloc[[1]] = -kser_another + kser.iloc[[1]] = -kser_another.iloc[[1]] def test_where(self): pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]}) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7cd2605d9..98ccfba3e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ # Dependencies in Koalas. When you update don't forget to update setup.py and install.rst in docs. -pandas>=0.23.2,<1.2.0 +pandas>=0.23.2 pyarrow>=0.10 numpy>=1.14,<1.20.0 diff --git a/setup.py b/setup.py index b8dd3b860..b9e65eeb0 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ }, python_requires='>=3.5,<3.9', install_requires=[ - 'pandas>=0.23.2,<1.2.0', + 'pandas>=0.23.2', 'pyarrow>=0.10', 'numpy>=1.14,<1.20.0', ],