From 28ec3a91dbb4efd74f311940d354fe06f8fbd061 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 23 Aug 2018 17:15:03 +0200
Subject: [PATCH 1/9] API: better error-handling for df.set_index

---
 doc/source/whatsnew/v0.24.0.txt       |  1 +
 pandas/core/frame.py                  | 47 ++++++++++++++++++---------
 pandas/tests/frame/test_alter_axes.py | 34 ++++++++++++-------
 3 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 3b61fde77cb9f..da6ed2d2c7adc 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -551,6 +551,7 @@ Other API Changes
 - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
 - :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
+- :meth:`DataFrame.set_index` now raises a ``TypeError`` for incorrect types, has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
 
 .. _whatsnew_0240.deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 721c31c57bc06..b4a72706871cb 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -60,6 +60,7 @@
     is_sequence,
     is_named_tuple)
 from pandas.core.dtypes.concat import _get_sliced_frame_result_type
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
 from pandas.core.dtypes.missing import isna, notna
 
 
@@ -3912,6 +3913,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         if not isinstance(keys, list):
             keys = [keys]
 
+        missing = []
+        for x in keys:
+            if not (is_scalar(x) or isinstance(x, tuple)):
+                if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
+                                      list, np.ndarray)):
+                    raise TypeError('keys may only contain a combination of '
+                                    'the following: valid column keys, '
+                                    'Series, Index, MultiIndex, list or '
+                                    'np.ndarray')
+            else:
+                if x not in self:
+                    missing.append(x)
+
+        if missing:
+            raise KeyError('{}'.format(missing))
+
         if inplace:
             frame = self
         else:
@@ -3921,7 +3938,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         names = []
         if append:
             names = [x for x in self.index.names]
-            if isinstance(self.index, MultiIndex):
+            if isinstance(self.index, ABCMultiIndex):
                 for i in range(self.index.nlevels):
                     arrays.append(self.index._get_level_values(i))
             else:
@@ -3929,29 +3946,26 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
 
         to_remove = []
         for col in keys:
-            if isinstance(col, MultiIndex):
-                # append all but the last column so we don't have to modify
-                # the end of this loop
-                for n in range(col.nlevels - 1):
+            if isinstance(col, ABCMultiIndex):
+                for n in range(col.nlevels):
                     arrays.append(col._get_level_values(n))
-
-                level = col._get_level_values(col.nlevels - 1)
                 names.extend(col.names)
-            elif isinstance(col, Series):
-                level = col._values
+            elif isinstance(col, ABCIndexClass):
+                # Index but not MultiIndex (treated above)
+                arrays.append(col)
                 names.append(col.name)
-            elif isinstance(col, Index):
-                level = col
+            elif isinstance(col, ABCSeries):
+                arrays.append(col._values)
                 names.append(col.name)
-            elif isinstance(col, (list, np.ndarray, Index)):
-                level = col
+            elif isinstance(col, (list, np.ndarray)):
+                arrays.append(col)
                 names.append(None)
+            # from here, col can only be a column label
             else:
-                level = frame[col]._values
+                arrays.append(frame[col]._values)
                 names.append(col)
                 if drop:
                     to_remove.append(col)
-            arrays.append(level)
 
         index = ensure_index_from_sequences(arrays, names)
 
@@ -3960,7 +3974,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             raise ValueError('Index has duplicate keys: {dup}'.format(
                 dup=duplicates))
 
-        for c in to_remove:
+        # use set to handle duplicate column names gracefully in case of drop
+        for c in set(to_remove):
             del frame[c]
 
         # clear up memory usage
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 4e61c9c62266d..8c635b50a7c25 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -186,18 +186,19 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
 
         # == gives ambiguous Boolean for Series
         if drop and keys[0] is 'A' and keys[1] is 'A':
-            with tm.assert_raises_regex(KeyError, '.*'):
-                df.set_index(keys, drop=drop, append=append)
+            # can't drop same column twice
+            first_drop = False
         else:
-            result = df.set_index(keys, drop=drop, append=append)
+            first_drop = drop
 
-            # to test against already-tested behavior, we add sequentially,
-            # hence second append always True; must wrap in list, otherwise
-            # list-box will be illegal
-            expected = df.set_index([keys[0]], drop=drop, append=append)
-            expected = expected.set_index([keys[1]], drop=drop, append=True)
+        # to test against already-tested behaviour, we add sequentially,
+        # hence second append always True; must wrap in list, otherwise
+        # list-box will be illegal
+        expected = df.set_index([keys[0]], drop=first_drop, append=append)
+        expected = expected.set_index([keys[1]], drop=drop, append=True)
 
-            tm.assert_frame_equal(result, expected)
+        result = df.set_index(keys, drop=drop, append=append)
+        tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize('append', [True, False])
     @pytest.mark.parametrize('drop', [True, False])
@@ -229,13 +230,24 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
     def test_set_index_raise(self, frame_of_index_cols, drop, append):
         df = frame_of_index_cols
 
-        with tm.assert_raises_regex(KeyError, '.*'):  # column names are A-E
+        with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"):
+            # column names are A-E
             df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append)
 
         # non-existent key in list with arrays
-        with tm.assert_raises_regex(KeyError, '.*'):
+        with tm.assert_raises_regex(KeyError, 'X'):
             df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
 
+        rgx = 'keys may only contain a combination of the following:.*'
+        # forbidden type, e.g. set
+        with tm.assert_raises_regex(TypeError, rgx):
+            df.set_index(set(df['A']), drop=drop, append=append)
+
+        # forbidden type in list, e.g. set
+        with tm.assert_raises_regex(TypeError, rgx):
+            df.set_index(['A', df['A'], set(df['A'])],
+                         drop=drop, append=append)
+
     def test_construction_with_categorical_index(self):
         ci = tm.makeCategoricalIndex(10)
         ci.name = 'B'

From 8dd1453ad1f2b0a28d54c891ea3322c6d1fd1c34 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 19 Sep 2018 10:53:45 +0200
Subject: [PATCH 2/9] Review (jreback)

---
 pandas/core/frame.py                  |  8 ++++----
 pandas/tests/frame/test_alter_axes.py | 18 +++++++-----------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b4a72706871cb..05f4f3aed7d2b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3918,10 +3918,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             if not (is_scalar(x) or isinstance(x, tuple)):
                 if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
                                       list, np.ndarray)):
-                    raise TypeError('keys may only contain a combination of '
-                                    'the following: valid column keys, '
-                                    'Series, Index, MultiIndex, list or '
-                                    'np.ndarray')
+                    raise TypeError('The parameter "keys" may only contain a '
+                                    'combination of the following: valid '
+                                    'column keys, Series, Index, MultiIndex, '
+                                    'list or np.ndarray')
             else:
                 if x not in self:
                     missing.append(x)
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 8c635b50a7c25..b0451c28ef446 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -184,17 +184,13 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
 
         keys = [box1(df['A']), box2(df['A'])]
 
-        # == gives ambiguous Boolean for Series
-        if drop and keys[0] is 'A' and keys[1] is 'A':
-            # can't drop same column twice
-            first_drop = False
-        else:
-            first_drop = drop
-
         # to test against already-tested behaviour, we add sequentially,
-        # hence second append always True; must wrap in list, otherwise
-        # list-box will be illegal
-        expected = df.set_index([keys[0]], drop=first_drop, append=append)
+        # hence second append always True; must wrap keys in list, otherwise
+        # box = list would be illegal; need to adapt drop for case that both
+        # keys are 'A' -- can't drop the same column twice
+        expected = df.set_index([keys[0]], drop=(False if (keys[0] is 'A'
+                                                           and keys[1] is 'A')
+                                                 else drop), append=append)
         expected = expected.set_index([keys[1]], drop=drop, append=True)
 
         result = df.set_index(keys, drop=drop, append=append)
@@ -238,7 +234,7 @@ def test_set_index_raise(self, frame_of_index_cols, drop, append):
         with tm.assert_raises_regex(KeyError, 'X'):
             df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
 
-        rgx = 'keys may only contain a combination of the following:.*'
+        rgx = 'The parameter "keys" may only contain a combination of.*'
         # forbidden type, e.g. set
         with tm.assert_raises_regex(TypeError, rgx):
             df.set_index(set(df['A']), drop=drop, append=append)

From 18d3464e021a5d2613e183bc0c8823c736d03d80 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Sun, 23 Sep 2018 23:56:20 +0200
Subject: [PATCH 3/9] Review (jreback)

---
 pandas/tests/frame/test_alter_axes.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index b0451c28ef446..2f508f9725283 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -234,13 +234,13 @@ def test_set_index_raise(self, frame_of_index_cols, drop, append):
         with tm.assert_raises_regex(KeyError, 'X'):
             df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)
 
-        rgx = 'The parameter "keys" may only contain a combination of.*'
+        msg = 'The parameter "keys" may only contain a combination of.*'
         # forbidden type, e.g. set
-        with tm.assert_raises_regex(TypeError, rgx):
+        with tm.assert_raises_regex(TypeError, msg):
             df.set_index(set(df['A']), drop=drop, append=append)
 
         # forbidden type in list, e.g. set
-        with tm.assert_raises_regex(TypeError, rgx):
+        with tm.assert_raises_regex(TypeError, msg):
             df.set_index(['A', df['A'], set(df['A'])],
                          drop=drop, append=append)
 

From fa68660f8feed9d6d084c00b084fc8fe466a7e8d Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 26 Sep 2018 18:31:37 +0200
Subject: [PATCH 4/9] Allow list-likes (review jreback)

---
 doc/source/whatsnew/v0.24.0.txt       |  2 +-
 pandas/core/frame.py                  | 38 ++++++++++--------
 pandas/tests/frame/conftest.py        |  7 ++--
 pandas/tests/frame/test_alter_axes.py | 55 ++++++++++++++++++---------
 4 files changed, 63 insertions(+), 39 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index da6ed2d2c7adc..549c8c5d98360 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -551,7 +551,7 @@ Other API Changes
 - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
 - :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
-- :meth:`DataFrame.set_index` now raises a ``TypeError`` for incorrect types, has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
+- :meth:`DataFrame.set_index` now allows all one-dimensional list-likes, raises a ``TypeError`` for incorrect types, has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
 
 .. _whatsnew_0240.deprecations:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 05f4f3aed7d2b..13887c2c3b4f1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -60,7 +60,8 @@
     is_sequence,
     is_named_tuple)
 from pandas.core.dtypes.concat import _get_sliced_frame_result_type
-from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
+from pandas.core.dtypes.generic import (ABCSeries, ABCIndexClass,
+                                        ABCMultiIndex, ABCDataFrame)
 from pandas.core.dtypes.missing import isna, notna
 
 
@@ -3915,16 +3916,18 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
 
         missing = []
         for x in keys:
-            if not (is_scalar(x) or isinstance(x, tuple)):
-                if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
-                                      list, np.ndarray)):
-                    raise TypeError('The parameter "keys" may only contain a '
-                                    'combination of the following: valid '
-                                    'column keys, Series, Index, MultiIndex, '
-                                    'list or np.ndarray')
-            else:
-                if x not in self:
-                    missing.append(x)
+            if (is_scalar(x) or isinstance(x, tuple)) and x in self:
+                continue
+            elif is_scalar(x) and x not in self:
+                # a tuple gets tried as column key first;
+                # otherwise considered as a list-like; i.e. not missing
+                missing.append(x)
+            elif (not is_list_like(x) or isinstance(x, set)
+                  or isinstance(x, ABCDataFrame)
+                  or (isinstance(x, np.ndarray) and x.ndim > 1)):
+                raise TypeError('The parameter "keys" may only contain a '
+                                'combination of valid column keys and '
+                                'one-dimensional list-likes')
 
         if missing:
             raise KeyError('{}'.format(missing))
@@ -3950,16 +3953,19 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 for n in range(col.nlevels):
                     arrays.append(col._get_level_values(n))
                 names.extend(col.names)
-            elif isinstance(col, ABCIndexClass):
-                # Index but not MultiIndex (treated above)
+            elif isinstance(col, (ABCIndexClass, ABCSeries)):
+                # if Index then not MultiIndex (treated above)
                 arrays.append(col)
                 names.append(col.name)
-            elif isinstance(col, ABCSeries):
-                arrays.append(col._values)
-                names.append(col.name)
             elif isinstance(col, (list, np.ndarray)):
                 arrays.append(col)
                 names.append(None)
+            elif (is_list_like(col)
+                  and not (isinstance(col, tuple) and col in self)):
+                # all other list-likes (but avoid valid column keys)
+                col = list(col)  # ensure iterator do not get read twice etc.
+                arrays.append(col)
+                names.append(None)
             # from here, col can only be a column label
             else:
                 arrays.append(frame[col]._values)
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
index fdedb93835d75..4e8fa849b9ccb 100644
--- a/pandas/tests/frame/conftest.py
+++ b/pandas/tests/frame/conftest.py
@@ -180,12 +180,13 @@ def frame_of_index_cols():
     """
     Fixture for DataFrame of columns that can be used for indexing
 
-    Columns are ['A', 'B', 'C', 'D', 'E']; 'A' & 'B' contain duplicates (but
-    are jointly unique), the rest are unique.
+    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
+    'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
     """
     df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
                     'B': ['one', 'two', 'three', 'one', 'two'],
                     'C': ['a', 'b', 'c', 'd', 'e'],
                     'D': np.random.randn(5),
-                    'E': np.random.randn(5)})
+                    'E': np.random.randn(5),
+                    ('tuple', 'as', 'label'): np.random.randn(5)})
     return df
diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 2f508f9725283..526b9e181dac2 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -49,7 +49,8 @@ def test_set_index_cast(self):
         tm.assert_frame_equal(df, df2)
 
     # A has duplicate values, C does not
-    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
+    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
+                                      ('tuple', 'as', 'label')])
     @pytest.mark.parametrize('inplace', [True, False])
     @pytest.mark.parametrize('drop', [True, False])
     def test_set_index_drop_inplace(self, frame_of_index_cols,
@@ -72,7 +73,8 @@ def test_set_index_drop_inplace(self, frame_of_index_cols,
         tm.assert_frame_equal(result, expected)
 
     # A has duplicate values, C does not
-    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
+    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
+                                      ('tuple', 'as', 'label')])
     @pytest.mark.parametrize('drop', [True, False])
     def test_set_index_append(self, frame_of_index_cols, drop, keys):
         df = frame_of_index_cols
@@ -88,7 +90,8 @@ def test_set_index_append(self, frame_of_index_cols, drop, keys):
         tm.assert_frame_equal(result, expected)
 
     # A has duplicate values, C does not
-    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']])
+    @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B'],
+                                      ('tuple', 'as', 'label')])
     @pytest.mark.parametrize('drop', [True, False])
     def test_set_index_append_to_multiindex(self, frame_of_index_cols,
                                             drop, keys):
@@ -114,8 +117,10 @@ def test_set_index_after_mutation(self):
         tm.assert_frame_equal(result, expected)
 
     # MultiIndex constructor does not work directly on Series -> lambda
+    # Add list-of-list constructor because list is ambiguous -> lambda
     # also test index name if append=True (name is duplicate here for B)
     @pytest.mark.parametrize('box', [Series, Index, np.array,
+                                     list, tuple, iter, lambda x: [list(x)],
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name', [(True, None),
                              (True, 'B'), (True, 'test'), (False, None)])
@@ -126,21 +131,29 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
         df.index.name = index_name
 
         key = box(df['B'])
-        # np.array and list "forget" the name of B
-        name = [None if box in [np.array, list] else 'B']
+        if box == list:
+            # list of strings gets interpreted as list of keys
+            msg = "['one', 'two', 'three', 'one', 'two']"
+            with tm.assert_raises_regex(KeyError, msg):
+                df.set_index(key, drop=drop, append=append)
+        else:
+            # np.array/tuple/iter/list-of-list "forget" the name of B
+            name_mi = getattr(key, 'names', None)
+            name = [getattr(key, 'name', None)] if name_mi is None else name_mi
 
-        result = df.set_index(key, drop=drop, append=append)
+            result = df.set_index(key, drop=drop, append=append)
 
-        # only valid column keys are dropped
-        # since B is always passed as array above, nothing is dropped
-        expected = df.set_index(['B'], drop=False, append=append)
-        expected.index.names = [index_name] + name if append else name
+            # only valid column keys are dropped
+            # since B is always passed as array above, nothing is dropped
+            expected = df.set_index(['B'], drop=False, append=append)
+            expected.index.names = [index_name] + name if append else name
 
-        tm.assert_frame_equal(result, expected)
+            tm.assert_frame_equal(result, expected)
 
     # MultiIndex constructor does not work directly on Series -> lambda
     # also test index name if append=True (name is duplicate here for A & B)
-    @pytest.mark.parametrize('box', [Series, Index, np.array, list,
+    @pytest.mark.parametrize('box', [Series, Index, np.array,
+                                     list, tuple, iter,
                                      lambda x: MultiIndex.from_arrays([x])])
     @pytest.mark.parametrize('append, index_name',
                              [(True, None), (True, 'A'), (True, 'B'),
@@ -152,8 +165,8 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
         df.index.name = index_name
 
         keys = ['A', box(df['B'])]
-        # np.array and list "forget" the name of B
-        names = ['A', None if box in [np.array, list] else 'B']
+        # np.array/list/tuple/iter "forget" the name of B
+        names = ['A', None if box in [np.array, list, tuple, iter] else 'B']
 
         result = df.set_index(keys, drop=drop, append=append)
 
@@ -168,10 +181,12 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
     # MultiIndex constructor does not work directly on Series -> lambda
     # We also emulate a "constructor" for the label -> lambda
     # also test index name if append=True (name is duplicate here for A)
-    @pytest.mark.parametrize('box2', [Series, Index, np.array, list,
+    @pytest.mark.parametrize('box2', [Series, Index, np.array,
+                                      list, tuple, iter,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
-    @pytest.mark.parametrize('box1', [Series, Index, np.array, list,
+    @pytest.mark.parametrize('box1', [Series, Index, np.array,
+                                      list, tuple, iter,
                                       lambda x: MultiIndex.from_arrays([x]),
                                       lambda x: x.name])
     @pytest.mark.parametrize('append, index_name', [(True, None),
@@ -182,6 +197,10 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         df = frame_of_index_cols
         df.index.name = index_name
 
+        keys = [box1(df['A']), box2(df['A'])]
+        result = df.set_index(keys, drop=drop, append=append)
+
+        # if either box was iter, the content has been consumed; re-read it
         keys = [box1(df['A']), box2(df['A'])]
 
         # to test against already-tested behaviour, we add sequentially,
@@ -192,8 +211,6 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
                                                            and keys[1] is 'A')
                                                  else drop), append=append)
         expected = expected.set_index([keys[1]], drop=drop, append=True)
-
-        result = df.set_index(keys, drop=drop, append=append)
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize('append', [True, False])
@@ -227,7 +244,7 @@ def test_set_index_raise(self, frame_of_index_cols, drop, append):
         df = frame_of_index_cols
 
         with tm.assert_raises_regex(KeyError, "['foo', 'bar', 'baz']"):
-            # column names are A-E
+            # column names are A-E, as well as one tuple
             df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append)
 
         # non-existent key in list with arrays

From 3a6469dd9e305802bf1b015c27ad0ae57763efa7 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Wed, 3 Oct 2018 17:56:41 +0200
Subject: [PATCH 5/9] Review (jreback)

---
 pandas/core/frame.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bf959390c6e71..36c36929d4a1a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -60,8 +60,7 @@
     is_sequence,
     is_named_tuple)
 from pandas.core.dtypes.concat import _get_sliced_frame_result_type
-from pandas.core.dtypes.generic import (ABCSeries, ABCIndexClass,
-                                        ABCMultiIndex, ABCDataFrame)
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
 from pandas.core.dtypes.missing import isna, notna
 
 
@@ -3955,16 +3954,17 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
             keys = [keys]
 
         missing = []
-        for x in keys:
-            if (is_scalar(x) or isinstance(x, tuple)) and x in self:
+        for col in keys:
+            if (is_scalar(col) or isinstance(col, tuple)) and col in self:
+                # tuples can be both column keys or list-likes
+                # if they are valid column keys, everything is fine
                 continue
-            elif is_scalar(x) and x not in self:
-                # a tuple gets tried as column key first;
-                # otherwise considered as a list-like; i.e. not missing
-                missing.append(x)
-            elif (not is_list_like(x) or isinstance(x, set)
-                  or isinstance(x, ABCDataFrame)
-                  or (isinstance(x, np.ndarray) and x.ndim > 1)):
+            elif is_scalar(col) and col not in self:
+                # tuples that are not column keys are considered list-like,
+                # not considered missing
+                missing.append(col)
+            elif (not is_list_like(col) or isinstance(col, set)
+                  or getattr(col, 'ndim', 1) > 1):
                 raise TypeError('The parameter "keys" may only contain a '
                                 'combination of valid column keys and '
                                 'one-dimensional list-likes')

From dd8d0004fd5154619331894c982fbc3a145c70aa Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Fri, 5 Oct 2018 18:44:37 +0200
Subject: [PATCH 6/9] Review (jreback)

---
 pandas/tests/frame/test_alter_axes.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 526b9e181dac2..1b505b965dd65 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -203,13 +203,14 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         # if either box was iter, the content has been consumed; re-read it
         keys = [box1(df['A']), box2(df['A'])]
 
+        # need to adapt first drop for case that both keys are 'A' --
+        # can't drop the same column twice
+        first_drop = False if (keys[0] is 'A' and keys[1] is 'A') else drop
+
         # to test against already-tested behaviour, we add sequentially,
         # hence second append always True; must wrap keys in list, otherwise
-        # box = list would be illegal; need to adapt drop for case that both
-        # keys are 'A' -- can't drop the same column twice
-        expected = df.set_index([keys[0]], drop=(False if (keys[0] is 'A'
-                                                           and keys[1] is 'A')
-                                                 else drop), append=append)
+        # box = list would be illegal
+        expected = df.set_index([keys[0]], drop=first_drop, append=append)
         expected = expected.set_index([keys[1]], drop=drop, append=True)
         tm.assert_frame_equal(result, expected)
 

From 47c4e74788c7194fa9864df3876149e7773314cc Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Mon, 8 Oct 2018 18:44:02 +0200
Subject: [PATCH 7/9] Improve comment

---
 pandas/tests/frame/test_alter_axes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py
index 1b505b965dd65..fdeae61f96b93 100644
--- a/pandas/tests/frame/test_alter_axes.py
+++ b/pandas/tests/frame/test_alter_axes.py
@@ -204,7 +204,8 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
         keys = [box1(df['A']), box2(df['A'])]
 
         # need to adapt first drop for case that both keys are 'A' --
-        # can't drop the same column twice
+        # cannot drop the same column twice;
+        # use "is" because == would give ambiguous Boolean error for containers
         first_drop = False if (keys[0] is 'A' and keys[1] is 'A') else drop
 
         # to test against already-tested behaviour, we add sequentially,

From 9da6e6c9ce55d0c31f68e1e907aa18b0009a8222 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 18 Oct 2018 08:25:32 +0200
Subject: [PATCH 8/9] Retrigger Circle


From 42d5f2a0646c2d5b4bd40647699b489a12713267 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <h.vetinari@gmx.com>
Date: Thu, 18 Oct 2018 18:17:35 +0200
Subject: [PATCH 9/9] Incorporate allow_sets-kwarg for is_list_like

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a28d3d3598551..c10d78ce55d0c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3991,7 +3991,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                 # tuples that are not column keys are considered list-like,
                 # not considered missing
                 missing.append(col)
-            elif (not is_list_like(col) or isinstance(col, set)
+            elif (not is_list_like(col, allow_sets=False)
                   or getattr(col, 'ndim', 1) > 1):
                 raise TypeError('The parameter "keys" may only contain a '
                                 'combination of valid column keys and '