From c9b091f64c8a1c092131bca49c91fe4186c8ae42 Mon Sep 17 00:00:00 2001
From: William Wagner <wcw13@my.fsu.edu>
Date: Wed, 17 Aug 2016 22:09:50 -0400
Subject: [PATCH 1/2] BUG: Categoricals shouldn't allow non-strings when object
 dtype is passed (#13919)

---
 doc/source/whatsnew/v0.19.0.txt  |  1 +
 pandas/core/categorical.py       | 17 ++++++++++++++++-
 pandas/tests/test_categorical.py | 26 ++++++++++++++++++++++++--
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index cc3cc631b9575..335c8a0f3ed37 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1075,3 +1075,4 @@ Bug Fixes
 - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
 - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
 - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment.
+- Bug in ``Categorical`` would allow creation when ``object`` dtype was passed in with  categories not containing either all non-string or all non-period values
\ No newline at end of file
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 6ea0a5e96672d..7cd2f2f5dfa4e 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -20,7 +20,8 @@
                                  is_categorical_dtype,
                                  is_integer_dtype, is_bool,
                                  is_list_like, is_sequence,
-                                 is_scalar)
+                                 is_scalar,
+                                 is_object_dtype)
 from pandas.core.common import is_null_slice
 
 from pandas.core.algorithms import factorize, take_1d
@@ -191,6 +192,8 @@ class Categorical(PandasObject):
         If an explicit ``ordered=True`` is given but no `categories` and the
         `values` are not sortable.
 
+        If `categories` are of `object` dtype and their non-null values
+        are not all strings or all periods.
 
     Examples
     --------
@@ -324,6 +327,18 @@ def __init__(self, values, categories=None, ordered=False,
                      "mean to use\n'Categorical.from_codes(codes, "
                      "categories)'?", RuntimeWarning, stacklevel=2)
 
+        # TODO: disallow period when they stop being handled as object dtype
+        # categoricals w/ object dtype shouldn't allow non-strings
+        if is_object_dtype(categories) and len(categories) > 0:
+            from pandas.lib import infer_dtype
+            mask = notnull(categories)
+            if infer_dtype(categories[mask]) not in ['period',
+                                                     'unicode',
+                                                     'string']:
+                raise TypeError(
+                    "Categoricals cannot be object dtype unless"
+                    " all values are strings or all are periods.")
+
         self.set_ordered(ordered or False, inplace=True)
         self._categories = categories
         self._codes = _coerce_indexer_dtype(codes, categories)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index b630e0914259e..de880afc5cfc9 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -94,13 +94,35 @@ def test_constructor_unsortable(self):
 
         # it works!
         arr = np.array([1, 2, 3, datetime.now()], dtype='O')
-        factor = Categorical.from_array(arr, ordered=False)
-        self.assertFalse(factor.ordered)
+        msg = "Categoricals cannot be object dtype unless all values are " \
+              "strings or all are periods."
+        with tm.assertRaisesRegexp(TypeError, msg):
+            factor = Categorical.from_array(arr, ordered=False)
 
         # this however will raise as cannot be sorted
         self.assertRaises(
             TypeError, lambda: Categorical.from_array(arr, ordered=True))
 
+    def test_constructor_object_dtype(self):
+        # GH 13919
+
+        # object-dtype categories mixing ints and strings must raise
+        arr = np.array([1, 2, 3, 's'], dtype=object)
+        msg = "Categoricals cannot be object dtype unless all values are " \
+              "strings or all are periods."
+        with tm.assertRaisesRegexp(TypeError, msg):
+            c = Categorical.from_array(arr)
+
+        # object dtype allowed when all strs
+        exp_arr = np.array(list('abcd'), dtype=object)
+        c = Categorical.from_array(exp_arr)
+        tm.assert_numpy_array_equal(c.__array__(), exp_arr)
+
+        # object dtype also allowed when all periods
+        idx = pd.period_range('1/1/2000', freq='D', periods=5)
+        c = Categorical(idx)
+        tm.assert_index_equal(c.categories, idx)
+
     def test_is_equal_dtype(self):
 
         # test dtype comparisons between cats

From 2730600272f9bb10ebeeb0e0aef98e5a2a40d3d9 Mon Sep 17 00:00:00 2001
From: William Wagner <wcw13@my.fsu.edu>
Date: Fri, 19 Aug 2016 09:13:20 -0400
Subject: [PATCH 2/2] Fixed typo in whatsnew entry

---
 doc/source/whatsnew/v0.19.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 335c8a0f3ed37..6b3ea9ecc3866 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1075,4 +1075,4 @@ Bug Fixes
 - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
 - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
 - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment.
-- Bug in ``Categorical`` would allow creation when ``object`` dtype was passed in with  categories not containing either all non-string or all non-period values
\ No newline at end of file
+- Bug in ``Categorical`` where creation was allowed with ``object`` dtype categories that were not all strings or all periods (:issue:`13919`)
\ No newline at end of file