From c2f490e283c1d6d1b4ebe5cfd829edaa5a572c45 Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Wed, 16 Jul 2014 20:32:11 +0200
Subject: [PATCH 01/10] Categorical: preserve ints when NaN are present

`Categorical([1, np.nan])` would end up with a single `1.` float level.
This commit ensures that if `values` is a list of ints and contains np.nan,
the float conversion does not take place.
---
 pandas/core/categorical.py       | 20 ++++++++++++++-
 pandas/tests/test_categorical.py | 44 ++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index c9674aea4a715..1f367d7a88d5d 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 from warnings import warn
+import types
 
 from pandas import compat
 from pandas.compat import u
@@ -208,8 +209,25 @@ def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False,
             # under certain versions of numpy as well
             inferred = com._possibly_infer_to_datetimelike(values)
             if not isinstance(inferred, np.ndarray):
+
+                # Input sanitation...
+                if com._is_sequence(values) or isinstance(values, types.GeneratorType):
+                    # isnull doesn't work with generators/xrange, so convert all to lists
+                    # TODO: prevent allocation of two times the array/list be converting directly
+                    values = list(values)
+                elif np.isscalar(values):
+                    values = [values]
+
                 from pandas.core.series import _sanitize_array
-                values = _sanitize_array(values, None)
+                # On list with NaNs, int values will be converted to float. Use "object" dtype
+                # to prevent this. In the end objects will be casted to int/... in the level
+                # assignment step.
+                # tuple are list_like but com.isnull(<tuple>) will return a single bool,
+                # which then raises an AttributeError: 'bool' object has no attribute 'any'
+                has_null = (com.is_list_like(values) and not isinstance(values, tuple)
+                            and com.isnull(values).any())
+                dtype = 'object' if has_null else None
+                values = _sanitize_array(values, None, dtype=dtype)
 
         if levels is None:
             try:
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 421e05f5a3bc7..cf6f0bd38a02b 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -111,6 +111,50 @@ def test_constructor(self):
         cat = pd.Categorical([1,2,3,np.nan], levels=[1,2,3])
         self.assertTrue(com.is_integer_dtype(cat.levels))
 
+        # https://github.com/pydata/pandas/issues/3678
+        cat = pd.Categorical([np.nan,1, 2, 3])
+        self.assertTrue(com.is_integer_dtype(cat.levels))
+
+        # this should result in floats
+        cat = pd.Categorical([np.nan, 1, 2., 3 ])
+        self.assertTrue(com.is_float_dtype(cat.levels))
+
+        cat = pd.Categorical([np.nan, 1., 2., 3. ])
+        self.assertTrue(com.is_float_dtype(cat.levels))
+
+        # corner cases
+        cat = pd.Categorical([1])
+        self.assertTrue(len(cat.levels) == 1)
+        self.assertTrue(cat.levels[0] == 1)
+        self.assertTrue(len(cat.codes) == 1)
+        self.assertTrue(cat.codes[0] == 0)
+
+        cat = pd.Categorical(["a"])
+        self.assertTrue(len(cat.levels) == 1)
+        self.assertTrue(cat.levels[0] == "a")
+        self.assertTrue(len(cat.codes) == 1)
+        self.assertTrue(cat.codes[0] == 0)
+
+        # Scalars should be converted to lists
+        cat = pd.Categorical(1)
+        self.assertTrue(len(cat.levels) == 1)
+        self.assertTrue(cat.levels[0] == 1)
+        self.assertTrue(len(cat.codes) == 1)
+        self.assertTrue(cat.codes[0] == 0)
+
+
+    def test_constructor_with_generator(self):
+        # isnull(<generator>) returns a scalar, so isnull(values).any() used to raise an error
+        # when the constructor was handed a generator
+
+        gen = (x for x in [1,2])
+        cat = Categorical(gen)
+
+        # from_product receives a range/xrange here, which is a sequence rather than a generator
+        from pandas.core.index import MultiIndex
+        MultiIndex.from_product([range(5), ['a', 'b', 'c']])
+
+
     def test_from_codes(self):
 
         # too few levels

From 90a81dfda1f6616655d9b4aba0e8e2aea1ab317f Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Wed, 16 Jul 2014 22:05:38 +0200
Subject: [PATCH 02/10] Categorical: fix describe with np.nan

---
 pandas/core/categorical.py       | 21 +++++++++++++++++---
 pandas/tests/test_categorical.py | 33 ++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 1f367d7a88d5d..bfa634fd794d6 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -934,14 +934,29 @@ def describe(self):
             'values' : self._codes }
                            ).groupby('codes').count()
 
-        counts.index = self.levels.take(counts.index)
-        counts = counts.reindex(self.levels)
         freqs = counts / float(counts.sum())
 
         from pandas.tools.merge import concat
         result = concat([counts,freqs],axis=1)
-        result.index.name = 'levels'
         result.columns = ['counts','freqs']
+
+        # fill in the real levels
+        check = result.index == -1
+        if check.any():
+            # Sort -1 (=NaN) to the last position
+            index = np.arange(0, len(self.levels)+1)
+            index[-1] = -1
+            result = result.reindex(index)
+            # build new index
+            levels = np.arange(0,len(self.levels)+1 ,dtype=object)
+            levels[:-1] = self.levels
+            levels[-1] = np.nan
+            result.index = levels.take(result.index)
+        else:
+            result.index = self.levels.take(result.index)
+            result = result.reindex(self.levels)
+        result.index.name = 'levels'
+
         return result
 
 ##### utility routines #####
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index cf6f0bd38a02b..faeb842e5f352 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -249,6 +249,16 @@ def test_describe(self):
                                             ).set_index('levels')
         tm.assert_frame_equal(desc, expected)
 
+        # check unused levels
+        cat = self.factor.copy()
+        cat.levels = ["a","b","c","d"]
+        desc = cat.describe()
+        expected = DataFrame.from_dict(dict(counts=[3, 2, 3, np.nan],
+                                            freqs=[3/8., 2/8., 3/8., np.nan],
+                                            levels=['a', 'b', 'c', 'd'])
+                                            ).set_index('levels')
+        tm.assert_frame_equal(desc, expected)
+
         # check an integer one
         desc = Categorical([1,2,3,1,2,3,3,2,1,1,1]).describe()
         expected = DataFrame.from_dict(dict(counts=[5, 3, 3],
@@ -258,6 +268,29 @@ def test_describe(self):
                                             ).set_index('levels')
         tm.assert_frame_equal(desc, expected)
 
+        # https://github.com/pydata/pandas/issues/3678
+        # describe should work with NaN
+        cat = pd.Categorical([np.nan,1, 2, 2])
+        desc = cat.describe()
+        expected = DataFrame.from_dict(dict(counts=[1, 2, 1],
+                                            freqs=[1/4., 2/4., 1/4.],
+                                            levels=[1,2,np.nan]
+                                            )
+                                            ).set_index('levels')
+        tm.assert_frame_equal(desc, expected)
+
+        # having NaN as level and as "not available" should also print two NaNs in describe!
+        cat = pd.Categorical([np.nan,1, 2, 2])
+        cat.levels = [1,2,np.nan]
+        desc = cat.describe()
+        expected = DataFrame.from_dict(dict(counts=[1, 2, np.nan, 1],
+                                            freqs=[1/4., 2/4., np.nan, 1/4.],
+                                            levels=[1,2,np.nan,np.nan]
+                                            )
+                                            ).set_index('levels')
+        tm.assert_frame_equal(desc, expected)
+
+
     def test_print(self):
         expected = [" a", " b", " b", " a", " a", " c", " c", " c",
                     "Levels (3, object): [a < b < c]"]

From f4bf9ee5399661ff2aedd8b2531dd16ba92141a9 Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Wed, 16 Jul 2014 22:28:55 +0200
Subject: [PATCH 03/10] Categorical: ensure that one can assign np.nan

---
 pandas/core/categorical.py       |  5 +++--
 pandas/tests/test_categorical.py | 13 +++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index bfa634fd794d6..2bb8ac23c0100 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -7,7 +7,7 @@
 from pandas import compat
 from pandas.compat import u
 
-from pandas.core.algorithms import factorize, unique
+from pandas.core.algorithms import factorize
 from pandas.core.base import PandasObject
 from pandas.core.index import Index, _ensure_index
 from pandas.core.indexing import _is_null_slice
@@ -778,7 +778,8 @@ def __setitem__(self, key, value):
 
         rvalue = value if com.is_list_like(value) else [value]
         to_add = Index(rvalue)-self.levels
-        if len(to_add):
+        # no assignments of values not in levels, but it's always ok to set something to np.nan
+        if len(to_add) and not com.isnull(to_add).all():
             raise ValueError("cannot setitem on a Categorical with a new level,"
                              " set the levels first")
 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index faeb842e5f352..317fa158977eb 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -573,6 +573,13 @@ def test_slicing_directly(self):
         self.assert_numpy_array_equal(sliced._codes, expected._codes)
         tm.assert_index_equal(sliced.levels, expected.levels)
 
+    def test_set_item_nan(self):
+        cat = pd.Categorical([1,2,3])
+        exp = pd.Categorical([1,np.nan,3], levels=[1,2,3])
+        cat[1] = np.nan
+        self.assertTrue(cat.equals(exp))
+
+
 class TestCategoricalAsBlock(tm.TestCase):
     _multiprocess_can_split_ = True
 
@@ -1550,6 +1557,12 @@ def f():
         df.loc[2:3,"b"] = pd.Categorical(["b","b"], levels=["a","b"])
         tm.assert_frame_equal(df, exp)
 
+        # ensure that one can set something to np.nan
+        s = Series(Categorical([1,2,3]))
+        exp = Series(Categorical([1,np.nan,3]))
+        s[1] = np.nan
+        tm.assert_series_equal(s, exp)
+
 
     def test_concat(self):
         cat = pd.Categorical(["a","b"], levels=["a","b"])

From 130b61e4f5ed0342562b32fa089f1857c48d5af8 Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Wed, 23 Jul 2014 21:55:16 +0200
Subject: [PATCH 04/10] Categorical: fix assigning NaN if NaN in levels

---
 pandas/core/categorical.py       |  7 +++++++
 pandas/tests/test_categorical.py | 31 +++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 2bb8ac23c0100..43a6a80b75e47 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -805,6 +805,13 @@ def __setitem__(self, key, value):
             key = self._codes[key]
 
         lindexer = self.levels.get_indexer(rvalue)
+
+        # float levels do currently return -1 for np.nan, even if np.nan is included in the index
+        # "repair" this here
+        if com.isnull(rvalue).any() and com.isnull(self.levels).any():
+            nan_pos = np.where(com.isnull(self.levels))
+            lindexer[lindexer == -1] = nan_pos
+
         self._codes[key] = lindexer
 
     #### reduction ops ####
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 317fa158977eb..29c722bfe8660 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -579,6 +579,37 @@ def test_set_item_nan(self):
         cat[1] = np.nan
         self.assertTrue(cat.equals(exp))
 
+        # if nan in levels, the proper code should be set!
+        cat = pd.Categorical([1,2,3, np.nan], levels=[1,2,3])
+        cat.levels = [1,2,3, np.nan]
+        cat[1] = np.nan
+        exp = np.array([0,3,2,-1])
+        self.assert_numpy_array_equal(cat.codes, exp)
+
+        cat = pd.Categorical([1,2,3, np.nan], levels=[1,2,3])
+        cat.levels = [1,2,3, np.nan]
+        cat[1:3] = np.nan
+        exp = np.array([0,3,3,-1])
+        self.assert_numpy_array_equal(cat.codes, exp)
+
+        cat = pd.Categorical([1,2,3, np.nan], levels=[1,2,3])
+        cat.levels = [1,2,3, np.nan]
+        cat[1:3] = [np.nan, 1]
+        exp = np.array([0,3,0,-1])
+        self.assert_numpy_array_equal(cat.codes, exp)
+
+        cat = pd.Categorical([1,2,3, np.nan], levels=[1,2,3])
+        cat.levels = [1,2,3, np.nan]
+        cat[1:3] = [np.nan, np.nan]
+        exp = np.array([0,3,3,-1])
+        self.assert_numpy_array_equal(cat.codes, exp)
+
+        cat = pd.Categorical([1,2,3, np.nan], levels=[1,2,3])
+        cat.levels = [1,2,3, np.nan]
+        cat[pd.isnull(cat)] = np.nan
+        exp = np.array([0,1,2,3])
+        self.assert_numpy_array_equal(cat.codes, exp)
+
 
 class TestCategoricalAsBlock(tm.TestCase):
     _multiprocess_can_split_ = True

From 65d9d6ebcb94491cd5eed907a1aeb2dc05f65ce5 Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Wed, 23 Jul 2014 21:56:53 +0200
Subject: [PATCH 05/10] API: change default Categorical.from_codes() to
 ordered=False

In the normal constructor `ordered=True` is only assumed if the levels
are given or the values are sortable (which is most of the cases), but
in `from_codes(...)` we can't assume this, so the default should be
`False`.
---
 pandas/core/categorical.py       | 8 ++++----
 pandas/tests/test_categorical.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 43a6a80b75e47..bfa44d565a869 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -110,9 +110,9 @@ class Categorical(PandasObject):
 
     Attributes
     ----------
-    levels : ndarray
+    levels : Index
         The levels of this categorical
-    codes : Index
+    codes : ndarray
         The codes (integer positions, which point to the levels) of this categorical, read only
     ordered : boolean
         Whether or not this Categorical is ordered
@@ -295,7 +295,7 @@ def from_array(cls, data):
         return Categorical(data)
 
     @classmethod
-    def from_codes(cls, codes, levels, ordered=True, name=None):
+    def from_codes(cls, codes, levels, ordered=False, name=None):
         """
         Make a Categorical type from codes and levels arrays.
 
@@ -312,7 +312,7 @@ def from_codes(cls, codes, levels, ordered=True, name=None):
             The levels for the categorical. Items need to be unique.
         ordered : boolean, optional
             Whether or not this categorical is treated as a ordered categorical. If not given,
-            the resulting categorical will be ordered.
+            the resulting categorical will be unordered.
         name : str, optional
             Name for the Categorical variable.
         """
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 29c722bfe8660..815ac3d7b29da 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -178,7 +178,7 @@ def f():
         self.assertRaises(ValueError, f)
 
 
-        exp = Categorical(["a","b","c"])
+        exp = Categorical(["a","b","c"], ordered=False)
         res = Categorical.from_codes([0,1,2], ["a","b","c"])
         self.assertTrue(exp.equals(res))
 

From 5c4f1bdf700d303ba27b73846602497677b0618a Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Thu, 24 Jul 2014 00:29:51 +0200
Subject: [PATCH 06/10] Categorical: add some links to Categorical in the other
 docs

---
 doc/source/10min.rst       | 29 ++++++++++++++++++++++++++++-
 doc/source/categorical.rst |  1 +
 doc/source/reshaping.rst   |  7 +++++++
 doc/source/v0.15.0.txt     |  3 ++-
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index 985f112979a7e..6424b82779f0f 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -66,7 +66,8 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
                         'B' : pd.Timestamp('20130102'),
                         'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
                         'D' : np.array([3] * 4,dtype='int32'),
-                        'E' : 'foo' })
+                        'E' : pd.Categorical(["test","train","test","train"]),
+                        'F' : 'foo' })
    df2
 
 Having specific :ref:`dtypes <basics.dtypes>`
@@ -635,6 +636,32 @@ the quarter end:
    ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
    ts.head()
 
+Categoricals
+------------
+
+Since version 0.15, pandas can include categorical data in a `DataFrame`. For full docs, see the
+:ref:`Categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>` .
+
+.. ipython:: python
+
+    df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
+
+    # convert the raw grades to a categorical
+    df["grade"] = pd.Categorical(df["raw_grade"])
+
+    # Alternative: df["grade"] = df["raw_grade"].astype("category")
+    df["grade"]
+
+    # Rename the levels
+    df["grade"].cat.levels = ["very good", "good", "very bad"]
+
+    # Reorder the levels and simultaneously add the missing levels
+    df["grade"].cat.reorder_levels(["very bad", "bad", "medium", "good", "very good"])
+    df["grade"]
+    df.sort("grade")
+    df.groupby("grade").size()
+
+
 
 Plotting
 --------
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index c08351eb87a79..c47653e92edb3 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -90,6 +90,7 @@ By using some special functions:
     df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels)
     df.head(10)
 
+See :ref:`documentation <reshaping.tile.cut>` for :func:`~pandas.cut`.
 
 `Categoricals` have a specific ``category`` :ref:`dtype <basics.dtypes>`:
 
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index 92a35d0276e22..3d40be37dbbb3 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -503,3 +503,10 @@ handling of NaN:
 
    pd.factorize(x, sort=True)
    np.unique(x, return_inverse=True)[::-1]
+
+.. note::
+    If you just want to handle one column as a categorical variable (like R's factor),
+    you can use  ``df["cat_col"] = pd.Categorical(df["col"])`` or
+    ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`,
+    see the :ref:`Categorical introduction <categorical>` and the
+    :ref:`API documentation <api.categorical>`. This feature was introduced in version 0.15.
diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index b91c306e9b193..c7a9aa5c3630b 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -225,7 +225,8 @@ Categoricals in Series/DataFrame
 methods to manipulate. Thanks to Jan Schultz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`,
 :issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`).
 
-For full docs, see the :ref:`Categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>`.
+For full docs, see the :ref:`Categorical introduction <categorical>` and the
+:ref:`API documentation <api.categorical>`.
 
 .. ipython:: python
 

From 0438a30ae8464a3a3ddf35ca6bb5a9edbdb79a82 Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Fri, 8 Aug 2014 22:22:26 +0200
Subject: [PATCH 07/10] Categorical: use s.values when calling private methods

s.values is the underlying Categorical object; s.cat will be changed
to expose only the API methods/properties.
---
 pandas/core/format.py | 2 +-
 pandas/core/series.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/format.py b/pandas/core/format.py
index 8f749d07296a7..0539d803a42a4 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -177,7 +177,7 @@ def _get_footer(self):
         # level infos are added to the end and in a new line, like it is done for Categoricals
         # Only added when we request a name
         if self.name and com.is_categorical_dtype(self.series.dtype):
-            level_info = self.series.cat._repr_level_info()
+            level_info = self.series.values._repr_level_info()
             if footer:
                 footer += "\n"
             footer += level_info
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5a490992c478c..25d80c27d7b02 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -900,7 +900,7 @@ def _repr_footer(self):
 
         # Categorical
         if com.is_categorical_dtype(self.dtype):
-            level_info = self.cat._repr_level_info()
+            level_info = self.values._repr_level_info()
             return u('%sLength: %d, dtype: %s\n%s') % (namestr,
                                                        len(self),
                                                        str(self.dtype.name),

From 19f4d46555ddd20eea554c6ca62b777ca2d3460f Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Fri, 8 Aug 2014 22:29:43 +0200
Subject: [PATCH 08/10] Categorical: Change series.cat to only expose the API

---
 doc/source/api.rst               | 11 ++++--
 pandas/core/categorical.py       | 30 +++++++++++++++-
 pandas/core/series.py            |  5 +--
 pandas/tests/test_categorical.py | 59 +++++++++++++++++---------------
 4 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index ec6e2aff870c6..a6a04af610ee0 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -528,11 +528,17 @@ and has the following usable methods and properties (all available as
    :toctree: generated/
 
    Categorical
-   Categorical.from_codes
    Categorical.levels
    Categorical.ordered
    Categorical.reorder_levels
    Categorical.remove_unused_levels
+
+The following methods are considered API when using ``Categorical`` directly:
+
+.. autosummary::
+   :toctree: generated/
+
+   Categorical.from_codes
    Categorical.min
    Categorical.max
    Categorical.mode
@@ -547,7 +553,7 @@ the Categorical back to a numpy array, so levels and order information is not pr
    Categorical.__array__
 
 To create compatibility with `pandas.Series` and `numpy` arrays, the following (non-API) methods
-are also introduced.
+are also introduced and available when ``Categorical`` is used directly.
 
 .. autosummary::
    :toctree: generated/
@@ -564,7 +570,6 @@ are also introduced.
    Categorical.argsort
    Categorical.fillna
 
-
 Plotting
 ~~~~~~~~
 .. currentmodule:: pandas
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index bfa44d565a869..7c220afe243df 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -8,7 +8,7 @@
 from pandas.compat import u
 
 from pandas.core.algorithms import factorize
-from pandas.core.base import PandasObject
+from pandas.core.base import PandasObject, PandasDelegate
 from pandas.core.index import Index, _ensure_index
 from pandas.core.indexing import _is_null_slice
 from pandas.tseries.period import PeriodIndex
@@ -967,6 +967,34 @@ def describe(self):
 
         return result
 
+##### The Series.cat accessor #####
+
+class CategoricalProperties(PandasDelegate):
+    """
+    Delegator that forwards property and method access to the wrapped ``self.categorical``.
+    """
+
+    def __init__(self, values, index):
+        self.categorical = values
+        self.index = index
+
+    def _delegate_property_get(self, name):
+        return getattr(self.categorical, name)
+
+    def _delegate_property_set(self, name, new_values):
+        return setattr(self.categorical, name, new_values)
+
+    def _delegate_method(self, name, *args, **kwargs):
+        method = getattr(self.categorical, name)
+        return method(*args, **kwargs)
+
+CategoricalProperties._add_delegate_accessors(delegate=Categorical,
+                                           accessors=["levels", "codes", "ordered"],
+                                           typ='property')
+CategoricalProperties._add_delegate_accessors(delegate=Categorical,
+                                           accessors=["reorder_levels", "remove_unused_levels"],
+                                           typ='method')
+
 ##### utility routines #####
 
 def _get_codes_for_values(values, levels):
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 25d80c27d7b02..ef6bdf99915b1 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2415,11 +2415,12 @@ def dt(self):
     #------------------------------------------------------------------------------
     # Categorical methods
 
-    @property
+    @cache_readonly
     def cat(self):
+        from pandas.core.categorical import CategoricalProperties
         if not com.is_categorical_dtype(self.dtype):
             raise TypeError("Can only use .cat accessor with a 'category' dtype")
-        return self.values
+        return CategoricalProperties(self.values, self.index)
 
 Series._setup_axes(['index'], info_axis=0, stat_axis=0,
                    aliases={'rows': 0})
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 815ac3d7b29da..892e4177b4bb5 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -290,6 +290,24 @@ def test_describe(self):
                                             ).set_index('levels')
         tm.assert_frame_equal(desc, expected)
 
+        # empty levels show up as NA
+        cat = Categorical(["a","b","b","b"], levels=['a','b','c'], ordered=True)
+        result = cat.describe()
+
+        expected = DataFrame([[1,0.25],[3,0.75],[np.nan,np.nan]],
+                             columns=['counts','freqs'],
+                             index=Index(['a','b','c'],name='levels'))
+        tm.assert_frame_equal(result,expected)
+
+        # NA as a level
+        cat = pd.Categorical(["a","c","c",np.nan], levels=["b","a","c",np.nan] )
+        result = cat.describe()
+
+        expected = DataFrame([[np.nan, np.nan],[1,0.25],[2,0.5], [1,0.25]],
+                             columns=['counts','freqs'],
+                             index=Index(['b','a','c',np.nan],name='levels'))
+        tm.assert_frame_equal(result,expected)
+
 
     def test_print(self):
         expected = [" a", " b", " b", " a", " a", " c", " c", " c",
@@ -731,7 +749,7 @@ def test_sideeffects_free(self):
         # so this WILL change values
         cat = Categorical(["a","b","c","a"])
         s =  pd.Series(cat)
-        self.assertTrue(s.cat is cat)
+        self.assertTrue(s.values is cat)
         s.cat.levels = [1,2,3]
         exp_s = np.array([1,2,3,1])
         self.assert_numpy_array_equal(s.__array__(), exp_s)
@@ -747,20 +765,20 @@ def test_nan_handling(self):
         # Nans are represented as -1 in labels
         s = Series(Categorical(["a","b",np.nan,"a"]))
         self.assert_numpy_array_equal(s.cat.levels, np.array(["a","b"]))
-        self.assert_numpy_array_equal(s.cat._codes, np.array([0,1,-1,0]))
+        self.assert_numpy_array_equal(s.cat.codes, np.array([0,1,-1,0]))
 
         # If levels have nan included, the label should point to that instead
         s2 = Series(Categorical(["a","b",np.nan,"a"], levels=["a","b",np.nan]))
         self.assert_numpy_array_equal(s2.cat.levels,
                                       np.array(["a","b",np.nan], dtype=np.object_))
-        self.assert_numpy_array_equal(s2.cat._codes, np.array([0,1,2,0]))
+        self.assert_numpy_array_equal(s2.cat.codes, np.array([0,1,2,0]))
 
         # Changing levels should also make the replaced level np.nan
         s3 = Series(Categorical(["a","b","c","a"]))
         s3.cat.levels = ["a","b",np.nan]
         self.assert_numpy_array_equal(s3.cat.levels,
                                       np.array(["a","b",np.nan], dtype=np.object_))
-        self.assert_numpy_array_equal(s3.cat._codes, np.array([0,1,2,0]))
+        self.assert_numpy_array_equal(s3.cat.codes, np.array([0,1,2,0]))
 
     def test_sequence_like(self):
 
@@ -770,8 +788,8 @@ def test_sequence_like(self):
         df['grade'] = Categorical(df['raw_grade'])
 
         # basic sequencing testing
-        result = list(df.grade.cat)
-        expected = np.array(df.grade.cat).tolist()
+        result = list(df.grade.values)
+        expected = np.array(df.grade.values).tolist()
         tm.assert_almost_equal(result,expected)
 
         # iteration
@@ -813,7 +831,7 @@ def test_series_delegations(self):
         exp_values = np.array(["a","b","c","a"])
         s.cat.reorder_levels(["c","b","a"])
         self.assert_numpy_array_equal(s.cat.levels, exp_levels)
-        self.assert_numpy_array_equal(s.cat.__array__(), exp_values)
+        self.assert_numpy_array_equal(s.values.__array__(), exp_values)
         self.assert_numpy_array_equal(s.__array__(), exp_values)
 
         # remove unused levels
@@ -822,7 +840,7 @@ def test_series_delegations(self):
         exp_values = np.array(["a","b","b","a"])
         s.cat.remove_unused_levels()
         self.assert_numpy_array_equal(s.cat.levels, exp_levels)
-        self.assert_numpy_array_equal(s.cat.__array__(), exp_values)
+        self.assert_numpy_array_equal(s.values.__array__(), exp_values)
         self.assert_numpy_array_equal(s.__array__(), exp_values)
 
         # This method is likely to be confused, so test that it raises an error on wrong inputs:
@@ -881,31 +899,16 @@ def test_describe(self):
         result = self.cat.describe()
         self.assertEquals(len(result.columns),1)
 
-        # empty levels show up as NA
-        s = Series(Categorical(["a","b","b","b"], levels=['a','b','c'], ordered=True))
-        result = s.cat.describe()
 
-        expected = DataFrame([[1,0.25],[3,0.75],[np.nan,np.nan]],
-                             columns=['counts','freqs'],
-                             index=Index(['a','b','c'],name='levels'))
-        tm.assert_frame_equal(result,expected)
+        # In a frame, describe() for the cat should be the same as for string arrays (count, unique,
+        # top, freq)
 
+        cat = Categorical(["a","b","b","b"], levels=['a','b','c'], ordered=True)
+        s = Series(cat)
         result = s.describe()
         expected = Series([4,2,"b",3],index=['count','unique','top', 'freq'])
         tm.assert_series_equal(result,expected)
 
-        # NA as a level
-        cat = pd.Categorical(["a","c","c",np.nan], levels=["b","a","c",np.nan] )
-        result = cat.describe()
-
-        expected = DataFrame([[np.nan, np.nan],[1,0.25],[2,0.5], [1,0.25]],
-                             columns=['counts','freqs'],
-                             index=Index(['b','a','c',np.nan],name='levels'))
-        tm.assert_frame_equal(result,expected)
-
-
-        # In a frame, describe() for the cat should be the same as for string arrays (count, unique,
-        # top, freq)
         cat = pd.Series(pd.Categorical(["a","b","c","c"]))
         df3 = pd.DataFrame({"cat":cat, "s":["a","b","c","c"]})
         res = df3.describe()
@@ -1085,7 +1088,7 @@ def test_sort(self):
         # Cats must be sorted in a dataframe
         res = df.sort(columns=["string"], ascending=False)
         exp = np.array(["d", "c", "b", "a"])
-        self.assert_numpy_array_equal(res["sort"].cat.__array__(), exp)
+        self.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
         self.assertEqual(res["sort"].dtype, "category")
 
         res = df.sort(columns=["sort"], ascending=False)

From 47953a296167d5f845e681a56228744e9fd8a2ca Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Fri, 8 Aug 2014 23:00:00 +0200
Subject: [PATCH 09/10] Categorical: Fix order and na_position

---
 pandas/core/categorical.py       | 19 ++++++++++++++++++-
 pandas/tests/test_categorical.py | 24 ++++++++++++++++++------
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 7c220afe243df..9aa801ba8336d 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -521,10 +521,27 @@ def order(self, inplace=False, ascending=True, na_position='last', **kwargs):
         if na_position not in ['last','first']:
             raise ValueError('invalid na_position: {!r}'.format(na_position))
 
-        codes = np.sort(self._codes.copy())
+        codes = np.sort(self._codes)
         if not ascending:
             codes = codes[::-1]
 
+        # NaN handling
+        na_mask = (codes==-1)
+        if na_mask.any():
+            n_nans = len(codes[na_mask])
+            if na_position=="first" and not ascending:
+                # in this case sort to the front
+                new_codes = codes.copy()
+                new_codes[0:n_nans] = -1
+                new_codes[n_nans:] = codes[~na_mask]
+                codes = new_codes
+            elif na_position=="last" and not ascending:
+                # ... and to the end
+                new_codes = codes.copy()
+                pos = len(codes)-n_nans
+                new_codes[0:pos] = codes[~na_mask]
+                new_codes[pos:] = -1
+                codes = new_codes
         if inplace:
             self._codes = codes
             return
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 892e4177b4bb5..f5b20e924cdf6 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -1131,17 +1131,29 @@ def f():
         res = cat.order(ascending=False, na_position='last')
         exp_val = np.array(["d","c","b","a", np.nan],dtype=object)
         exp_levels = np.array(["a","b","c","d"],dtype=object)
-        # FIXME: IndexError: Out of bounds on buffer access (axis 0)
-        #self.assert_numpy_array_equal(res.__array__(), exp_val)
-        #self.assert_numpy_array_equal(res.levels, exp_levels)
+        self.assert_numpy_array_equal(res.__array__(), exp_val)
+        self.assert_numpy_array_equal(res.levels, exp_levels)
+
+        cat = Categorical(["a","c","b","d", np.nan], ordered=True)
+        res = cat.order(ascending=False, na_position='first')
+        exp_val = np.array([np.nan, "d","c","b","a"],dtype=object)
+        exp_levels = np.array(["a","b","c","d"],dtype=object)
+        self.assert_numpy_array_equal(res.__array__(), exp_val)
+        self.assert_numpy_array_equal(res.levels, exp_levels)
 
         cat = Categorical(["a","c","b","d", np.nan], ordered=True)
         res = cat.order(ascending=False, na_position='first')
         exp_val = np.array([np.nan, "d","c","b","a"],dtype=object)
         exp_levels = np.array(["a","b","c","d"],dtype=object)
-        # FIXME: IndexError: Out of bounds on buffer access (axis 0)
-        #self.assert_numpy_array_equal(res.__array__(), exp_val)
-        #self.assert_numpy_array_equal(res.levels, exp_levels)
+        self.assert_numpy_array_equal(res.__array__(), exp_val)
+        self.assert_numpy_array_equal(res.levels, exp_levels)
+
+        cat = Categorical(["a","c","b","d", np.nan], ordered=True)
+        res = cat.order(ascending=False, na_position='last')
+        exp_val = np.array(["d","c","b","a",np.nan],dtype=object)
+        exp_levels = np.array(["a","b","c","d"],dtype=object)
+        self.assert_numpy_array_equal(res.__array__(), exp_val)
+        self.assert_numpy_array_equal(res.levels, exp_levels)
 
     def test_slicing(self):
         cat = Series(Categorical([1,2,3,4]))

From 2958ce143428a9db2a411015d03ac61f9b50e7ef Mon Sep 17 00:00:00 2001
From: Jan Schulz <jasc@gmx.net>
Date: Sat, 9 Aug 2014 00:55:39 +0200
Subject: [PATCH 10/10] Categorical: Fix comparison of Categoricals and
 Series|Categorical|np.array

Categorical can only be compared to another Categorical with the same levels
and the same ordering or to a scalar value.

If the Categorical has no order defined (cat.ordered == False), only equal
(and not equal) are defined.
---
 doc/source/categorical.rst       |  44 ++++++++++++-
 pandas/core/categorical.py       |  44 +++++++++++--
 pandas/core/common.py            |  13 +++-
 pandas/core/ops.py               |  11 +++-
 pandas/tests/test_categorical.py | 107 +++++++++++++++++++++++++++++++
 5 files changed, 207 insertions(+), 12 deletions(-)

diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index c47653e92edb3..831093228b5d6 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -332,6 +332,45 @@ Operations
 
 The following operations are possible with categorical data:
 
+Comparing `Categoricals` with other objects is possible in two cases:
+ * comparing a `Categorical` to another `Categorical`, when `levels` and `ordered` are the same, or
+ * comparing a `Categorical` to a scalar.
+All other comparisons will raise a TypeError.
+
+.. ipython:: python
+
+    cat = pd.Series(pd.Categorical([1,2,3], levels=[3,2,1]))
+    cat_base = pd.Series(pd.Categorical([2,2,2], levels=[3,2,1]))
+    cat_base2 = pd.Series(pd.Categorical([2,2,2]))
+
+    cat > cat_base
+
+    # This doesn't work because the levels are not the same
+    try:
+        cat > cat_base2
+    except TypeError as e:
+         print("TypeError: " + str(e))
+
+    cat > 2
+
+.. note::
+
+    Comparisons with `Series`, `np.array` or a `Categorical` with different levels or ordering
+    will raise a `TypeError` because custom level ordering would result in two valid results:
+    one taking the ordering into account and one without. If you want to compare a `Categorical`
+    with such a type, you need to be explicit and convert the `Categorical` to values:
+
+.. ipython:: python
+
+    base = np.array([1,2,3])
+
+    try:
+        cat > base
+    except TypeError as e:
+         print("TypeError: " + str(e))
+
+    np.asarray(cat) > base
+
 Getting the minimum and maximum, if the categorical is ordered:
 
 .. ipython:: python
@@ -510,7 +549,8 @@ The same applies to ``df.append(df)``.
 Getting Data In/Out
 -------------------
 
-Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype will currently raise ``NotImplementedError``.
+Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype will currently
+raise ``NotImplementedError``.
 
 Writing to a CSV file will convert the data, effectively removing any information about the
 `Categorical` (levels and ordering). So if you read back the CSV file you have to convert the
@@ -580,7 +620,7 @@ object and not as a low level `numpy` array dtype. This leads to some problems.
     try:
         np.dtype("category")
     except TypeError as e:
-         print("TypeError: " + str(e))
+        print("TypeError: " + str(e))
 
     dtype = pd.Categorical(["a"]).dtype
     try:
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 9aa801ba8336d..91713ab3bc576 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -19,16 +19,36 @@
 
 def _cat_compare_op(op):
     def f(self, other):
-        if isinstance(other, (Categorical, np.ndarray)):
-            values = np.asarray(self)
-            f = getattr(values, op)
-            return f(np.asarray(other))
-        else:
+        # On python2, you can usually compare any type to any type, and Categoricals can be
+        # seen as a custom type, but having different results depending on whether the levels are
+        # the same or not is kind of insane, so be a bit stricter here and use the python3 idea
+        # of comparing only things of equal type.
+        if not self.ordered:
+            if op in ['__lt__', '__gt__','__le__','__ge__']:
+                raise TypeError("Unordered Categoricals can only compare equality or not")
+        if isinstance(other, Categorical):
+            # Two Categoricals can only be compared if the levels are the same
+            if (len(self.levels) != len(other.levels)) or not ((self.levels == other.levels).all()):
+                raise TypeError("Categoricals can only be compared if 'levels' are the same")
+            if not (self.ordered == other.ordered):
+                raise TypeError("Categoricals can only be compared if 'ordered' is the same")
+            na_mask = (self._codes == -1) | (other._codes == -1)
+            f = getattr(self._codes, op)
+            ret = f(other._codes)
+            if na_mask.any():
+                # In other Series, comparing NaN leads to False, so do that here too
+                ret[na_mask] = False
+            return ret
+        elif np.isscalar(other):
             if other in self.levels:
                 i = self.levels.get_loc(other)
                 return getattr(self._codes, op)(i)
             else:
                 return np.repeat(False, len(self))
+        else:
+            msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \
+                  "compare values, use 'np.asarray(cat) <op> other'."
+            raise TypeError(msg.format(op=op,typ=type(other)))
 
     f.__name__ = op
 
@@ -172,6 +192,9 @@ class Categorical(PandasObject):
     Categorical.max
     """
 
+    # For comparisons, so that numpy uses our implementation of the compare ops, which raise
+    __array_priority__ = 1000
+
     def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False, compat=False):
 
         if fastpath:
@@ -447,9 +470,16 @@ def __array__(self, dtype=None):
         Returns
         -------
         values : numpy array
-            A numpy array of the same dtype as categorical.levels.dtype
+            A numpy array of either the specified dtype or, if dtype==None (default), the same
+            dtype as categorical.levels.dtype
         """
-        return com.take_1d(self.levels.values, self._codes)
+        ret = com.take_1d(self.levels.values, self._codes)
+        if dtype and dtype != self.levels.dtype:
+            return np.asarray(ret, dtype)
+        return ret
+
+    def astype(self, dtype, order='K', casting='unsafe', subok=True, copy=True):
+        return np.asarray(self, dtype)
 
     @property
     def T(self):
diff --git a/pandas/core/common.py b/pandas/core/common.py
index bc4c95ed3323e..9e04e38b9c4e2 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -275,7 +275,9 @@ def _isnull_ndarraylike(obj):
     values = getattr(obj, 'values', obj)
     dtype = values.dtype
 
-    if dtype.kind in ('O', 'S', 'U'):
+    if is_categorical_dtype(values):
+        result = _isnull_categorical(values)
+    elif dtype.kind in ('O', 'S', 'U'):
         # Working around NumPy ticket 1542
         shape = values.shape
 
@@ -285,7 +287,6 @@ def _isnull_ndarraylike(obj):
             result = np.empty(shape, dtype=bool)
             vec = lib.isnullobj(values.ravel())
             result[...] = vec.reshape(shape)
-
     elif dtype in _DATELIKE_DTYPES:
         # this is the NaT pattern
         result = values.view('i8') == tslib.iNaT
@@ -299,6 +300,14 @@ def _isnull_ndarraylike(obj):
 
     return result
 
+def _isnull_categorical(obj):
+    # A value is null if its code is -1 or if its code points at a NaN level.
+    ret = obj._codes == -1
+    # String/object and float levels can hold np.nan
+    if obj.levels.dtype.kind in ('S', 'O', 'f'):
+        nan_pos = np.where(isnull(obj.levels))[0]
+        ret = ret | np.in1d(obj._codes, nan_pos)
+    return ret
 
 def _isnull_ndarraylike_old(obj):
     values = getattr(obj, 'values', obj)
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index 9f29570af6f4f..de3b8d857617f 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -524,6 +524,10 @@ def _comp_method_SERIES(op, name, str_rep, masker=False):
     code duplication.
     """
     def na_op(x, y):
+        if com.is_categorical_dtype(x) != com.is_categorical_dtype(y):
+            msg = "Cannot compare a Categorical for op {op} with type {typ}. If you want to \n" \
+                  "compare values, use 'series <op> np.asarray(cat)'."
+            raise TypeError(msg.format(op=op,typ=type(y)))
         if x.dtype == np.object_:
             if isinstance(y, list):
                 y = lib.list_to_object_array(y)
@@ -555,11 +559,16 @@ def wrapper(self, other):
                                      index=self.index, name=name)
         elif isinstance(other, pd.DataFrame):  # pragma: no cover
             return NotImplemented
-        elif isinstance(other, (pa.Array, pd.Series, pd.Index)):
+        elif isinstance(other, (pa.Array, pd.Index)):
             if len(self) != len(other):
                 raise ValueError('Lengths must match to compare')
             return self._constructor(na_op(self.values, np.asarray(other)),
                                      index=self.index).__finalize__(self)
+        elif isinstance(other, pd.Categorical):
+            if not com.is_categorical_dtype(self):
+                msg = "Cannot compare a Categorical for op {op} with Series of dtype {typ}.\n"\
+                      "If you want to compare values, use 'series <op> np.asarray(other)'."
+                raise TypeError(msg.format(op=op,typ=self.dtype))
         else:
 
             mask = isnull(self)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index f5b20e924cdf6..dbfea95bb58c8 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -223,6 +223,62 @@ def test_comparisons(self):
         expected = np.repeat(False, len(self.factor))
         self.assert_numpy_array_equal(result, expected)
 
+        # comparisons with categoricals
+        cat_rev = pd.Categorical(["a","b","c"], levels=["c","b","a"])
+        cat_rev_base = pd.Categorical(["b","b","b"], levels=["c","b","a"])
+        cat = pd.Categorical(["a","b","c"])
+        cat_base = pd.Categorical(["b","b","b"], levels=cat.levels)
+
+        # comparisons need to take level ordering into account
+        res_rev = cat_rev > cat_rev_base
+        exp_rev = np.array([True, False, False])
+        self.assert_numpy_array_equal(res_rev, exp_rev)
+
+        res_rev = cat_rev < cat_rev_base
+        exp_rev = np.array([False, False, True])
+        self.assert_numpy_array_equal(res_rev, exp_rev)
+
+        res = cat > cat_base
+        exp = np.array([False, False, True])
+        self.assert_numpy_array_equal(res, exp)
+
+        # Only categories with same levels can be compared
+        def f():
+            cat > cat_rev
+        self.assertRaises(TypeError, f)
+
+        cat_rev_base2 = pd.Categorical(["b","b","b"], levels=["c","b","a","d"])
+        def f():
+            cat_rev > cat_rev_base2
+        self.assertRaises(TypeError, f)
+
+        # Only categories with same ordering information can be compared
+        cat_unorderd = cat.copy()
+        cat_unorderd.ordered = False
+        self.assertFalse((cat > cat).any())
+        def f():
+            cat > cat_unorderd
+        self.assertRaises(TypeError, f)
+
+        # comparison (in both directions) with Series will raise
+        s = Series(["b","b","b"])
+        self.assertRaises(TypeError, lambda: cat > s)
+        self.assertRaises(TypeError, lambda: cat_rev > s)
+        self.assertRaises(TypeError, lambda: s < cat)
+        self.assertRaises(TypeError, lambda: s < cat_rev)
+
+        # comparison with numpy.array will raise in both direction, but only on newer
+        # numpy versions
+        a = np.array(["b","b","b"])
+        self.assertRaises(TypeError, lambda: cat > a)
+        self.assertRaises(TypeError, lambda: cat_rev > a)
+
+        # The following work via '__array_priority__ = 1000'
+        # but only on numpy > 1.6.1?
+        tm._skip_if_not_numpy17_friendly()
+        self.assertRaises(TypeError, lambda: a < cat)
+        self.assertRaises(TypeError, lambda: a < cat_rev)
+
     def test_na_flags_int_levels(self):
         # #1457
 
@@ -1609,6 +1665,57 @@ def f():
         s[1] = np.nan
         tm.assert_series_equal(s, exp)
 
+    def test_comparisons(self):
+        tests_data = [(list("abc"), list("cba"), list("bbb")),
+                      ([1,2,3], [3,2,1], [2,2,2])]
+        for data , reverse, base in tests_data:
+            cat_rev = pd.Series(pd.Categorical(data, levels=reverse))
+            cat_rev_base = pd.Series(pd.Categorical(base, levels=reverse))
+            cat = pd.Series(pd.Categorical(data))
+            cat_base = pd.Series(pd.Categorical(base, levels=cat.cat.levels))
+            s = Series(base)
+            a = np.array(base)
+
+            # comparisons need to take level ordering into account
+            res_rev = cat_rev > cat_rev_base
+            exp_rev = Series([True, False, False])
+            tm.assert_series_equal(res_rev, exp_rev)
+
+            res_rev = cat_rev < cat_rev_base
+            exp_rev = Series([False, False, True])
+            tm.assert_series_equal(res_rev, exp_rev)
+
+            res = cat > cat_base
+            exp = Series([False, False, True])
+            tm.assert_series_equal(res, exp)
+
+            # Only categories with same levels can be compared
+            def f():
+                cat > cat_rev
+            self.assertRaises(TypeError, f)
+
+            # categorical cannot be compared to Series or numpy array, and also not the other way
+            # around
+            self.assertRaises(TypeError, lambda: cat > s)
+            self.assertRaises(TypeError, lambda: cat_rev > s)
+            self.assertRaises(TypeError, lambda: cat > a)
+            self.assertRaises(TypeError, lambda: cat_rev > a)
+
+            self.assertRaises(TypeError, lambda: s < cat)
+            self.assertRaises(TypeError, lambda: s < cat_rev)
+
+            self.assertRaises(TypeError, lambda: a < cat)
+            self.assertRaises(TypeError, lambda: a < cat_rev)
+
+            # Categoricals can be compared to scalar values
+            res = cat_rev > base[0]
+            tm.assert_series_equal(res, exp)
+
+        # And test NaN handling...
+        cat = pd.Series(pd.Categorical(["a","b","c", np.nan]))
+        exp = Series([True, True, True, False])
+        res = (cat == cat)
+        tm.assert_series_equal(res, exp)
 
     def test_concat(self):
         cat = pd.Categorical(["a","b"], levels=["a","b"])