diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 45f86f044a4b2..065a5782aced1 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -4,6 +4,8 @@
 """
 from __future__ import division
 from warnings import warn, catch_warnings
+from textwrap import dedent
+
 import numpy as np
 
 from pandas.core.dtypes.cast import (
@@ -34,7 +36,10 @@
 from pandas.core import common as com
 from pandas._libs import algos, lib, hashtable as htable
 from pandas._libs.tslib import iNaT
-from pandas.util._decorators import deprecate_kwarg
+from pandas.util._decorators import (Appender, Substitution,
+                                     deprecate_kwarg)
+
+_shared_docs = {}
 
 
 # --------------- #
@@ -146,10 +151,9 @@ def _reconstruct_data(values, dtype, original):
     Returns
     -------
     Index for extension types, otherwise ndarray casted to dtype
-
     """
     from pandas import Index
-    if is_categorical_dtype(dtype):
+    if is_extension_array_dtype(dtype):
         pass
     elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
         values = Index(original)._shallow_copy(values, name=None)
@@ -469,32 +473,124 @@ def _factorize_array(values, na_sentinel=-1, size_hint=None,
     return labels, uniques
 
 
-@deprecate_kwarg(old_arg_name='order', new_arg_name=None)
-def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
-    """
-    Encode input values as an enumerated type or categorical variable
+_shared_docs['factorize'] = """
+    Encode the object as an enumerated type or categorical variable.
+
+    This method is useful for obtaining a numeric representation of an
+    array when all that matters is identifying distinct values. `factorize`
+    is available as both a top-level function :func:`pandas.factorize`,
+    and as a method :meth:`Series.factorize` and :meth:`Index.factorize`.
 
     Parameters
     ----------
-    values : Sequence
-        ndarrays must be 1-D. Sequences that aren't pandas objects are
-        coereced to ndarrays before factorization.
-    sort : boolean, default False
-        Sort by values
+    %(values)s%(sort)s%(order)s
     na_sentinel : int, default -1
-        Value to mark "not found"
-    size_hint : hint to the hashtable sizer
+        Value to mark "not found".
+    %(size_hint)s\
 
     Returns
     -------
-    labels : the indexer to the original array
-    uniques : ndarray (1-d) or Index
-        the unique values. Index is returned when passed values is Index or
-        Series
+    labels : ndarray
+        An integer ndarray that's an indexer into `uniques`.
+        ``uniques.take(labels)`` will have the same values as `values`.
+    uniques : ndarray, Index, or Categorical
+        The unique valid values. When `values` is Categorical, `uniques`
+        is a Categorical. When `values` is some other pandas object, an
+        `Index` is returned. Otherwise, a 1-D ndarray is returned.
+
+        .. note ::
+
+           Even if there's a missing value in `values`, `uniques` will
+           *not* contain an entry for it.
+
+    See Also
+    --------
+    pandas.cut : Discretize continuous-valued array.
+    pandas.unique : Find the unique valuse in an array.
+
+    Examples
+    --------
+    These examples all show factorize as a top-level method like
+    ``pd.factorize(values)``. The results are identical for methods like
+    :meth:`Series.factorize`.
+
+    >>> labels, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'])
+    >>> labels
+    array([0, 0, 1, 2, 0])
+    >>> uniques
+    array(['b', 'a', 'c'], dtype=object)
+
+    With ``sort=True``, the `uniques` will be sorted, and `labels` will be
+    shuffled so that the relationship is the maintained.
+
+    >>> labels, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True)
+    >>> labels
+    array([1, 1, 0, 2, 1])
+    >>> uniques
+    array(['a', 'b', 'c'], dtype=object)
+
+    Missing values are indicated in `labels` with `na_sentinel`
+    (``-1`` by default). Note that missing values are never
+    included in `uniques`.
+
+    >>> labels, uniques = pd.factorize(['b', None, 'a', 'c', 'b'])
+    >>> labels
+    array([ 0, -1,  1,  2,  0])
+    >>> uniques
+    array(['b', 'a', 'c'], dtype=object)
 
-    note: an array of Periods will ignore sort as it returns an always sorted
-    PeriodIndex.
+    Thus far, we've only factorized lists (which are internally coerced to
+    NumPy arrays). When factorizing pandas objects, the type of `uniques`
+    will differ. For Categoricals, a `Categorical` is returned.
+
+    >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
+    >>> labels, uniques = pd.factorize(cat)
+    >>> labels
+    array([0, 0, 1])
+    >>> uniques
+    [a, c]
+    Categories (3, object): [a, b, c]
+
+    Notice that ``'b'`` is in ``uniques.categories``, desipite not being
+    present in ``cat.values``.
+
+    For all other pandas objects, an Index of the appropriate type is
+    returned.
+
+    >>> cat = pd.Series(['a', 'a', 'c'])
+    >>> labels, uniques = pd.factorize(cat)
+    >>> labels
+    array([0, 0, 1])
+    >>> uniques
+    Index(['a', 'c'], dtype='object')
     """
+
+
+@Substitution(
+    values=dedent("""\
+    values : sequence
+        A 1-D seqeunce. Sequences that aren't pandas objects are
+        coereced to ndarrays before factorization.
+    """),
+    order=dedent("""\
+    order
+        .. deprecated:: 0.23.0
+
+           This parameter has no effect and is deprecated.
+    """),
+    sort=dedent("""\
+    sort : bool, default False
+        Sort `uniques` and shuffle `labels` to maintain the
+        relationship.
+    """),
+    size_hint=dedent("""\
+    size_hint : int, optional
+        Hint to the hashtable sizer.
+    """),
+)
+@Appender(_shared_docs['factorize'])
+@deprecate_kwarg(old_arg_name='order', new_arg_name=None)
+def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     # Implementation notes: This method is responsible for 3 things
     # 1.) coercing data to array-like (ndarray, Index, extension array)
     # 2.) factorizing labels and uniques
@@ -507,9 +603,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     values = _ensure_arraylike(values)
     original = values
 
-    if is_categorical_dtype(values):
+    if is_extension_array_dtype(values):
         values = getattr(values, '_values', values)
-        labels, uniques = values.factorize()
+        labels, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
     else:
         values, dtype, _ = _ensure_data(values)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index d53caa265b9b3..c281bd80cb274 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -77,6 +77,24 @@ def _constructor_from_sequence(cls, scalars):
         """
         raise AbstractMethodError(cls)
 
+    @classmethod
+    def _from_factorized(cls, values, original):
+        """Reconstruct an ExtensionArray after factorization.
+
+        Parameters
+        ----------
+        values : ndarray
+            An integer ndarray with the factorized values.
+        original : ExtensionArray
+            The original ExtensionArray that factorize was called on.
+
+        See Also
+        --------
+        pandas.factorize
+        ExtensionArray.factorize
+        """
+        raise AbstractMethodError(cls)
+
     # ------------------------------------------------------------------------
     # Must be a Sequence
     # ------------------------------------------------------------------------
@@ -353,6 +371,73 @@ def unique(self):
         uniques = unique(self.astype(object))
         return self._constructor_from_sequence(uniques)
 
+    def _values_for_factorize(self):
+        # type: () -> Tuple[ndarray, Any]
+        """Return an array and missing value suitable for factorization.
+
+        Returns
+        -------
+        values : ndarray
+            An array suitable for factoraization. This should maintain order
+            and be a supported dtype (Float64, Int64, UInt64, String, Object).
+            By default, the extension array is cast to object dtype.
+        na_value : object
+            The value in `values` to consider missing. This will be treated
+            as NA in the factorization routines, so it will be coded as
+            `na_sentinal` and not included in `uniques`. By default,
+            ``np.nan`` is used.
+        """
+        return self.astype(object), np.nan
+
+    def factorize(self, na_sentinel=-1):
+        # type: (int) -> Tuple[ndarray, ExtensionArray]
+        """Encode the extension array as an enumerated type.
+
+        Parameters
+        ----------
+        na_sentinel : int, default -1
+            Value to use in the `labels` array to indicate missing values.
+
+        Returns
+        -------
+        labels : ndarray
+            An interger NumPy array that's an indexer into the original
+            ExtensionArray.
+        uniques : ExtensionArray
+            An ExtensionArray containing the unique values of `self`.
+
+            .. note::
+
+               uniques will *not* contain an entry for the NA value of
+               the ExtensionArray if there are any missing values present
+               in `self`.
+
+        See Also
+        --------
+        pandas.factorize : Top-level factorize method that dispatches here.
+
+        Notes
+        -----
+        :meth:`pandas.factorize` offers a `sort` keyword as well.
+        """
+        # Impelmentor note: There are two ways to override the behavior of
+        # pandas.factorize
+        # 1. _values_for_factorize and _from_factorize.
+        #    Specify the values passed to pandas' internal factorization
+        #    routines, and how to convert from those values back to the
+        #    original ExtensionArray.
+        # 2. ExtensionArray.factorize.
+        #    Complete control over factorization.
+        from pandas.core.algorithms import _factorize_array
+
+        arr, na_value = self._values_for_factorize()
+
+        labels, uniques = _factorize_array(arr, na_sentinel=na_sentinel,
+                                           na_value=na_value)
+
+        uniques = self._from_factorized(uniques, self)
+        return labels, uniques
+
     # ------------------------------------------------------------------------
     # Indexing methods
     # ------------------------------------------------------------------------
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index ac57660300be4..b5a4785fd98a6 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2118,58 +2118,15 @@ def unique(self):
             take_codes = sorted(take_codes)
         return cat.set_categories(cat.categories.take(take_codes))
 
-    def factorize(self, na_sentinel=-1):
-        """Encode the Categorical as an enumerated type.
-
-        Parameters
-        ----------
-        sort : boolean, default False
-            Sort by values
-        na_sentinel: int, default -1
-            Value to mark "not found"
-
-        Returns
-        -------
-        labels : ndarray
-            An integer NumPy array that's an indexer into the original
-            Categorical
-        uniques : Categorical
-            A Categorical whose values are the unique values and
-            whose dtype matches the original CategoricalDtype. Note that if
-            there any unobserved categories in ``self`` will not be present
-            in ``uniques.values``. They will be present in
-            ``uniques.categories``
-
-        Examples
-        --------
-        >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c'])
-        >>> labels, uniques = cat.factorize()
-        >>> labels
-        (array([0, 0, 1]),
-        >>> uniques
-        [a, c]
-        Categories (3, object): [a, b, c])
-
-        Missing values are handled
-
-        >>> labels, uniques = pd.factorize(pd.Categorical(['a', 'b', None]))
-        >>> labels
-        array([ 0,  1, -1])
-        >>> uniques
-        [a, b]
-        Categories (2, object): [a, b]
-        """
-        from pandas.core.algorithms import _factorize_array
-
+    def _values_for_factorize(self):
         codes = self.codes.astype('int64')
-        # We set missing codes, normally -1, to iNaT so that the
-        # Int64HashTable treats them as missing values.
-        labels, uniques = _factorize_array(codes, na_sentinel=na_sentinel,
-                                           na_value=-1)
-        uniques = self._constructor(self.categories.take(uniques),
-                                    categories=self.categories,
-                                    ordered=self.ordered)
-        return labels, uniques
+        return codes, -1
+
+    @classmethod
+    def _from_factorized(cls, uniques, original):
+        return original._constructor(original.categories.take(uniques),
+                                     categories=original.categories,
+                                     ordered=original.ordered)
 
     def equals(self, other):
         """
diff --git a/pandas/core/base.py b/pandas/core/base.py
index b3eb9a0ae7530..99e2af9fb3aeb 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -2,6 +2,7 @@
 Base and utility classes for pandas objects.
 """
 import warnings
+import textwrap
 from pandas import compat
 from pandas.compat import builtins
 import numpy as np
@@ -1151,24 +1152,16 @@ def memory_usage(self, deep=False):
             v += lib.memory_usage_of_objects(self.values)
         return v
 
+    @Substitution(
+        values='', order='', size_hint='',
+        sort=textwrap.dedent("""\
+            sort : boolean, default False
+                Sort `uniques` and shuffle `labels` to maintain the
+                relationship.
+            """))
+    @Appender(algorithms._shared_docs['factorize'])
     def factorize(self, sort=False, na_sentinel=-1):
-        """
-        Encode the object as an enumerated type or categorical variable
-
-        Parameters
-        ----------
-        sort : boolean, default False
-            Sort by values
-        na_sentinel: int, default -1
-            Value to mark "not found"
-
-        Returns
-        -------
-        labels : the indexer to the original array
-        uniques : the unique Index
-        """
-        from pandas.core.algorithms import factorize
-        return factorize(self, sort=sort, na_sentinel=na_sentinel)
+        return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel)
 
     _shared_docs['searchsorted'] = (
         """Find indices where elements should be inserted to maintain order.
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 4d467d62d0a56..f9f079cb21858 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 import pandas as pd
+import pandas.util.testing as tm
 
 from .base import BaseExtensionTests
 
@@ -82,3 +83,23 @@ def test_unique(self, data, box, method):
         assert len(result) == 1
         assert isinstance(result, type(data))
         assert result[0] == duplicated[0]
+
+    @pytest.mark.parametrize('na_sentinel', [-1, -2])
+    def test_factorize(self, data_for_grouping, na_sentinel):
+        labels, uniques = pd.factorize(data_for_grouping,
+                                       na_sentinel=na_sentinel)
+        expected_labels = np.array([0, 0, na_sentinel,
+                                   na_sentinel, 1, 1, 0, 2],
+                                   dtype='int64')
+        expected_uniques = data_for_grouping.take([0, 4, 7])
+
+        tm.assert_numpy_array_equal(labels, expected_labels)
+        self.assert_extension_array_equal(uniques, expected_uniques)
+
+    @pytest.mark.parametrize('na_sentinel', [-1, -2])
+    def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
+        l1, u1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel)
+        l2, u2 = data_for_grouping.factorize(na_sentinel=na_sentinel)
+
+        tm.assert_numpy_array_equal(l1, l2)
+        self.assert_extension_array_equal(u1, u2)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
index b602d9ee78e2a..7528299578326 100644
--- a/pandas/tests/extension/category/test_categorical.py
+++ b/pandas/tests/extension/category/test_categorical.py
@@ -46,6 +46,11 @@ def na_value():
     return np.nan
 
 
+@pytest.fixture
+def data_for_grouping():
+    return Categorical(['a', 'a', None, None, 'b', 'b', 'a', 'c'])
+
+
 class TestDtype(base.BaseDtypeTests):
     pass
 
diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
index 04dfb408fc378..4cb4ea21d9be3 100644
--- a/pandas/tests/extension/conftest.py
+++ b/pandas/tests/extension/conftest.py
@@ -66,3 +66,14 @@ def na_cmp():
 def na_value():
     """The scalar missing value for this type. Default 'None'"""
     return None
+
+
+@pytest.fixture
+def data_for_grouping():
+    """Data for factorization, grouping, and unique tests.
+
+    Expected to be like [B, B, NA, NA, A, A, B, C]
+
+    Where A < B < C and NA is missing
+    """
+    raise NotImplementedError
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index f1852542088ff..b66a14c77a059 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -36,6 +36,10 @@ def __init__(self, values):
     def _constructor_from_sequence(cls, scalars):
         return cls(scalars)
 
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values)
+
     def __getitem__(self, item):
         if isinstance(item, numbers.Integral):
             return self.values[item]
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index b6303ededd0dc..22c1a67a0d60d 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -49,6 +49,15 @@ def na_value():
     return decimal.Decimal("NaN")
 
 
+@pytest.fixture
+def data_for_grouping():
+    b = decimal.Decimal('1.0')
+    a = decimal.Decimal('0.0')
+    c = decimal.Decimal('2.0')
+    na = decimal.Decimal('NaN')
+    return DecimalArray([b, b, na, na, a, a, b, c])
+
+
 class BaseDecimal(object):
 
     def assert_series_equal(self, left, right, *args, **kwargs):
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index ee0951812b8f0..51a68a3701046 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -37,6 +37,10 @@ def __init__(self, values):
     def _constructor_from_sequence(cls, scalars):
         return cls(scalars)
 
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls([collections.UserDict(x) for x in values if x != ()])
+
     def __getitem__(self, item):
         if isinstance(item, numbers.Integral):
             return self.data[item]
@@ -108,6 +112,10 @@ def _concat_same_type(cls, to_concat):
         data = list(itertools.chain.from_iterable([x.data for x in to_concat]))
         return cls(data)
 
+    def _values_for_factorize(self):
+        frozen = tuple(tuple(x.items()) for x in self)
+        return np.array(frozen, dtype=object), ()
+
     def _values_for_argsort(self):
         # Disable NumPy's shape inference by including an empty tuple...
         # If all the elemnts of self are the same size P, NumPy will
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 8083a1ce69092..63d97d5e7a2c5 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -48,6 +48,17 @@ def na_cmp():
     return operator.eq
 
 
+@pytest.fixture
+def data_for_grouping():
+    return JSONArray([
+        {'b': 1}, {'b': 1},
+        {}, {},
+        {'a': 0, 'c': 2}, {'a': 0, 'c': 2},
+        {'b': 1},
+        {'c': 2},
+    ])
+
+
 class TestDtype(base.BaseDtypeTests):
     pass