From 73569971bc029db80499589c26dfa70fef9c0d67 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 7 Mar 2019 15:55:07 +0100
Subject: [PATCH 01/10] BUG: fix usage of na_sentinel with sort=True in
 factorize()

---
 doc/source/whatsnew/v0.24.2.rst |  1 +
 pandas/core/algorithms.py       | 20 +++++++++++++-------
 pandas/tests/test_algos.py      | 15 +++++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
index 7da99590d5a0a..839754b828186 100644
--- a/doc/source/whatsnew/v0.24.2.rst
+++ b/doc/source/whatsnew/v0.24.2.rst
@@ -32,6 +32,7 @@ Fixed Regressions
 - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
 - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
 - Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
+- Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`).
 
 .. _whatsnew_0242.enhancements:
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 4a71951e2435e..5ed2e3efe26a1 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -619,13 +619,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
     if sort and len(uniques) > 0:
         from pandas.core.sorting import safe_sort
-        try:
-            order = uniques.argsort()
-            order2 = order.argsort()
-            labels = take_1d(order2, labels, fill_value=na_sentinel)
-            uniques = uniques.take(order)
-        except TypeError:
-            # Mixed types, where uniques.argsort fails.
+        if na_sentinel == -1:
+            # GH-25409 take_1d only works for na_sentinels of -1
+            try:
+                order = uniques.argsort()
+                order2 = order.argsort()
+                labels = take_1d(order2, labels, fill_value=na_sentinel)
+                uniques = uniques.take(order)
+            except TypeError:
+                # Mixed types, where uniques.argsort fails.
+                uniques, labels = safe_sort(uniques, labels,
+                                            na_sentinel=na_sentinel,
+                                            assume_unique=True)
+        else:
             uniques, labels = safe_sort(uniques, labels,
                                         na_sentinel=na_sentinel,
                                         assume_unique=True)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 3f75c508d22f9..7c009f6a2633c 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -326,6 +326,21 @@ def test_parametrized_factorize_na_value(self, data, na_value):
         tm.assert_numpy_array_equal(l, expected_labels)
         tm.assert_numpy_array_equal(u, expected_uniques)
 
+    @pytest.mark.parametrize('sort', [True, False])
+    @pytest.mark.parametrize('na_sentinel', [-1, -10, 100])
+    def test_factorize_na_sentinel(self, sort, na_sentinel):
+        data = np.array(['b', 'a', None, 'b'], dtype=object)
+        labels, uniques = algos.factorize(data, sort=sort,
+                                          na_sentinel=na_sentinel)
+        if sort:
+            expected_labels = np.array([1, 0, na_sentinel, 1])
+            expected_uniques = np.array(['a', 'b'], dtype=object)
+        else:
+            expected_labels = np.array([0, 1, na_sentinel, 0])
+            expected_uniques = np.array(['b', 'a'], dtype=object)
+        tm.assert_numpy_array_equal(labels, expected_labels)
+        tm.assert_numpy_array_equal(uniques, expected_uniques)
+
 
 class TestUnique(object):
 

From e1ab3a489b1a5825527312d53dcc20f1545f04d6 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 11 Mar 2019 18:20:13 +0100
Subject: [PATCH 02/10] TST: use np.intp for expected labels in factorize test

---
 pandas/tests/test_algos.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 7c009f6a2633c..083307371b699 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -333,10 +333,10 @@ def test_factorize_na_sentinel(self, sort, na_sentinel):
         labels, uniques = algos.factorize(data, sort=sort,
                                           na_sentinel=na_sentinel)
         if sort:
-            expected_labels = np.array([1, 0, na_sentinel, 1])
+            expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
             expected_uniques = np.array(['a', 'b'], dtype=object)
         else:
-            expected_labels = np.array([0, 1, na_sentinel, 0])
+            expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
             expected_uniques = np.array(['b', 'a'], dtype=object)
         tm.assert_numpy_array_equal(labels, expected_labels)
         tm.assert_numpy_array_equal(uniques, expected_uniques)

From ba944eb09a784605db9a3d51aa154f1f29aa7a5f Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 12 Mar 2019 18:49:51 +0100
Subject: [PATCH 03/10] Attempt to move the take_1d fast path into safe_sort

---
 pandas/core/algorithms.py | 19 +++-------------
 pandas/core/sorting.py    | 46 +++++++++++++++++++++++++++------------
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 5ed2e3efe26a1..5f9640308f289 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -619,22 +619,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
 
     if sort and len(uniques) > 0:
         from pandas.core.sorting import safe_sort
-        if na_sentinel == -1:
-            # GH-25409 take_1d only works for na_sentinels of -1
-            try:
-                order = uniques.argsort()
-                order2 = order.argsort()
-                labels = take_1d(order2, labels, fill_value=na_sentinel)
-                uniques = uniques.take(order)
-            except TypeError:
-                # Mixed types, where uniques.argsort fails.
-                uniques, labels = safe_sort(uniques, labels,
-                                            na_sentinel=na_sentinel,
-                                            assume_unique=True)
-        else:
-            uniques, labels = safe_sort(uniques, labels,
-                                        na_sentinel=na_sentinel,
-                                        assume_unique=True)
+        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
+                                    assume_unique=True,
+                                    check_outofbounds=False)
 
     uniques = _reconstruct_data(uniques, dtype, original)
 
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 0b5b017bec9ac..5ffb1edf68f3e 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -11,6 +11,7 @@
 from pandas.core.dtypes.common import (
     ensure_int64, ensure_platform_int, is_categorical_dtype, is_list_like)
 from pandas.core.dtypes.missing import isna
+from pandas.core.dtypes.generic import ABCExtensionArray
 
 import pandas.core.algorithms as algorithms
 
@@ -404,7 +405,8 @@ def _reorder_by_uniques(uniques, labels):
     return uniques, labels
 
 
-def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
+def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, 
+              check_outofbounds=True):
     """
     Sort ``values`` and reorder corresponding ``labels``.
     ``values`` should be unique if ``labels`` is not None.
@@ -425,6 +427,10 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
     assume_unique : bool, default False
         When True, ``values`` are assumed to be unique, which can speed up
         the calculation. Ignored when ``labels`` is None.
+    check_outofbounds : bool, default True
+        Check if labels are out of bound for the values and put out of bound
+        labels equal to na_sentinel. If ``check_outofbounds=False``, it is
+        assumed there are no out of bound labels.
 
     Returns
     -------
@@ -446,8 +452,8 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
         raise TypeError("Only list-like objects are allowed to be passed to"
                         "safe_sort as values")
 
-    if not isinstance(values, np.ndarray):
-
+    if (not isinstance(values, np.ndarray)
+            and not isinstance(values, ABCExtensionArray)):
         # don't convert to string types
         dtype, _ = infer_dtype_from_array(values)
         values = np.asarray(values, dtype=dtype)
@@ -461,7 +467,8 @@ def sort_mixed(values):
         return np.concatenate([nums, np.asarray(strs, dtype=object)])
 
     sorter = None
-    if PY3 and lib.infer_dtype(values, skipna=False) == 'mixed-integer':
+    if (PY3 and not isinstance(values, ABCExtensionArray)
+            and lib.infer_dtype(values, skipna=False) == 'mixed-integer'):
         # unorderable in py3 if mixed str/int
         ordered = sort_mixed(values)
     else:
@@ -494,15 +501,26 @@ def sort_mixed(values):
         t.map_locations(values)
         sorter = ensure_platform_int(t.lookup(ordered))
 
-    reverse_indexer = np.empty(len(sorter), dtype=np.int_)
-    reverse_indexer.put(sorter, np.arange(len(sorter)))
-
-    mask = (labels < -len(values)) | (labels >= len(values)) | \
-        (labels == na_sentinel)
-
-    # (Out of bound indices will be masked with `na_sentinel` next, so we may
-    # deal with them here without performance loss using `mode='wrap'`.)
-    new_labels = reverse_indexer.take(labels, mode='wrap')
-    np.putmask(new_labels, mask, na_sentinel)
+    if na_sentinel == -1:
+        # take_1d is faster, but only works for na_sentinels of -1
+        order2 = sorter.argsort()
+        new_labels = algorithms.take_1d(order2, labels, fill_value=-1)
+        if check_outofbounds:
+            mask = (labels < -len(values)) | (labels >= len(values))
+        else:
+            mask = None
+    else:
+        reverse_indexer = np.empty(len(sorter), dtype=np.int_)
+        reverse_indexer.put(sorter, np.arange(len(sorter)))
+        # Out of bound indices will be masked with `na_sentinel` next, so we
+        # may deal with them here without performance loss using `mode='wrap'`
+        new_labels = reverse_indexer.take(labels, mode='wrap')
+
+        mask = labels == na_sentinel
+        if check_outofbounds:
+            mask = mask | (labels < -len(values)) | (labels >= len(values))
+
+    if mask is not None:
+        np.putmask(new_labels, mask, na_sentinel)
 
     return ordered, ensure_platform_int(new_labels)

From fdf330aab9740209c76b33e694e59801529ea197 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Apr 2019 09:19:40 +0200
Subject: [PATCH 04/10] Address feedback from Jeff (rename to verify)

---
 pandas/core/algorithms.py |  3 +--
 pandas/core/sorting.py    | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index fe83d377721f0..fc74742ace3e0 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -618,8 +618,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     if sort and len(uniques) > 0:
         from pandas.core.sorting import safe_sort
         uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
-                                    assume_unique=True,
-                                    check_outofbounds=False)
+                                    assume_unique=True, verify=False)
 
     uniques = _reconstruct_data(uniques, dtype, original)
 
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index af6bfa22b3489..85b2eb3bafde2 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -9,9 +9,9 @@
 
 from pandas.core.dtypes.cast import infer_dtype_from_array
 from pandas.core.dtypes.common import (
-    ensure_int64, ensure_platform_int, is_categorical_dtype, is_list_like)
+    ensure_int64, ensure_platform_int, is_categorical_dtype,
+    is_extension_array_dtype, is_list_like)
 from pandas.core.dtypes.missing import isna
-from pandas.core.dtypes.generic import ABCExtensionArray
 
 import pandas.core.algorithms as algorithms
 
@@ -406,7 +406,7 @@ def _reorder_by_uniques(uniques, labels):
 
 
 def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, 
-              check_outofbounds=True):
+              verify=True):
     """
     Sort ``values`` and reorder corresponding ``labels``.
     ``values`` should be unique if ``labels`` is not None.
@@ -427,10 +427,12 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
     assume_unique : bool, default False
         When True, ``values`` are assumed to be unique, which can speed up
         the calculation. Ignored when ``labels`` is None.
-    check_outofbounds : bool, default True
+    verify : bool, default True
         Check if labels are out of bound for the values and put out of bound
-        labels equal to na_sentinel. If ``check_outofbounds=False``, it is
-        assumed there are no out of bound labels.
+        labels equal to na_sentinel. If ``verify=False``, it is assumed there
+        are no out of bound labels.
+
+        .. versionadded:: 0.25.0
 
     Returns
     -------
@@ -453,7 +455,7 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
                         "safe_sort as values")
 
     if (not isinstance(values, np.ndarray)
-            and not isinstance(values, ABCExtensionArray)):
+            and not is_extension_array_dtype(values)):
         # don't convert to string types
         dtype, _ = infer_dtype_from_array(values)
         values = np.asarray(values, dtype=dtype)
@@ -467,7 +469,7 @@ def sort_mixed(values):
         return np.concatenate([nums, np.asarray(strs, dtype=object)])
 
     sorter = None
-    if (PY3 and not isinstance(values, ABCExtensionArray)
+    if (not is_extension_array_dtype(values)
             and lib.infer_dtype(values, skipna=False) == 'mixed-integer'):
         # unorderable in py3 if mixed str/int
         ordered = sort_mixed(values)
@@ -505,7 +507,7 @@ def sort_mixed(values):
         # take_1d is faster, but only works for na_sentinels of -1
         order2 = sorter.argsort()
         new_labels = algorithms.take_1d(order2, labels, fill_value=-1)
-        if check_outofbounds:
+        if verify:
             mask = (labels < -len(values)) | (labels >= len(values))
         else:
             mask = None
@@ -517,7 +519,7 @@ def sort_mixed(values):
         new_labels = reverse_indexer.take(labels, mode='wrap')
 
         mask = labels == na_sentinel
-        if check_outofbounds:
+        if verify:
             mask = mask | (labels < -len(values)) | (labels >= len(values))
 
     if mask is not None:

From b08ea6d4761839e6ae52c29db1458c1d61115880 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Apr 2019 09:52:34 +0200
Subject: [PATCH 05/10] add tests for safe_sort

---
 pandas/core/sorting.py       |  2 +-
 pandas/tests/test_sorting.py | 43 +++++++++++++++++++++++++++---------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 85b2eb3bafde2..3e016da8260b5 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -430,7 +430,7 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
     verify : bool, default True
         Check if labels are out of bound for the values and put out of bound
         labels equal to na_sentinel. If ``verify=False``, it is assumed there
-        are no out of bound labels.
+        are no out of bound labels. Ignored when ``labels`` is None.
 
         .. versionadded:: 0.25.0
 
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index 04a50cf6facd5..86c3734c55db2 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -7,7 +7,7 @@
 import pytest
 
 from pandas import (
-    DataFrame, MultiIndex, Series, compat, concat, merge, to_datetime)
+    DataFrame, MultiIndex, Series, array, compat, concat, merge, to_datetime)
 from pandas.core import common as com
 from pandas.core.sorting import (
     decons_group_index, get_group_index, is_int64_overflow_possible,
@@ -359,34 +359,39 @@ def test_basic_sort(self):
         expected = np.array([])
         tm.assert_numpy_array_equal(result, expected)
 
-    def test_labels(self):
+    @pytest.mark.parametrize('verify', [True, False])
+    def test_labels(self, verify):
         values = [3, 1, 2, 0, 4]
         expected = np.array([0, 1, 2, 3, 4])
 
         labels = [0, 1, 1, 2, 3, 0, -1, 4]
-        result, result_labels = safe_sort(values, labels)
+        result, result_labels = safe_sort(values, labels, verify=verify)
         expected_labels = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 
         # na_sentinel
         labels = [0, 1, 1, 2, 3, 0, 99, 4]
-        result, result_labels = safe_sort(values, labels,
-                                          na_sentinel=99)
+        result, result_labels = safe_sort(values, labels, na_sentinel=99,
+                                          verify=verify)
         expected_labels = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 
-        # out of bound indices
-        labels = [0, 101, 102, 2, 3, 0, 99, 4]
-        result, result_labels = safe_sort(values, labels)
-        expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp)
+        labels = []
+        result, result_labels = safe_sort(values, labels, verify=verify)
+        expected_labels = np.array([], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 
-        labels = []
+    def test_labels_out_of_bound(self):
+        values = [3, 1, 2, 0, 4]
+        expected = np.array([0, 1, 2, 3, 4])
+
+        # out of bound indices
+        labels = [0, 101, 102, 2, 3, 0, 99, 4]
         result, result_labels = safe_sort(values, labels)
-        expected_labels = np.array([], dtype=np.intp)
+        expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 
@@ -431,3 +436,19 @@ def test_exceptions(self):
         with pytest.raises(ValueError,
                            match="values should be unique"):
             safe_sort(values=[0, 1, 2, 1], labels=[0, 1])
+
+    @pytest.mark.parametrize('verify', [True, False])
+    def test_extension_array(self, verify):
+        # a = array([1, 3, np.nan, 2], dtype='Int64')
+        a = array([1, 3, 2], dtype='Int64')
+        result = safe_sort(a)
+        # expected = array([1, 2, 3, np.nan], dtype='Int64')
+        expected = array([1, 2, 3], dtype='Int64')
+        tm.assert_extension_array_equal(result, expected)
+
+        a = array([1, 3, 2], dtype='Int64')
+        result, labels = safe_sort(a, [0, 1, 2], verify=verify)
+        expected_values = array([1, 2, 3], dtype='Int64')
+        expected_labels = np.array([0, 2, 1], dtype=np.intp)
+        tm.assert_extension_array_equal(result, expected_values)
+        tm.assert_numpy_array_equal(labels, expected_labels)

From 9de26fc83c39b4fc8863a9a827eedf7b1a117ea0 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Apr 2019 09:58:02 +0200
Subject: [PATCH 06/10] additional test for other na_sentinel in case of out of
 bound indices

---
 pandas/tests/test_sorting.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index 86c3734c55db2..37d2da9b45508 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -384,14 +384,18 @@ def test_labels(self, verify):
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 
-    def test_labels_out_of_bound(self):
+    @pytest.mark.parametrize('na_sentinel', [-1, 99])
+    def test_labels_out_of_bound(self, na_sentinel):
         values = [3, 1, 2, 0, 4]
         expected = np.array([0, 1, 2, 3, 4])
 
         # out of bound indices
         labels = [0, 101, 102, 2, 3, 0, 99, 4]
-        result, result_labels = safe_sort(values, labels)
-        expected_labels = np.array([3, -1, -1, 2, 0, 3, -1, 4], dtype=np.intp)
+        result, result_labels = safe_sort(
+            values, labels, na_sentinel=na_sentinel)
+        expected_labels = np.array(
+            [3, na_sentinel, na_sentinel, 2, 0, 3, na_sentinel, 4],
+            dtype=np.intp)
         tm.assert_numpy_array_equal(result, expected)
         tm.assert_numpy_array_equal(result_labels, expected_labels)
 

From bcb8c7e37f7b7b8bba8e3f504696d9c7638fcbc0 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Apr 2019 10:06:00 +0200
Subject: [PATCH 07/10] additional test for EA with custom na_sentinel

---
 pandas/tests/test_sorting.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index 37d2da9b45508..b0dd2d5a1c27e 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -441,8 +441,7 @@ def test_exceptions(self):
                            match="values should be unique"):
             safe_sort(values=[0, 1, 2, 1], labels=[0, 1])
 
-    @pytest.mark.parametrize('verify', [True, False])
-    def test_extension_array(self, verify):
+    def test_extension_array(self):
         # a = array([1, 3, np.nan, 2], dtype='Int64')
         a = array([1, 3, 2], dtype='Int64')
         result = safe_sort(a)
@@ -450,9 +449,13 @@ def test_extension_array(self, verify):
         expected = array([1, 2, 3], dtype='Int64')
         tm.assert_extension_array_equal(result, expected)
 
+    @pytest.mark.parametrize('verify', [True, False])
+    @pytest.mark.parametrize('na_sentinel', [-1, 99])
+    def test_extension_array_labels(self, verify, na_sentinel):
         a = array([1, 3, 2], dtype='Int64')
-        result, labels = safe_sort(a, [0, 1, 2], verify=verify)
+        result, labels = safe_sort(a, [0, 1, na_sentinel, 2],
+                                   na_sentinel=na_sentinel, verify=verify)
         expected_values = array([1, 2, 3], dtype='Int64')
-        expected_labels = np.array([0, 2, 1], dtype=np.intp)
+        expected_labels = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
         tm.assert_extension_array_equal(result, expected_values)
         tm.assert_numpy_array_equal(labels, expected_labels)

From 13f6706e83760ae3f1f04fe8dce95b9b17bc7fec Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 5 Apr 2019 10:18:09 +0200
Subject: [PATCH 08/10] update factorize test for EAs with custom na_sentinel
 (which now works) + add whatsnew

---
 doc/source/whatsnew/v0.25.0.rst |  2 +-
 pandas/tests/test_algos.py      | 19 ++++++++++++++-----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index d2897afa762b1..aecabbb3d10ae 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -408,7 +408,7 @@ Other
 ^^^^^
 
 - Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`)
--
+- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). 
 -
 
 
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index b64786de264cd..4bc83bbd09ff1 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -24,6 +24,7 @@
 import pandas.core.algorithms as algos
 from pandas.core.arrays import DatetimeArray
 import pandas.core.common as com
+from pandas.core.sorting import safe_sort
 import pandas.util.testing as tm
 from pandas.util.testing import assert_almost_equal
 
@@ -327,18 +328,26 @@ def test_parametrized_factorize_na_value(self, data, na_value):
 
     @pytest.mark.parametrize('sort', [True, False])
     @pytest.mark.parametrize('na_sentinel', [-1, -10, 100])
-    def test_factorize_na_sentinel(self, sort, na_sentinel):
-        data = np.array(['b', 'a', None, 'b'], dtype=object)
+    @pytest.mark.parametrize('data, uniques', [
+        (np.array(['b', 'a', None, 'b'], dtype=object),
+         np.array(['b', 'a'], dtype=object)),
+        (pd.array([2, 1, np.nan, 2], dtype='Int64'),
+         pd.array([2, 1], dtype='Int64'))],
+        ids=['numpy_array', 'extension_array'])
+    def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques):
         labels, uniques = algos.factorize(data, sort=sort,
                                           na_sentinel=na_sentinel)
         if sort:
             expected_labels = np.array([1, 0, na_sentinel, 1], dtype=np.intp)
-            expected_uniques = np.array(['a', 'b'], dtype=object)
+            expected_uniques = safe_sort(uniques)
         else:
             expected_labels = np.array([0, 1, na_sentinel, 0], dtype=np.intp)
-            expected_uniques = np.array(['b', 'a'], dtype=object)
+            expected_uniques = uniques
         tm.assert_numpy_array_equal(labels, expected_labels)
-        tm.assert_numpy_array_equal(uniques, expected_uniques)
+        if isinstance(data, np.ndarray):
+            tm.assert_numpy_array_equal(uniques, expected_uniques)
+        else:
+            tm.assert_extension_array_equal(uniques, expected_uniques)
 
 
 class TestUnique(object):

From e350641dbb6cced71507109aabc4d57fd5fa3969 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 6 May 2019 20:13:08 +0200
Subject: [PATCH 09/10] linting

---
 pandas/core/sorting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 79b164406a218..21c0c8f747b10 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -404,7 +404,7 @@ def _reorder_by_uniques(uniques, labels):
     return uniques, labels
 
 
-def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False, 
+def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False,
               verify=True):
     """
     Sort ``values`` and reorder corresponding ``labels``.

From 151aa6ae5d3584042f21b9a3438d8f2ce2453c13 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 6 May 2019 20:52:12 +0200
Subject: [PATCH 10/10] more linting

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index d85cf07e36298..2784b9299e447 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -428,7 +428,7 @@ Other
 ^^^^^
 
 - Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`)
-- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). 
+- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`).
 
 
 .. _whatsnew_0.250.contributors: