From 4f13207a6fb468f48565363d67cfd0c1e5a4aada Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 25 Oct 2018 22:27:14 -0600 Subject: [PATCH 1/2] BUG: Fix IntervalTree handling of NaN --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/intervaltree.pxi.in | 6 ++++++ .../indexes/interval/test_interval_tree.py | 19 ++++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 768868d585721..b6c9ef52c1abe 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1094,6 +1094,7 @@ Interval - Bug in the ``IntervalIndex`` repr where a trailing comma was missing after the list of intervals (:issue:`20611`) - Bug in :class:`Interval` where scalar arithmetic operations did not retain the ``closed`` value (:issue:`22313`) - Bug in :class:`IntervalIndex` where indexing with datetime-like values raised a ``KeyError`` (:issue:`20636`) +- Bug in ``IntervalTree`` where data containing ``NaN`` triggered a warning and resulted in incorrect indexing queries with :class:`IntervalIndex` (:issue:`23352`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index 9ed76242a95c3..875848c00311f 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -72,6 +72,12 @@ cdef class IntervalTree(IntervalMixin): self.closed = closed + # GH 23352: ensure no nan in nodes + mask = ~np.isnan(self.left) + self.left = self.left[mask] + self.right = self.right[mask] + indices = indices[mask] + node_cls = NODE_CLASSES[str(self.dtype), closed] self.root = node_cls(self.left, self.right, indices, leaf_size) diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index 5f248bf7725e5..8f10f02cf0c8c 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -13,14 +13,27 @@ def 
dtype(request): return request.param -@pytest.fixture(scope='class') -def tree(dtype): - left = np.arange(5, dtype=dtype) +@pytest.fixture(scope='class', params=[ + np.arange(5, dtype='int64'), + np.arange(5, dtype='int32'), + np.arange(5, dtype='uint64'), + np.arange(5, dtype='float64'), + np.arange(5, dtype='float32'), + np.array([0, 1, 2, 3, 4, np.nan], dtype='float64'), + np.array([0, 1, 2, 3, 4, np.nan], dtype='float32')]) +def tree(request): + left = request.param return IntervalTree(left, left + 2) class TestIntervalTree(object): + def test_construction_nan(self): + # GH 23352 + left, right = [0, 1, 2, np.nan], [1, 2, 3, np.nan] + with tm.assert_produces_warning(None): + IntervalTree(left, right, leaf_size=2) + def test_get_loc(self, tree): tm.assert_numpy_array_equal(tree.get_loc(1), np.array([0], dtype='int64')) From 552accb1165ee20d313fe01e1b66c2b918599853 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Sun, 28 Oct 2018 23:49:11 -0600 Subject: [PATCH 2/2] remove unnecessary test; parametrize leaf_size --- .../indexes/interval/test_interval_tree.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py index 8f10f02cf0c8c..001a5109f7fc3 100644 --- a/pandas/tests/indexes/interval/test_interval_tree.py +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -13,7 +13,12 @@ def dtype(request): return request.param -@pytest.fixture(scope='class', params=[ +@pytest.fixture(params=[1, 2, 10]) +def leaf_size(request): + return request.param + + +@pytest.fixture(params=[ np.arange(5, dtype='int64'), np.arange(5, dtype='int32'), np.arange(5, dtype='uint64'), @@ -21,19 +26,13 @@ def dtype(request): np.arange(5, dtype='float32'), np.array([0, 1, 2, 3, 4, np.nan], dtype='float64'), np.array([0, 1, 2, 3, 4, np.nan], dtype='float32')]) -def tree(request): +def tree(request, leaf_size): left = request.param - return 
IntervalTree(left, left + 2) + return IntervalTree(left, left + 2, leaf_size=leaf_size) class TestIntervalTree(object): - def test_construction_nan(self): - # GH 23352 - left, right = [0, 1, 2, np.nan], [1, 2, 3, np.nan] - with tm.assert_produces_warning(None): - IntervalTree(left, right, leaf_size=2) - def test_get_loc(self, tree): tm.assert_numpy_array_equal(tree.get_loc(1), np.array([0], dtype='int64'))