-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PERF/REF: improve performance of Series.searchsorted, PandasArray.searchsorted, collect functionality #22034
Changes from all commits
6ad3f12
60742c3
672802d
c1a337c
686a0a1
ea8280e
a9905fd
9e6ed43
bcbe226
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
|
||
import pandas as pd | ||
from pandas.api.extensions import register_extension_dtype | ||
from pandas.api.types import is_scalar | ||
from pandas.core.arrays import PandasArray, integer_array, period_array | ||
from pandas.tests.extension.decimal import ( | ||
DecimalArray, DecimalDtype, to_decimal) | ||
|
@@ -254,3 +255,51 @@ def test_array_not_registered(registry_without_decimal): | |
result = pd.array(data, dtype=DecimalDtype) | ||
expected = DecimalArray._from_sequence(data) | ||
tm.assert_equal(result, expected) | ||
|
||
|
||
class TestArrayAnalytics(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are already tests in pandas/tests/extension/base/methods.py; are these supplemental? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Extension arrays and PandasArrays do not have the same interface, so the tests in pandas/tests/extension/base/methods.py will not work for PandasArrays. BTW, I copied these tests from pandas/tests/series/test_analytics.py. |
||
def test_searchsorted(self, string_dtype): | ||
arr = pd.array(['a', 'b', 'c'], dtype=string_dtype) | ||
|
||
result = arr.searchsorted('a', side='left') | ||
assert is_scalar(result) | ||
assert result == 0 | ||
|
||
result = arr.searchsorted('a', side='right') | ||
assert is_scalar(result) | ||
assert result == 1 | ||
|
||
def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype): | ||
arr = pd.array([1, 3, 90], dtype=any_real_dtype) | ||
result = arr.searchsorted(30) | ||
assert is_scalar(result) | ||
assert result == 2 | ||
|
||
result = arr.searchsorted([30]) | ||
expected = np.array([2], dtype=np.intp) | ||
tm.assert_numpy_array_equal(result, expected) | ||
|
||
def test_searchsorted_numeric_dtypes_vector(self, any_real_dtype): | ||
arr = pd.array([1, 3, 90], dtype=any_real_dtype) | ||
result = arr.searchsorted([2, 30]) | ||
expected = np.array([1, 2], dtype=np.intp) | ||
tm.assert_numpy_array_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('arr, val', [ | ||
[pd.date_range('20120101', periods=10, freq='2D'), | ||
pd.Timestamp('20120102')], | ||
[pd.date_range('20120101', periods=10, freq='2D', tz='Asia/Hong_Kong'), | ||
pd.Timestamp('20120102', tz='Asia/Hong_Kong')], | ||
[pd.timedelta_range(start='1 day', end='10 days', periods=10), | ||
pd.Timedelta('2 days')]]) | ||
def test_search_sorted_datetime64_scalar(self, arr, val): | ||
arr = pd.array(arr) | ||
result = arr.searchsorted(val) | ||
assert is_scalar(result) | ||
assert result == 1 | ||
|
||
def test_searchsorted_sorter(self, any_real_dtype): | ||
arr = pd.array([3, 1, 2], dtype=any_real_dtype) | ||
result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) | ||
expected = np.array([0, 2], dtype=np.intp) | ||
tm.assert_numpy_array_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a follow-up, we can add extension array (EA) types here (Int8 and so on).