From e2aa1d67d471cba47ddff8d750db2eda00cd0d5c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 6 Nov 2020 08:06:53 +0000 Subject: [PATCH 01/15] check for inconsistent namespace usage --- .pre-commit-config.yaml | 6 ++++ ci/code_checks.sh | 13 -------- pandas/tests/extension/test_categorical.py | 4 +-- pandas/tests/groupby/aggregate/test_other.py | 6 ++-- pandas/tests/groupby/test_counting.py | 11 +++---- pandas/tests/groupby/test_grouping.py | 32 +++++++++---------- pandas/tests/groupby/test_quantile.py | 6 ++-- pandas/tests/groupby/test_timegrouper.py | 10 +++--- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/indexes/period/test_formats.py | 4 +-- pandas/tests/indexes/test_numeric.py | 4 +-- .../tests/indexes/timedeltas/test_formats.py | 4 +-- pandas/tests/io/test_compression.py | 4 +-- pandas/tests/resample/test_time_grouper.py | 32 +++++++++---------- pandas/tests/reshape/merge/test_merge_asof.py | 22 ++++++------- pandas/tests/series/indexing/test_indexing.py | 2 +- pandas/tests/window/test_expanding.py | 10 +++--- ...check_for_inconsistent_pandas_namespace.py | 32 +++++++++++++++++++ 18 files changed, 114 insertions(+), 90 deletions(-) create mode 100644 scripts/check_for_inconsistent_pandas_namespace.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c1e4e330c903..f9b396715664a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -119,6 +119,12 @@ repos: entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" types: [python] exclude: ^(asv_bench|pandas/tests|doc)/ + - id: inconsistent-namespace-usage + name: 'Check for inconsistent use of pandas namespace in tests' + entry: python scripts/check_for_inconsistent_pandas_namespace.py + language: python + types: [python] + files: ^pandas/tests/ - id: FrameOrSeriesUnion name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\] diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b5d63e259456b..c920500aac9cd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -37,12 +37,6 @@ function invgrep { return $((! $EXIT_STATUS)) } -function check_namespace { - local -r CLASS=${1} - grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}[(\.]" - test $? -gt 0 -} - if [[ "$GITHUB_ACTIONS" == "true" ]]; then FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s" INVGREP_PREPEND="##[error]" @@ -120,13 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG invgrep -R --include=*.{py,pyx} '!r}' pandas RET=$(($RET + $?)) ; echo $MSG "DONE" - - # ------------------------------------------------------------------------- - MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG - for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do - check_namespace ${class} - RET=$(($RET + $?)) - done echo $MSG "DONE" fi diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index e8d82b525c9f4..95f338cbc3240 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -192,7 +192,7 @@ def test_cast_category_to_extension_dtype(self, expected): ( "datetime64[ns, MET]", pd.DatetimeIndex( - [pd.Timestamp("2015-01-01 00:00:00+0100", tz="MET")] + [Timestamp("2015-01-01 00:00:00+0100", tz="MET")] ).array, ), ], @@ -254,7 +254,7 @@ def _compare_other(self, s, data, op_name, other): @pytest.mark.parametrize( "categories", - [["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]], + [["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]], ) def test_not_equal_with_na(self, categories): # https://github.com/pandas-dev/pandas/issues/32276 diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 15803d4b0ef94..5d0f6d6262899 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -425,7 +425,7 @@ def test_agg_over_numpy_arrays(): result = df.groupby("category").agg(sum) expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] - expected_index = pd.Index([1, 2], name="category") + expected_index = Index([1, 2], name="category") expected_column = ["arraydata"] expected = DataFrame(expected_data, index=expected_index, columns=expected_column) @@ -497,7 +497,7 @@ def test_sum_uint64_overflow(): df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) df = df + 9223372036854775807 - index = pd.Index( + index = Index( [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 ) expected = DataFrame( @@ -596,7 +596,7 @@ def test_agg_lambda_with_timezone(): result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) expected = DataFrame( [pd.Timestamp("2018-01-01", tz="UTC")], - index=pd.Index([1], name="tag"), + index=Index([1], name="tag"), columns=["date"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 04b73b16ae2c7..1317f0f68216a 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -4,7 +4,6 @@ import numpy as np import pytest -import pandas as pd from pandas import ( DataFrame, Index, @@ -260,7 +259,7 @@ def test_groupby_timedelta_cython_count(): df = DataFrame( {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} ) - expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt") + expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt") result = df.groupby("g").delt.count() tm.assert_series_equal(expected, result) @@ -317,12 +316,12 @@ def test_count_non_nulls(): def test_count_object(): df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() - expected = Series([3, 3], index=pd.Index([2, 3], name="c"), name="a") + expected = Series([3, 3], index=Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) result = df.groupby("c").a.count() - expected = Series([1, 3], index=pd.Index([2, 3], name="c"), name="a") + expected = Series([1, 3], index=Index([2, 3], name="c"), name="a") tm.assert_series_equal(result, expected) @@ -354,7 +353,7 @@ def test_lower_int_prec_count(): ) result = df.groupby("grp").count() expected = DataFrame( - {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp") + {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp") ) tm.assert_frame_equal(result, expected) @@ -374,5 +373,5 @@ def __eq__(self, other): df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)}) result = df.groupby("grp").count() - expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp")) + expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp")) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 4d6a1afe06e1c..4aefb73bf912c 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -614,12 +614,12 @@ def test_list_grouper_with_nat(self): # Grouper in a list grouping result = df.groupby([grouper]) - expected = {pd.Timestamp("2011-01-01"): Index(list(range(364)))} + expected = {Timestamp("2011-01-01"): Index(list(range(364)))} tm.assert_dict_equal(result.groups, expected) # Test case without a list result = df.groupby(grouper) - expected = {pd.Timestamp("2011-01-01"): 365} + expected = {Timestamp("2011-01-01"): 365} tm.assert_dict_equal(result.groups, expected) @pytest.mark.parametrize( @@ -938,12 +938,12 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) df = DataFrame( @@ -953,12 +953,12 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0, 2], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) # length=3 @@ -969,15 +969,15 @@ def test_groupby_with_small_elem(self): grouped = df.groupby([pd.Grouper(freq="M"), "event"]) assert len(grouped.groups) == 3 assert grouped.ngroups == 3 - assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups - assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups - assert (pd.Timestamp("2014-08-31"), "start") in grouped.groups + assert (Timestamp("2014-09-30"), "start") in grouped.groups + assert (Timestamp("2013-10-31"), "start") in grouped.groups + assert (Timestamp("2014-08-31"), "start") in grouped.groups - res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + res = grouped.get_group((Timestamp("2014-09-30"), "start")) tm.assert_frame_equal(res, df.iloc[[0], :]) - res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + res = grouped.get_group((Timestamp("2013-10-31"), "start")) tm.assert_frame_equal(res, df.iloc[[1], :]) - res = grouped.get_group((pd.Timestamp("2014-08-31"), "start")) + res = grouped.get_group((Timestamp("2014-08-31"), "start")) tm.assert_frame_equal(res, df.iloc[[2], :]) def test_grouping_string_repr(self): diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index e48f10ebacb79..bd6d33c59a48a 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -194,7 +194,7 @@ def test_quantile_missing_group_values_correct_results( df = DataFrame({"key": key, "val": val}) expected = DataFrame( - expected_val, index=pd.Index(expected_key, name="key"), columns=["val"] + expected_val, index=Index(expected_key, name="key"), columns=["val"] ) grp = df.groupby("key") @@ -223,7 +223,7 @@ def test_groupby_quantile_nullable_array(values, q): idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None]) true_quantiles = [0.0, 0.5, 1.0] else: - idx = pd.Index(["x", "y"], name="a") + idx = Index(["x", "y"], name="a") true_quantiles = [0.5] expected = pd.Series(true_quantiles * 2, index=idx, name="b") @@ -251,6 +251,6 @@ def test_groupby_timedelta_quantile(): pd.Timedelta("0 days 00:00:02.990000"), ] }, - index=pd.Index([1, 2], name="group"), + index=Index([1, 2], name="group"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 612079447576f..c3282758a23f2 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -452,7 +452,7 @@ def test_groupby_groups_datetimeindex(self): result = df.groupby(level="date").groups dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] expected = { - Timestamp(date): pd.DatetimeIndex([date], name="date") for date in dates + Timestamp(date): DatetimeIndex([date], name="date") for date in dates } tm.assert_dict_equal(result, expected) @@ -460,7 +460,7 @@ def test_groupby_groups_datetimeindex(self): for date in dates: result = grouped.get_group(date) data = [[df.loc[date, "A"], df.loc[date, "B"]]] - expected_index = pd.DatetimeIndex([date], name="date", freq="D") + expected_index = DatetimeIndex([date], name="date", freq="D") expected = DataFrame(data, columns=list("AB"), index=expected_index) tm.assert_frame_equal(result, expected) @@ -484,7 +484,7 @@ def test_groupby_groups_datetimeindex_tz(self): ) df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific")) - exp_idx1 = pd.DatetimeIndex( + exp_idx1 = DatetimeIndex( [ "2011-07-19 07:00:00", "2011-07-19 07:00:00", @@ -508,13 +508,13 @@ def test_groupby_groups_datetimeindex_tz(self): tm.assert_frame_equal(result, expected) # by level - didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo") + didx = DatetimeIndex(dates, tz="Asia/Tokyo") df = DataFrame( {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, index=didx, ) - exp_idx = pd.DatetimeIndex( + exp_idx = DatetimeIndex( ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], tz="Asia/Tokyo", ) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 1aeff7426c33a..d7426a5e3b42e 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1134,7 +1134,7 @@ def test_categorical_and_not_categorical_key(observed): # GH 32494 df_with_categorical = DataFrame( { - "A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]), + "A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]), "B": [1, 2, 3], "C": ["a", "b", "a"], } diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 150a797169c14..b60ae8819023f 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import PeriodIndex +from pandas import PeriodIndex, Series import pandas._testing as tm @@ -154,7 +154,7 @@ def test_representation_to_series(self): [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9], ): - result = repr(pd.Series(idx)) + result = repr(Series(idx)) assert result == expected def test_summary(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 045816b3c9513..0c990b0456b5c 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -273,7 +273,7 @@ def test_equals_numeric_other_index_type(self, other): def test_lookups_datetimelike_values(self, vals): # If we have datetime64 or timedelta64 values, make sure they are # wrappped correctly GH#31163 - ser = pd.Series(vals, index=range(3, 6)) + ser = Series(vals, index=range(3, 6)) ser.index = ser.index.astype("float64") expected = vals[1] @@ -642,7 +642,7 @@ def test_range_float_union_dtype(): def test_uint_index_does_not_convert_to_float64(box): # https://github.com/pandas-dev/pandas/issues/28279 # https://github.com/pandas-dev/pandas/issues/28023 - series = pd.Series( + series = Series( [0, 1, 2, 3, 4, 5], index=[ 7606741985629028552, diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 1dfc5b5305008..8a8e2abd17165 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -1,7 +1,7 @@ import pytest import pandas as pd -from pandas import TimedeltaIndex +from pandas import Series, TimedeltaIndex class TestTimedeltaIndexRendering: @@ -62,7 +62,7 @@ def test_representation_to_series(self): for idx, expected in zip( [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] ): - result = repr(pd.Series(idx)) + result = repr(Series(idx)) assert result == expected def test_summary(self): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 8d7d5d85cbb48..43a31ff1e4b58 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -205,8 +205,8 @@ def test_with_missing_lzma_runtime(): import sys import pytest sys.modules['lzma'] = None - import pandas - df = pandas.DataFrame() + import pandas as pd + df = pd.DataFrame() with pytest.raises(RuntimeError, match='lzma module'): df.to_csv('foo.csv', compression='xz') """ diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 0832724110203..50e7cf9bd8eda 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -5,7 +5,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, Timestamp import pandas._testing as tm from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range @@ -306,21 +306,21 @@ def test_groupby_resample_interpolate(): expected_ind = pd.MultiIndex.from_tuples( [ (50, "2018-01-07"), - (50, pd.Timestamp("2018-01-08")), - (50, pd.Timestamp("2018-01-09")), - (50, pd.Timestamp("2018-01-10")), - (50, pd.Timestamp("2018-01-11")), - (50, pd.Timestamp("2018-01-12")), - (50, pd.Timestamp("2018-01-13")), - (50, pd.Timestamp("2018-01-14")), - (50, pd.Timestamp("2018-01-15")), - (50, pd.Timestamp("2018-01-16")), - (50, pd.Timestamp("2018-01-17")), - (50, pd.Timestamp("2018-01-18")), - (50, pd.Timestamp("2018-01-19")), - (50, pd.Timestamp("2018-01-20")), - (50, pd.Timestamp("2018-01-21")), - (60, pd.Timestamp("2018-01-14")), + (50, Timestamp("2018-01-08")), + (50, Timestamp("2018-01-09")), + (50, Timestamp("2018-01-10")), + (50, Timestamp("2018-01-11")), + (50, Timestamp("2018-01-12")), + (50, Timestamp("2018-01-13")), + (50, Timestamp("2018-01-14")), + (50, Timestamp("2018-01-15")), + (50, Timestamp("2018-01-16")), + (50, Timestamp("2018-01-17")), + (50, Timestamp("2018-01-18")), + (50, Timestamp("2018-01-19")), + (50, Timestamp("2018-01-20")), + (50, Timestamp("2018-01-21")), + (60, Timestamp("2018-01-14")), ], names=["volume", "week_starting"], ) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 895de2b748c34..613e7d423d87f 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -98,7 +98,7 @@ def test_examples2(self): pd.merge_asof(trades, quotes, on="time", by="ticker") pd.merge_asof( - trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") + trades, quotes, on="time", by="ticker", tolerance=Timedelta("2ms") ) expected = pd.DataFrame( @@ -126,7 +126,7 @@ def test_examples2(self): quotes, on="time", by="ticker", - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), allow_exact_matches=False, ) tm.assert_frame_equal(result, expected) @@ -591,7 +591,7 @@ def test_non_sorted(self): @pytest.mark.parametrize( "tolerance", [Timedelta("1day"), datetime.timedelta(days=1)], - ids=["pd.Timedelta", "datetime.timedelta"], + ids=["Timedelta", "datetime.timedelta"], ) def test_tolerance(self, tolerance): @@ -652,7 +652,7 @@ def test_tolerance_tz(self): "value2": list("ABCDE"), } ) - result = pd.merge_asof(left, right, on="date", tolerance=pd.Timedelta("1 day")) + result = pd.merge_asof(left, right, on="date", tolerance=Timedelta("1 day")) expected = pd.DataFrame( { @@ -698,7 +698,7 @@ def test_index_tolerance(self): left_index=True, right_index=True, by="ticker", - tolerance=pd.Timedelta("1day"), + tolerance=Timedelta("1day"), ) tm.assert_frame_equal(result, expected) @@ -792,7 +792,7 @@ def test_allow_exact_matches_and_tolerance2(self): df2, on="time", allow_exact_matches=False, - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), ) expected = pd.DataFrame( { @@ -827,7 +827,7 @@ def test_allow_exact_matches_and_tolerance3(self): df2, on="time", allow_exact_matches=False, - tolerance=pd.Timedelta("10ms"), + tolerance=Timedelta("10ms"), ) expected = pd.DataFrame( { @@ -1342,9 +1342,9 @@ def test_merge_index_column_tz(self): def test_left_index_right_index_tolerance(self): # https://github.com/pandas-dev/pandas/issues/35558 - dr1 = pd.date_range( - start="1/1/2020", end="1/20/2020", freq="2D" - ) + pd.Timedelta(seconds=0.4) + dr1 = pd.date_range(start="1/1/2020", end="1/20/2020", freq="2D") + Timedelta( + seconds=0.4 + ) dr2 = pd.date_range(start="1/1/2020", end="2/1/2020") df1 = pd.DataFrame({"val1": "foo"}, index=pd.DatetimeIndex(dr1)) @@ -1358,6 +1358,6 @@ def test_left_index_right_index_tolerance(self): df2, left_index=True, right_index=True, - tolerance=pd.Timedelta(seconds=0.5), + tolerance=Timedelta(seconds=0.5), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 88087110fc221..1f2adaafbbccd 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -487,7 +487,7 @@ def test_categorical_assigning_ops(): def test_getitem_categorical_str(): # GH#31765 - ser = Series(range(5), index=pd.Categorical(["a", "b", "c", "a", "b"])) + ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) result = ser["a"] expected = ser.iloc[[0, 3]] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 183d2814920e4..21c7477918d02 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -90,14 +90,14 @@ def test_empty_df_expanding(expander): def test_missing_minp_zero(): # https://github.com/pandas-dev/pandas/pull/18921 # minp=0 - x = pd.Series([np.nan]) + x = Series([np.nan]) result = x.expanding(min_periods=0).sum() - expected = pd.Series([0.0]) + expected = Series([0.0]) tm.assert_series_equal(result, expected) # minp=1 result = x.expanding(min_periods=1).sum() - expected = pd.Series([np.nan]) + expected = Series([np.nan]) tm.assert_series_equal(result, expected) @@ -252,6 +252,6 @@ def test_expanding_sem(constructor): obj = getattr(pd, constructor)([0, 1, 2]) result = obj.expanding().sem() if isinstance(result, DataFrame): - result = pd.Series(result[0].values) - expected = pd.Series([np.nan] + [0.707107] * 2) + result = Series(result[0].values) + expected = Series([np.nan] + [0.707107] * 2) tm.assert_series_equal(result, expected) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py new file mode 100644 index 0000000000000..ab32c2a073836 --- /dev/null +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -0,0 +1,32 @@ +import argparse +import re + +PATTERN = r"(? Date: Fri, 6 Nov 2020 08:09:01 +0000 Subject: [PATCH 02/15] doc --- scripts/check_for_inconsistent_pandas_namespace.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index ab32c2a073836..3148ba7c1b4bd 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -1,3 +1,11 @@ +""" +Check that test suite file doesn't contain both of e.g. ``Series`` and ``pd.Series``. + +This is meant to be run as a pre-commit hook - run it manually, you can do: + + pre-commit run inconsistent-namespace-usage --all-files +""" + import argparse import re From 7e814dbff2d123488fc5367e48e3dbdb32b3b900 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 6 Nov 2020 08:16:07 +0000 Subject: [PATCH 03/15] typos --- scripts/check_for_inconsistent_pandas_namespace.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index 3148ba7c1b4bd..fa7b7adb50b24 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -1,7 +1,10 @@ """ -Check that test suite file doesn't contain both of e.g. ``Series`` and ``pd.Series``. +Check that test suite file doesn't use the pandas namespace inconsistently. -This is meant to be run as a pre-commit hook - run it manually, you can do: +We check for cases of ``Series`` and ``pd.Series`` appearing in the same file +(likewise for some other common classes). + +This is meant to be run as a pre-commit hook - to run it manually, you can do: pre-commit run inconsistent-namespace-usage --all-files """ From 326229a1aa03354a2327978161f99ed719517562 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 6 Nov 2020 08:21:45 +0000 Subject: [PATCH 04/15] verbose regex --- scripts/check_for_inconsistent_pandas_namespace.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index fa7b7adb50b24..ea6ebf87faf81 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -12,7 +12,13 @@ import argparse import re -PATTERN = r"(? Date: Fri, 6 Nov 2020 08:23:08 +0000 Subject: [PATCH 05/15] use verbose flag --- scripts/check_for_inconsistent_pandas_namespace.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index ea6ebf87faf81..f2ca90c31a5c9 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -12,8 +12,7 @@ import argparse import re -PATTERN = r""" - (?x) # enable verbose regex +PATTERN = r"""(?x) (? Date: Fri, 6 Nov 2020 08:23:26 +0000 Subject: [PATCH 06/15] use verbose flag --- scripts/check_for_inconsistent_pandas_namespace.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index f2ca90c31a5c9..1684e7f0a806a 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -12,7 +12,7 @@ import argparse import re -PATTERN = r"""(?x) +PATTERN = r""" (? Date: Fri, 6 Nov 2020 16:38:10 +0000 Subject: [PATCH 07/15] match both directions --- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/groupby/test_missing.py | 4 ++-- pandas/tests/io/json/test_pandas.py | 6 +++--- pandas/tests/series/indexing/test_getitem.py | 8 ++++---- .../check_for_inconsistent_pandas_namespace.py | 18 +++++++++++++----- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 1c82d6f9a26ff..438a22c99a4eb 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -862,7 +862,7 @@ def test_infer_dtype_datetime_with_na(self, na_value, time_stamp): @pytest.mark.parametrize( "arr", [ - np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]), + np.array([Timedelta("1 days"), Timedelta("2 days")]), np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object), np.array([timedelta(1), timedelta(2)]), ], diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 2c2147795bc07..580148cb2a3a3 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -11,11 +11,11 @@ def test_groupby_column_index_name_lost_fill_funcs(func): # GH: 29764 groupby loses index sometimes df = DataFrame( [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], - columns=pd.Index(["type", "a", "b"], name="idx"), + columns=Index(["type", "a", "b"], name="idx"), ) df_grouped = df.groupby(["type"])[["a", "b"]] result = getattr(df_grouped, func)().columns - expected = pd.Index(["a", "b"], name="idx") + expected = Index(["a", "b"], name="idx") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 47b7bd0983305..3e5f9d481ce48 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -436,7 +436,7 @@ def test_frame_mixedtype_orient(self): # GH10289 def test_v12_compat(self, datapath): dti = pd.date_range("2000-01-03", "2000-01-07") # freq doesnt roundtrip - dti = pd.DatetimeIndex(np.asarray(dti), freq=None) + dti = DatetimeIndex(np.asarray(dti), freq=None) df = DataFrame( [ [1.56808523, 0.65727391, 1.81021139, -0.17251653], @@ -466,7 +466,7 @@ def test_v12_compat(self, datapath): def test_blocks_compat_GH9037(self): index = pd.date_range("20000101", periods=10, freq="H") # freq doesnt round-trip - index = pd.DatetimeIndex(list(index), freq=None) + index = DatetimeIndex(list(index), freq=None) df_mixed = DataFrame( dict( @@ -1189,7 +1189,7 @@ def test_tz_range_is_utc(self, tz_range): ) assert dumps(tz_range, iso_dates=True) == exp - dti = pd.DatetimeIndex(tz_range) + dti = DatetimeIndex(tz_range) assert dumps(dti, iso_dates=True) == exp df = DataFrame({"DT": dti}) result = dumps(df, iso_dates=True) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 71bcce12796f5..7b794668803c3 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -188,7 +188,7 @@ def test_getitem_slice_date(self, slc, positions): class TestSeriesGetitemListLike: - @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series]) def test_getitem_no_matches(self, box): # GH#33462 we expect the same behavior for list/ndarray/Index/Series ser = Series(["A", "B"]) @@ -212,7 +212,7 @@ def test_getitem_intlist_intindex_periodvalues(self): tm.assert_series_equal(result, exp) assert result.dtype == "Period[D]" - @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("box", [list, np.array, Index]) def test_getitem_intlist_intervalindex_non_int(self, box): # GH#33404 fall back to positional since ints are unambiguous dti = date_range("2000-01-03", periods=3)._with_freq(None) @@ -224,11 +224,11 @@ def test_getitem_intlist_intervalindex_non_int(self, box): result = ser[key] tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("box", [list, np.array, Index]) @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64]) def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): # GH#33404 do _not_ fall back to positional since ints are ambiguous - idx = pd.Index(range(4)).astype(dtype) + idx = Index(range(4)).astype(dtype) dti = date_range("2000-01-03", periods=3) mi = pd.MultiIndex.from_product([idx, dti]) ser = Series(range(len(mi))[::-1], index=mi) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index 1684e7f0a806a..426e3c6257f86 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -13,10 +13,18 @@ import re PATTERN = r""" - (? Date: Fri, 6 Nov 2020 17:04:33 +0000 Subject: [PATCH 08/15] add test --- .../test_inconsistent_namespace_check.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/tests/test_inconsistent_namespace_check.py diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py new file mode 100644 index 0000000000000..03b6fd8fae19d --- /dev/null +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from pathlib import Path +import subprocess + +import pytest + +FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" +FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" +FILE_2 = "cat_0 = Categorical()\ncat_1 = Categorical()" +FILE_3 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" + + +@pytest.mark.parametrize("content", [FILE_0, FILE_1]) +def test_inconsistent_usage(tmpdir, content: str) -> None: + tmpfile = Path(tmpdir / "tmpfile.py") + tmpfile.touch() + tmpfile.write_text(content) + output = subprocess.run( + ["python", "scripts/check_for_inconsistent_pandas_namespace.py", str(tmpfile)], + stderr=subprocess.PIPE, + ) + + # check stderr + result = output.stderr.decode() + expected = "Found both `pd.Categorical` and `Categorical` in" + assert expected in result + + # check return code + result = output.returncode + expected = 1 + assert result == expected + + +@pytest.mark.parametrize("content", [FILE_2, FILE_3]) +def test_consistent_usage(tmpdir, content: str) -> None: + tmpfile = Path(tmpdir / "tmpfile.py") + tmpfile.touch() + tmpfile.write_text(content) + output = subprocess.run( + ["python", "scripts/check_for_inconsistent_pandas_namespace.py", str(tmpfile)], + stderr=subprocess.PIPE, + ) + + # check stderr + result = output.stderr.decode() + expected = "" + assert expected == result + + # check return code + result = output.returncode + expected = 0 + assert result == expected From 6f51fc0ee249ce1c05af85480e8ddc75c35921dc Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Nov 2020 17:05:08 +0000 Subject: [PATCH 09/15] don't import annotations from future --- scripts/tests/test_inconsistent_namespace_check.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 03b6fd8fae19d..4b158b531ddfa 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from pathlib import Path import subprocess From 2209a75f553a0c7680f88d2908d4dc18ba01379f Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Nov 2020 17:11:54 +0000 Subject: [PATCH 10/15] update extra couple of cases --- pandas/tests/indexes/datetimes/test_shift.py | 16 ++++++++-------- pandas/tests/series/test_repr.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_shift.py b/pandas/tests/indexes/datetimes/test_shift.py index a2a673ed5d9e0..3c202005f7933 100644 --- a/pandas/tests/indexes/datetimes/test_shift.py +++ b/pandas/tests/indexes/datetimes/test_shift.py @@ -20,25 +20,25 @@ class TestDatetimeIndexShift: def test_dti_shift_tzaware(self, tz_naive_fixture): # GH#9903 tz = tz_naive_fixture - idx = pd.DatetimeIndex([], name="xxx", tz=tz) + idx = DatetimeIndex([], name="xxx", tz=tz) tm.assert_index_equal(idx.shift(0, freq="H"), idx) tm.assert_index_equal(idx.shift(3, freq="H"), idx) - idx = pd.DatetimeIndex( + idx = DatetimeIndex( ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], name="xxx", tz=tz, freq="H", ) tm.assert_index_equal(idx.shift(0, freq="H"), idx) - exp = pd.DatetimeIndex( + exp = DatetimeIndex( ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], name="xxx", tz=tz, freq="H", ) tm.assert_index_equal(idx.shift(3, freq="H"), exp) - exp = pd.DatetimeIndex( + exp = DatetimeIndex( ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], name="xxx", tz=tz, @@ -51,21 +51,21 @@ def test_dti_shift_freqs(self): # GH#8083 drange = pd.date_range("20130101", periods=5) result = drange.shift(1) - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], freq="D", ) tm.assert_index_equal(result, expected) result = drange.shift(-1) - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"], freq="D", ) tm.assert_index_equal(result, expected) result = drange.shift(3, freq="2D") - expected = pd.DatetimeIndex( + expected = DatetimeIndex( ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"], freq="D", ) @@ -84,7 +84,7 @@ def test_dti_shift_int(self): def test_dti_shift_no_freq(self): # GH#19147 - dti = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) + dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): dti.shift(2) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 7325505ce233b..75e7f8a17eda3 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -251,7 +251,7 @@ class County: def __repr__(self) -> str: return self.name + ", " + self.state - cat = pd.Categorical([County() for _ in range(61)]) + cat = Categorical([County() for _ in range(61)]) idx = Index(cat) ser = idx.to_series() From 01bf754d62219dca40563f4243834e5b17b1aafe Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Fri, 6 Nov 2020 17:14:02 +0000 Subject: [PATCH 11/15] :truck: rename --- scripts/tests/test_inconsistent_namespace_check.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 4b158b531ddfa..430caaff9b2ab 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -3,13 +3,13 @@ import pytest -FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" -FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" -FILE_2 = "cat_0 = Categorical()\ncat_1 = Categorical()" -FILE_3 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" +BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" +BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" +GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()" +GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" -@pytest.mark.parametrize("content", [FILE_0, FILE_1]) +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) def test_inconsistent_usage(tmpdir, content: str) -> None: tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() @@ -30,7 +30,7 @@ def test_inconsistent_usage(tmpdir, content: str) -> None: assert result == expected -@pytest.mark.parametrize("content", [FILE_2, FILE_3]) +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) def test_consistent_usage(tmpdir, content: str) -> None: tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() From 520e01cf21a2533bcb7f910fc04c56ab3c8ae5a3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 7 Nov 2020 08:29:28 +0000 Subject: [PATCH 12/15] typing --- .../check_for_inconsistent_pandas_namespace.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index 426e3c6257f86..30fbddc6c860d 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -11,6 +11,7 @@ import argparse import re +from typing import Optional, Sequence PATTERN = r""" ( @@ -37,11 +38,13 @@ "DatetimeIndex", "Categorical", ) +ERROR_MESSAGE = "Found both `pd.{class_name}` and `{class_name}` in {path}" -if __name__ == "__main__": + +def main(argv: Optional[Sequence[str]] = None) -> None: parser = argparse.ArgumentParser() parser.add_argument("paths", nargs="*") - args = parser.parse_args() + args = parser.parse_args(argv) for class_name in CLASS_NAMES: pattern = re.compile( @@ -52,6 +55,8 @@ with open(path, "rb") as f: contents = f.read() match = pattern.search(contents) - assert ( - match is None - ), f"Found both `pd.{class_name}` and `{class_name}` in {path}" + assert match is None, ERROR_MESSAGE.format(class_name=class_name, path=path) + + +if __name__ == "__main__": + main() From cb81aa565e67d8405a8d4ea60f90421ef4ddbc23 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 7 Nov 2020 08:37:35 +0000 Subject: [PATCH 13/15] don't use subprocess --- .../test_inconsistent_namespace_check.py | 35 ++++--------------- 1 file changed, 6 insertions(+), 29 deletions(-) diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 430caaff9b2ab..4a127b7336a52 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -1,8 +1,9 @@ from pathlib import Path -import subprocess import pytest +from scripts.check_for_inconsistent_pandas_namespace import main + BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()" @@ -14,20 +15,9 @@ def test_inconsistent_usage(tmpdir, content: str) -> None: tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() tmpfile.write_text(content) - output = subprocess.run( - ["python", "scripts/check_for_inconsistent_pandas_namespace.py", str(tmpfile)], - stderr=subprocess.PIPE, - ) - - # check stderr - result = output.stderr.decode() - expected = "Found both `pd.Categorical` and `Categorical` in" - assert expected in result - - # check return code - result = output.returncode - expected = 1 - assert result == expected + msg = fr"Found both `pd\.Categorical` and `Categorical` in {str(tmpfile)}" + with pytest.raises(AssertionError, match=msg): + main((str(tmpfile),)) @pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) @@ -35,17 +25,4 @@ def test_consistent_usage(tmpdir, content: str) -> None: tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() tmpfile.write_text(content) - output = subprocess.run( - ["python", "scripts/check_for_inconsistent_pandas_namespace.py", str(tmpfile)], - stderr=subprocess.PIPE, - ) - - # check stderr - result = output.stderr.decode() - expected = "" - assert expected == result - - # check return code - result = output.returncode - expected = 0 - assert result == expected + main((str(tmpfile),)) # Should not raise. From e50c70e326494c47f03741ca47de5956362d6c20 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 7 Nov 2020 08:38:59 +0000 Subject: [PATCH 14/15] don't type tests --- scripts/tests/test_inconsistent_namespace_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 4a127b7336a52..37e6d288d9341 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) -def test_inconsistent_usage(tmpdir, content: str) -> None: +def test_inconsistent_usage(tmpdir, content): tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() tmpfile.write_text(content) @@ -21,7 +21,7 @@ def test_inconsistent_usage(tmpdir, content: str) -> None: @pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) -def test_consistent_usage(tmpdir, content: str) -> None: +def test_consistent_usage(tmpdir, content): tmpfile = Path(tmpdir / "tmpfile.py") tmpfile.touch() tmpfile.write_text(content) From 046d61c433e46b752d860751b699f3f268014ae3 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sat, 7 Nov 2020 09:06:03 +0000 Subject: [PATCH 15/15] use pathlib --- scripts/check_for_inconsistent_pandas_namespace.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index 30fbddc6c860d..4b4515cdf7e11 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -10,6 +10,7 @@ """ import argparse +from pathlib import Path import re from typing import Optional, Sequence @@ -43,7 +44,7 @@ def main(argv: Optional[Sequence[str]] = None) -> None: parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*") + parser.add_argument("paths", nargs="*", type=Path) args = parser.parse_args(argv) for class_name in CLASS_NAMES: @@ -52,10 +53,11 @@ def main(argv: Optional[Sequence[str]] = None) -> None: flags=re.MULTILINE | re.DOTALL | re.VERBOSE, ) for path in args.paths: - with open(path, "rb") as f: - contents = f.read() + contents = path.read_bytes() match = pattern.search(contents) - assert match is None, ERROR_MESSAGE.format(class_name=class_name, path=path) + assert match is None, ERROR_MESSAGE.format( + class_name=class_name, path=str(path) + ) if __name__ == "__main__":