Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move inconsistent namespace check to pre-commit, fixup more files #37662

Merged
merged 17 commits into from
Nov 8, 2020
Merged
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ repos:
entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
types: [python]
exclude: ^(asv_bench|pandas/tests|doc)/
- id: inconsistent-namespace-usage
name: 'Check for inconsistent use of pandas namespace in tests'
entry: python scripts/check_for_inconsistent_pandas_namespace.py
language: python
types: [python]
files: ^pandas/tests/
- id: FrameOrSeriesUnion
name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias
entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]
Expand Down
13 changes: 0 additions & 13 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ function invgrep {
return $((! $EXIT_STATUS))
}

function check_namespace {
local -r CLASS=${1}
grep -R -l --include "*.py" " ${CLASS}(" pandas/tests | xargs grep -n "pd\.${CLASS}[(\.]"
test $? -gt 0
}

if [[ "$GITHUB_ACTIONS" == "true" ]]; then
FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s"
INVGREP_PREPEND="##[error]"
Expand Down Expand Up @@ -120,13 +114,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG
invgrep -R --include=*.{py,pyx} '!r}' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

# -------------------------------------------------------------------------
MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do
check_namespace ${class}
RET=$(($RET + $?))
done
echo $MSG "DONE"
fi

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def test_infer_dtype_datetime_with_na(self, na_value, time_stamp):
@pytest.mark.parametrize(
"arr",
[
np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]),
np.array([Timedelta("1 days"), Timedelta("2 days")]),
np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object),
np.array([timedelta(1), timedelta(2)]),
],
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def test_cast_category_to_extension_dtype(self, expected):
(
"datetime64[ns, MET]",
pd.DatetimeIndex(
[pd.Timestamp("2015-01-01 00:00:00+0100", tz="MET")]
[Timestamp("2015-01-01 00:00:00+0100", tz="MET")]
).array,
),
],
Expand Down Expand Up @@ -254,7 +254,7 @@ def _compare_other(self, s, data, op_name, other):

@pytest.mark.parametrize(
"categories",
[["a", "b"], [0, 1], [pd.Timestamp("2019"), pd.Timestamp("2020")]],
[["a", "b"], [0, 1], [Timestamp("2019"), Timestamp("2020")]],
)
def test_not_equal_with_na(self, categories):
# https://github.com/pandas-dev/pandas/issues/32276
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def test_agg_over_numpy_arrays():
result = df.groupby("category").agg(sum)

expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]
expected_index = pd.Index([1, 2], name="category")
expected_index = Index([1, 2], name="category")
expected_column = ["arraydata"]
expected = DataFrame(expected_data, index=expected_index, columns=expected_column)

Expand Down Expand Up @@ -497,7 +497,7 @@ def test_sum_uint64_overflow():
df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
df = df + 9223372036854775807

index = pd.Index(
index = Index(
[9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64
)
expected = DataFrame(
Expand Down Expand Up @@ -596,7 +596,7 @@ def test_agg_lambda_with_timezone():
result = df.groupby("tag").agg({"date": lambda e: e.head(1)})
expected = DataFrame(
[pd.Timestamp("2018-01-01", tz="UTC")],
index=pd.Index([1], name="tag"),
index=Index([1], name="tag"),
columns=["date"],
)
tm.assert_frame_equal(result, expected)
Expand Down
11 changes: 5 additions & 6 deletions pandas/tests/groupby/test_counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
DataFrame,
Index,
Expand Down Expand Up @@ -260,7 +259,7 @@ def test_groupby_timedelta_cython_count():
df = DataFrame(
{"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")}
)
expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt")
expected = Series([2, 2], index=Index(["a", "b"], name="g"), name="delt")
result = df.groupby("g").delt.count()
tm.assert_series_equal(expected, result)

Expand Down Expand Up @@ -317,12 +316,12 @@ def test_count_non_nulls():
def test_count_object():
df = DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3})
result = df.groupby("c").a.count()
expected = Series([3, 3], index=pd.Index([2, 3], name="c"), name="a")
expected = Series([3, 3], index=Index([2, 3], name="c"), name="a")
tm.assert_series_equal(result, expected)

df = DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3})
result = df.groupby("c").a.count()
expected = Series([1, 3], index=pd.Index([2, 3], name="c"), name="a")
expected = Series([1, 3], index=Index([2, 3], name="c"), name="a")
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -354,7 +353,7 @@ def test_lower_int_prec_count():
)
result = df.groupby("grp").count()
expected = DataFrame(
{"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp")
{"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=Index(list("ab"), name="grp")
)
tm.assert_frame_equal(result, expected)

Expand All @@ -374,5 +373,5 @@ def __eq__(self, other):

df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)})
result = df.groupby("grp").count()
expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp"))
expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp"))
tm.assert_frame_equal(result, expected)
32 changes: 16 additions & 16 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,12 +614,12 @@ def test_list_grouper_with_nat(self):

# Grouper in a list grouping
result = df.groupby([grouper])
expected = {pd.Timestamp("2011-01-01"): Index(list(range(364)))}
expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
tm.assert_dict_equal(result.groups, expected)

# Test case without a list
result = df.groupby(grouper)
expected = {pd.Timestamp("2011-01-01"): 365}
expected = {Timestamp("2011-01-01"): 365}
tm.assert_dict_equal(result.groups, expected)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -938,12 +938,12 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 2
assert grouped.ngroups == 2
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])

df = DataFrame(
Expand All @@ -953,12 +953,12 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 2
assert grouped.ngroups == 2
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0, 2], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])

# length=3
Expand All @@ -969,15 +969,15 @@ def test_groupby_with_small_elem(self):
grouped = df.groupby([pd.Grouper(freq="M"), "event"])
assert len(grouped.groups) == 3
assert grouped.ngroups == 3
assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups
assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups
assert (pd.Timestamp("2014-08-31"), "start") in grouped.groups
assert (Timestamp("2014-09-30"), "start") in grouped.groups
assert (Timestamp("2013-10-31"), "start") in grouped.groups
assert (Timestamp("2014-08-31"), "start") in grouped.groups

res = grouped.get_group((pd.Timestamp("2014-09-30"), "start"))
res = grouped.get_group((Timestamp("2014-09-30"), "start"))
tm.assert_frame_equal(res, df.iloc[[0], :])
res = grouped.get_group((pd.Timestamp("2013-10-31"), "start"))
res = grouped.get_group((Timestamp("2013-10-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[1], :])
res = grouped.get_group((pd.Timestamp("2014-08-31"), "start"))
res = grouped.get_group((Timestamp("2014-08-31"), "start"))
tm.assert_frame_equal(res, df.iloc[[2], :])

def test_grouping_string_repr(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def test_groupby_column_index_name_lost_fill_funcs(func):
# GH: 29764 groupby loses index sometimes
df = DataFrame(
[[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]],
columns=pd.Index(["type", "a", "b"], name="idx"),
columns=Index(["type", "a", "b"], name="idx"),
)
df_grouped = df.groupby(["type"])[["a", "b"]]
result = getattr(df_grouped, func)().columns
expected = pd.Index(["a", "b"], name="idx")
expected = Index(["a", "b"], name="idx")
tm.assert_index_equal(result, expected)


Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def test_quantile_missing_group_values_correct_results(
df = DataFrame({"key": key, "val": val})

expected = DataFrame(
expected_val, index=pd.Index(expected_key, name="key"), columns=["val"]
expected_val, index=Index(expected_key, name="key"), columns=["val"]
)

grp = df.groupby("key")
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_groupby_quantile_nullable_array(values, q):
idx = pd.MultiIndex.from_product((["x", "y"], q), names=["a", None])
true_quantiles = [0.0, 0.5, 1.0]
else:
idx = pd.Index(["x", "y"], name="a")
idx = Index(["x", "y"], name="a")
true_quantiles = [0.5]

expected = pd.Series(true_quantiles * 2, index=idx, name="b")
Expand Down Expand Up @@ -251,6 +251,6 @@ def test_groupby_timedelta_quantile():
pd.Timedelta("0 days 00:00:02.990000"),
]
},
index=pd.Index([1, 2], name="group"),
index=Index([1, 2], name="group"),
)
tm.assert_frame_equal(result, expected)
10 changes: 5 additions & 5 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,15 +452,15 @@ def test_groupby_groups_datetimeindex(self):
result = df.groupby(level="date").groups
dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"]
expected = {
Timestamp(date): pd.DatetimeIndex([date], name="date") for date in dates
Timestamp(date): DatetimeIndex([date], name="date") for date in dates
}
tm.assert_dict_equal(result, expected)

grouped = df.groupby(level="date")
for date in dates:
result = grouped.get_group(date)
data = [[df.loc[date, "A"], df.loc[date, "B"]]]
expected_index = pd.DatetimeIndex([date], name="date", freq="D")
expected_index = DatetimeIndex([date], name="date", freq="D")
expected = DataFrame(data, columns=list("AB"), index=expected_index)
tm.assert_frame_equal(result, expected)

Expand All @@ -484,7 +484,7 @@ def test_groupby_groups_datetimeindex_tz(self):
)
df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific"))

exp_idx1 = pd.DatetimeIndex(
exp_idx1 = DatetimeIndex(
[
"2011-07-19 07:00:00",
"2011-07-19 07:00:00",
Expand All @@ -508,13 +508,13 @@ def test_groupby_groups_datetimeindex_tz(self):
tm.assert_frame_equal(result, expected)

# by level
didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo")
didx = DatetimeIndex(dates, tz="Asia/Tokyo")
df = DataFrame(
{"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]},
index=didx,
)

exp_idx = pd.DatetimeIndex(
exp_idx = DatetimeIndex(
["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"],
tz="Asia/Tokyo",
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ def test_categorical_and_not_categorical_key(observed):
# GH 32494
df_with_categorical = DataFrame(
{
"A": pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]),
"A": Categorical(["a", "b", "a"], categories=["a", "b", "c"]),
"B": [1, 2, 3],
"C": ["a", "b", "a"],
}
Expand Down
16 changes: 8 additions & 8 deletions pandas/tests/indexes/datetimes/test_shift.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,25 @@ class TestDatetimeIndexShift:
def test_dti_shift_tzaware(self, tz_naive_fixture):
# GH#9903
tz = tz_naive_fixture
idx = pd.DatetimeIndex([], name="xxx", tz=tz)
idx = DatetimeIndex([], name="xxx", tz=tz)
tm.assert_index_equal(idx.shift(0, freq="H"), idx)
tm.assert_index_equal(idx.shift(3, freq="H"), idx)

idx = pd.DatetimeIndex(
idx = DatetimeIndex(
["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
name="xxx",
tz=tz,
freq="H",
)
tm.assert_index_equal(idx.shift(0, freq="H"), idx)
exp = pd.DatetimeIndex(
exp = DatetimeIndex(
["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
name="xxx",
tz=tz,
freq="H",
)
tm.assert_index_equal(idx.shift(3, freq="H"), exp)
exp = pd.DatetimeIndex(
exp = DatetimeIndex(
["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
name="xxx",
tz=tz,
Expand All @@ -51,21 +51,21 @@ def test_dti_shift_freqs(self):
# GH#8083
drange = pd.date_range("20130101", periods=5)
result = drange.shift(1)
expected = pd.DatetimeIndex(
expected = DatetimeIndex(
["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
freq="D",
)
tm.assert_index_equal(result, expected)

result = drange.shift(-1)
expected = pd.DatetimeIndex(
expected = DatetimeIndex(
["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
freq="D",
)
tm.assert_index_equal(result, expected)

result = drange.shift(3, freq="2D")
expected = pd.DatetimeIndex(
expected = DatetimeIndex(
["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
freq="D",
)
Expand All @@ -84,7 +84,7 @@ def test_dti_shift_int(self):

def test_dti_shift_no_freq(self):
# GH#19147
dti = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"):
dti.shift(2)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/period/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

import pandas as pd
from pandas import PeriodIndex
from pandas import PeriodIndex, Series
import pandas._testing as tm


Expand Down Expand Up @@ -154,7 +154,7 @@ def test_representation_to_series(self):
[idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9],
[exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9],
):
result = repr(pd.Series(idx))
result = repr(Series(idx))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On line 117 of this file we have:

exp1 = """Series([], dtype: period[D])"""

so this change uses the bare `Series` name to stay consistent with that.

assert result == expected

def test_summary(self):
Expand Down
Loading