Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: fix pandas-stubs issues #2008

Merged
merged 17 commits on
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions narwhals/_dask/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@


def n_unique() -> dd.Aggregation:
def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> int:
def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]:
Copy link
Member

@dangotbanned dangotbanned Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm getting quite a lot of warnings for pd.core from pyright

Screenshot

image

I think I resolved this before in altair by finding a more public import path

return s.nunique(dropna=False) # type: ignore[no-any-return]

def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> int:
def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]:
return s0.sum() # type: ignore[no-any-return]

return dd.Aggregation(
Expand Down
18 changes: 6 additions & 12 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,12 @@ def nth(self: Self, *column_indices: int) -> DaskExpr:

def lit(self: Self, value: Any, dtype: DType | None) -> DaskExpr:
def func(df: DaskLazyFrame) -> list[dx.Series]:
return [
dd.from_pandas(
pd.Series(
[value],
dtype=narwhals_to_native_dtype(dtype, self._version)
if dtype is not None
else None,
name="literal",
),
npartitions=df._native_frame.npartitions,
)
]
if dtype is not None:
native_dtype = narwhals_to_native_dtype(dtype, self._version)
s = pd.Series([value], dtype=native_dtype)
else:
s = pd.Series([value])
return [dd.from_pandas(s, npartitions=df._native_frame.npartitions)]

return DaskExpr(
func,
Expand Down
20 changes: 17 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Iterable
from typing import Sequence
from typing import TypeVar
from typing import cast

import pandas as pd

Expand Down Expand Up @@ -511,7 +512,11 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBacken
if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype):
return "pyarrow"
with suppress(AttributeError):
if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype):
sentinel = object()
if (
isinstance(dtype, pd.api.extensions.ExtensionDtype)
and getattr(dtype, "base", sentinel) is None
):
Comment on lines +515 to +519
Copy link
Member

@dangotbanned dangotbanned Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to avoiding pd.core (#2008 (comment))

Available since 0.23.0

Pretty sure we don't need with suppress(AttributeError): with this?
@MarcoGorelli

return "numpy_nullable"
return None

Expand Down Expand Up @@ -722,8 +727,17 @@ def int_dtype_mapper(dtype: Any) -> str:
def convert_str_slice_to_int_slice(
str_slice: slice, columns: pd.Index
) -> tuple[int | None, int | None, int | None]:
start = columns.get_loc(str_slice.start) if str_slice.start is not None else None
stop = columns.get_loc(str_slice.stop) + 1 if str_slice.stop is not None else None
# We can safely cast to int because we know that `columns` doesn't contain duplicates.
start = (
cast(int, columns.get_loc(str_slice.start))
if str_slice.start is not None
else None
)
stop = (
cast(int, columns.get_loc(str_slice.stop)) + 1
if str_slice.stop is not None
else None
)
step = str_slice.step
return (start, stop, step)

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def hist(
version=self._version,
)
elif (self._backend_version < (1, 15)) and self._native_series.count() < 1:
data_dict: dict[str, list[int | float] | pl.Series | pl.Expr]
data_dict: dict[str, Sequence[Any] | pl.Series]
if bins is not None:
data_dict = {
"breakpoint": bins[1:],
Expand Down
2 changes: 1 addition & 1 deletion narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ def is_ordered_categorical(series: Series[Any]) -> bool:
if is_polars_series(native_series):
return native_series.dtype.ordering == "physical" # type: ignore[attr-defined, no-any-return]
if is_pandas_series(native_series):
return native_series.cat.ordered # type: ignore[no-any-return]
return bool(native_series.cat.ordered)
if is_modin_series(native_series): # pragma: no cover
return native_series.cat.ordered # type: ignore[no-any-return]
if is_cudf_series(native_series): # pragma: no cover
Expand Down
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ tests = [
"typing_extensions",
]
typing = [
"pandas-stubs",
"typing_extensions",
"mypy~=1.15.0",
]
Expand Down Expand Up @@ -226,16 +227,13 @@ pretty = true

[[tool.mypy.overrides]]
module = [
# TODO: enable step by step when it makes sense
# e.g. the pandas API is just too inconsistent for type hinting to be useful.
"cudf.*",
"dask.*",
"dask_expr.*",
"duckdb.*",
"ibis.*",
"modin.*",
"numpy.*",
"pandas.*",
"pyspark.*",
"sklearn.*",
"sqlframe.*",
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def dask_lazy_p2_constructor(obj: dict[str, list[Any]]) -> IntoFrame: # pragma:
return dd.from_dict(obj, npartitions=2) # type: ignore[no-any-return]


def pyarrow_table_constructor(obj: dict[str, list[Any]]) -> IntoDataFrame:
def pyarrow_table_constructor(obj: dict[str, Any]) -> IntoDataFrame:
return pa.table(obj) # type: ignore[no-any-return]


Expand Down Expand Up @@ -227,7 +227,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
if x not in GPU_CONSTRUCTORS and x != "modin" # too slow
]
else: # pragma: no cover
selected_constructors = metafunc.config.getoption("constructors").split(",")
selected_constructors = metafunc.config.getoption("constructors").split(",") # pyright: ignore[reportAttributeAccessIssue]

eager_constructors: list[Callable[[Any], IntoDataFrame]] = []
eager_constructors_ids: list[str] = []
Expand Down
5 changes: 3 additions & 2 deletions tests/dependencies/is_into_dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Mapping

import numpy as np
import pandas as pd
Expand All @@ -16,7 +17,7 @@


class DictDataFrame:
def __init__(self: Self, data: dict[str, list[Any]]) -> None:
def __init__(self: Self, data: Mapping[str, Any]) -> None:
self._data = data

def __len__(self) -> int: # pragma: no cover
Expand All @@ -27,7 +28,7 @@ def __narwhals_dataframe__(self) -> Self: # pragma: no cover


def test_is_into_dataframe() -> None:
data = {"a": [1, 2, 3], "b": [4, 5, 6]}
data: dict[str, Any] = {"a": [1, 2, 3], "b": [4, 5, 6]}
assert is_into_dataframe(pa.table(data))
assert is_into_dataframe(pl.DataFrame(data))
assert is_into_dataframe(pd.DataFrame(data))
Expand Down
12 changes: 9 additions & 3 deletions tests/dtypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
import narwhals.stable.v1 as nw
from tests.utils import PANDAS_VERSION
from tests.utils import POLARS_VERSION
from tests.utils import PYARROW_VERSION

if TYPE_CHECKING:
from narwhals.typing import IntoSeries
from tests.utils import Constructor


Expand Down Expand Up @@ -135,6 +137,8 @@ def test_struct_hashes() -> None:
def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> None:
if any(x in str(constructor) for x in ("dask", "modin", "cudf", "pyspark")):
request.applymarker(pytest.mark.xfail)
if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (14,):
request.applymarker(pytest.mark.xfail)
data = {"a": [[[1, 2], [3, 4], [5, 6]]]}
df = nw.from_native(constructor(data)).with_columns(
a=nw.col("a").cast(nw.Array(nw.Int64(), (3, 2)))
Expand All @@ -144,13 +148,15 @@ def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> N


def test_second_time_unit() -> None:
s = pd.Series(np.array([np.datetime64("2020-01-01", "s")]))
s: IntoSeries = pd.Series(np.array([np.datetime64("2020-01-01", "s")]))
result = nw.from_native(s, series_only=True)
if PANDAS_VERSION < (2,): # pragma: no cover
assert result.dtype == nw.Datetime("ns")
else:
assert result.dtype == nw.Datetime("s")
s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=pa.timestamp("s"))])
ts_sec = pa.timestamp("s")
dur_sec = pa.duration("s")
s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=ts_sec)], type=ts_sec)
result = nw.from_native(s, series_only=True)
assert result.dtype == nw.Datetime("s")
s = pd.Series(np.array([np.timedelta64(1, "s")]))
Expand All @@ -159,7 +165,7 @@ def test_second_time_unit() -> None:
assert result.dtype == nw.Duration("ns")
else:
assert result.dtype == nw.Duration("s")
s = pa.chunked_array([pa.array([timedelta(1)], type=pa.duration("s"))])
s = pa.chunked_array([pa.array([timedelta(1)], type=dur_sec)], type=dur_sec)
result = nw.from_native(s, series_only=True)
assert result.dtype == nw.Duration("s")

Expand Down
12 changes: 6 additions & 6 deletions tests/expr_and_series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,9 @@ def test_floordiv(left: int, right: int) -> None:
# test is a bit manual unfortunately
assume(right != 0)
expected = {"a": [left // right]}
result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select(
nw.col("a") // right
)
result: nw.DataFrame[Any] = nw.from_native(
pd.DataFrame({"a": [left]}), eager_only=True
).select(nw.col("a") // right)
assert_equal_data(result, expected)
if PANDAS_VERSION < (2, 2): # pragma: no cover
# Bug in old version of pandas
Expand Down Expand Up @@ -201,9 +201,9 @@ def test_mod(left: int, right: int) -> None:
# test is a bit manual unfortunately
assume(right != 0)
expected = {"a": [left % right]}
result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select(
nw.col("a") % right
)
result: nw.DataFrame[Any] = nw.from_native(
pd.DataFrame({"a": [left]}), eager_only=True
).select(nw.col("a") % right)
assert_equal_data(result, expected)
result = nw.from_native(
pd.DataFrame({"a": [left]}).convert_dtypes(), eager_only=True
Expand Down
5 changes: 4 additions & 1 deletion tests/expr_and_series/nth_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pytest

Expand All @@ -8,7 +11,7 @@
from tests.utils import Constructor
from tests.utils import assert_equal_data

data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]}
data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]}


@pytest.mark.parametrize(
Expand Down
16 changes: 11 additions & 5 deletions tests/frame/getitem_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any
from typing import cast

import numpy as np
import pandas as pd
Expand All @@ -12,7 +14,10 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {
if TYPE_CHECKING:
from narwhals.typing import _1DArray

data: dict[str, Any] = {
"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
"b": [11, 12, 13, 14, 15, 16],
}
Expand Down Expand Up @@ -74,7 +79,8 @@ def test_gather(constructor_eager: ConstructorEager) -> None:
"b": [11, 14, 12],
}
assert_equal_data(result, expected)
result = df[np.array([0, 3, 1])]
arr = cast("_1DArray", np.array([0, 3, 1]))
result = df[arr]
assert_equal_data(result, expected)


Expand All @@ -96,10 +102,10 @@ def test_gather_rows_cols(constructor_eager: ConstructorEager) -> None:

expected = {"b": [11, 14, 12]}

result = {"b": df[[0, 3, 1], 1]}
result: Any = {"b": df[[0, 3, 1], 1]}
assert_equal_data(result, expected)

result = {"b": df[np.array([0, 3, 1]), "b"]}
arr = cast("_1DArray", np.array([0, 3, 1]))
result = {"b": df[arr, "b"]}
assert_equal_data(result, expected)


Expand Down
5 changes: 4 additions & 1 deletion tests/frame/interchange_native_namespace_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pytest

import narwhals.stable.v1 as nw

data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]}


def test_interchange() -> None:
Expand Down
3 changes: 2 additions & 1 deletion tests/frame/interchange_select_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Mapping

import polars as pl
import pytest
Expand All @@ -11,7 +12,7 @@
if TYPE_CHECKING:
from typing_extensions import Self

data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}


class InterchangeDataFrame:
Expand Down
5 changes: 4 additions & 1 deletion tests/frame/interchange_to_arrow_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pyarrow as pa
import pytest

import narwhals.stable.v1 as nw

data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}


def test_interchange_to_arrow() -> None:
Expand Down
19 changes: 12 additions & 7 deletions tests/frame/invalid_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pandas as pd
import polars as pl
import pyarrow as pa
Expand All @@ -8,10 +10,13 @@
import narwhals.stable.v1 as nw
from tests.utils import NUMPY_VERSION

if TYPE_CHECKING:
from narwhals.typing import Frame


def test_invalid() -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}
df = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]}))
df: Frame = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]}))
with pytest.raises(ValueError, match="Multi-output"):
df.select(nw.all() + nw.all())
df = nw.from_native(pd.DataFrame(data))
Expand All @@ -24,14 +29,14 @@ def test_invalid() -> None:


def test_native_vs_non_native() -> None:
s = pd.Series([1, 2, 3])
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
s_pd = pd.Series([1, 2, 3])
df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
with pytest.raises(TypeError, match="Perhaps you forgot"):
nw.from_native(df).filter(s > 1)
s = pl.Series([1, 2, 3])
df = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]})
nw.from_native(df_pd).filter(s_pd > 1) # type: ignore[arg-type]
s_pl = pl.Series([1, 2, 3])
df_pl = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]})
with pytest.raises(TypeError, match="Perhaps you\n- forgot"):
nw.from_native(df).filter(s > 1)
nw.from_native(df_pl).filter(s_pl > 1)


def test_validate_laziness() -> None:
Expand Down
Loading
Loading