diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index dc018e7816..5fd11225ba 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -26,10 +26,10 @@ def n_unique() -> dd.Aggregation: - def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> int: + def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]: return s.nunique(dropna=False) # type: ignore[no-any-return] - def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> int: + def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]: return s0.sum() # type: ignore[no-any-return] return dd.Aggregation( diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py index cde2a7861f..f75b8bacb5 100644 --- a/narwhals/_dask/namespace.py +++ b/narwhals/_dask/namespace.py @@ -77,18 +77,12 @@ def nth(self: Self, *column_indices: int) -> DaskExpr: def lit(self: Self, value: Any, dtype: DType | None) -> DaskExpr: def func(df: DaskLazyFrame) -> list[dx.Series]: - return [ - dd.from_pandas( - pd.Series( - [value], - dtype=narwhals_to_native_dtype(dtype, self._version) - if dtype is not None - else None, - name="literal", - ), - npartitions=df._native_frame.npartitions, - ) - ] + if dtype is not None: + native_dtype = narwhals_to_native_dtype(dtype, self._version) + s = pd.Series([value], dtype=native_dtype) + else: + s = pd.Series([value]) + return [dd.from_pandas(s, npartitions=df._native_frame.npartitions)] return DaskExpr( func, diff --git a/narwhals/_pandas_like/utils.py b/narwhals/_pandas_like/utils.py index 1eff70cefa..98f613e9fa 100644 --- a/narwhals/_pandas_like/utils.py +++ b/narwhals/_pandas_like/utils.py @@ -9,6 +9,7 @@ from typing import Iterable from typing import Sequence from typing import TypeVar +from typing import cast import pandas as pd @@ -511,7 +512,11 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBacken if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype): return "pyarrow" with suppress(AttributeError): - if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype): + sentinel = object() + if ( + isinstance(dtype, pd.api.extensions.ExtensionDtype) + and getattr(dtype, "base", sentinel) is None + ): return "numpy_nullable" return None @@ -722,8 +727,17 @@ def int_dtype_mapper(dtype: Any) -> str: def convert_str_slice_to_int_slice( str_slice: slice, columns: pd.Index ) -> tuple[int | None, int | None, int | None]: - start = columns.get_loc(str_slice.start) if str_slice.start is not None else None - stop = columns.get_loc(str_slice.stop) + 1 if str_slice.stop is not None else None + # We can safely cast to int because we know that `columns` doesn't contain duplicates. + start = ( + cast(int, columns.get_loc(str_slice.start)) + if str_slice.start is not None + else None + ) + stop = ( + cast(int, columns.get_loc(str_slice.stop)) + 1 + if str_slice.stop is not None + else None + ) step = str_slice.step return (start, stop, step) diff --git a/narwhals/_polars/series.py b/narwhals/_polars/series.py index 27eab9d263..461932e5d1 100644 --- a/narwhals/_polars/series.py +++ b/narwhals/_polars/series.py @@ -483,7 +483,7 @@ def hist( version=self._version, ) elif (self._backend_version < (1, 15)) and self._native_series.count() < 1: - data_dict: dict[str, list[int | float] | pl.Series | pl.Expr] + data_dict: dict[str, Sequence[Any] | pl.Series] if bins is not None: data_dict = { "breakpoint": bins[1:], diff --git a/narwhals/utils.py b/narwhals/utils.py index 031c466f8a..0d0b50b76b 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -977,7 +977,7 @@ def is_ordered_categorical(series: Series[Any]) -> bool: if is_polars_series(native_series): return native_series.dtype.ordering == "physical" # type: ignore[attr-defined, no-any-return] if is_pandas_series(native_series): - return native_series.cat.ordered # type: ignore[no-any-return] + return bool(native_series.cat.ordered) if is_modin_series(native_series): # pragma: no cover return native_series.cat.ordered # type: ignore[no-any-return] if is_cudf_series(native_series): # pragma: no cover diff --git a/pyproject.toml b/pyproject.toml index c339e04236..d1b78c72a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ tests = [ "typing_extensions", ] typing = [ + "pandas-stubs", "typing_extensions", "mypy~=1.15.0", ] @@ -226,8 +227,6 @@ pretty = true [[tool.mypy.overrides]] module = [ - # TODO: enable step by step when it makes sense - # e.g. the pandas API is just too inconsistent for type hinting to be useful. "cudf.*", "dask.*", "dask_expr.*", @@ -235,7 +234,6 @@ module = [ "ibis.*", "modin.*", "numpy.*", - "pandas.*", "pyspark.*", "sklearn.*", "sqlframe.*", diff --git a/tests/conftest.py b/tests/conftest.py index 02c0921f2a..c8c4895ccf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -132,7 +132,7 @@ def dask_lazy_p2_constructor(obj: dict[str, list[Any]]) -> IntoFrame: # pragma: return dd.from_dict(obj, npartitions=2) # type: ignore[no-any-return] -def pyarrow_table_constructor(obj: dict[str, list[Any]]) -> IntoDataFrame: +def pyarrow_table_constructor(obj: dict[str, Any]) -> IntoDataFrame: return pa.table(obj) # type: ignore[no-any-return] @@ -227,7 +227,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None: if x not in GPU_CONSTRUCTORS and x != "modin" # too slow ] else: # pragma: no cover - selected_constructors = metafunc.config.getoption("constructors").split(",") + selected_constructors = metafunc.config.getoption("constructors").split(",") # pyright: ignore[reportAttributeAccessIssue] eager_constructors: list[Callable[[Any], IntoDataFrame]] = [] eager_constructors_ids: list[str] = [] diff --git a/tests/dependencies/is_into_dataframe_test.py b/tests/dependencies/is_into_dataframe_test.py index 80bd5edd8d..4efa7b2cc9 100644 --- a/tests/dependencies/is_into_dataframe_test.py +++ b/tests/dependencies/is_into_dataframe_test.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Mapping import numpy as np import pandas as pd @@ -16,7 +17,7 @@ class DictDataFrame: - def __init__(self: Self, data: dict[str, list[Any]]) -> None: + def __init__(self: Self, data: Mapping[str, Any]) -> None: self._data = data def __len__(self) -> int: # pragma: no cover @@ -27,7 +28,7 @@ def __narwhals_dataframe__(self) -> Self: # pragma: no cover def test_is_into_dataframe() -> None: - data = {"a": [1, 2, 3], "b": [4, 5, 6]} + data: dict[str, Any] = {"a": [1, 2, 3], "b": [4, 5, 6]} assert is_into_dataframe(pa.table(data)) assert is_into_dataframe(pl.DataFrame(data)) assert is_into_dataframe(pd.DataFrame(data)) diff --git a/tests/dtypes_test.py b/tests/dtypes_test.py index f6bd06de2e..ac69b0af77 100644 --- a/tests/dtypes_test.py +++ b/tests/dtypes_test.py @@ -15,8 +15,10 @@ import narwhals.stable.v1 as nw from tests.utils import PANDAS_VERSION from tests.utils import POLARS_VERSION +from tests.utils import PYARROW_VERSION if TYPE_CHECKING: + from narwhals.typing import IntoSeries from tests.utils import Constructor @@ -135,6 +137,8 @@ def test_struct_hashes() -> None: def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> None: if any(x in str(constructor) for x in ("dask", "modin", "cudf", "pyspark")): request.applymarker(pytest.mark.xfail) + if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (14,): + request.applymarker(pytest.mark.xfail) data = {"a": [[[1, 2], [3, 4], [5, 6]]]} df = nw.from_native(constructor(data)).with_columns( a=nw.col("a").cast(nw.Array(nw.Int64(), (3, 2))) @@ -144,13 +148,15 @@ def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> N def test_second_time_unit() -> None: - s = pd.Series(np.array([np.datetime64("2020-01-01", "s")])) + s: IntoSeries = pd.Series(np.array([np.datetime64("2020-01-01", "s")])) result = nw.from_native(s, series_only=True) if PANDAS_VERSION < (2,): # pragma: no cover assert result.dtype == nw.Datetime("ns") else: assert result.dtype == nw.Datetime("s") - s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=pa.timestamp("s"))]) + ts_sec = pa.timestamp("s") + dur_sec = pa.duration("s") + s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=ts_sec)], type=ts_sec) result = nw.from_native(s, series_only=True) assert result.dtype == nw.Datetime("s") s = pd.Series(np.array([np.timedelta64(1, "s")])) @@ -159,7 +165,7 @@ def test_second_time_unit() -> None: assert result.dtype == nw.Duration("ns") else: assert result.dtype == nw.Duration("s") - s = pa.chunked_array([pa.array([timedelta(1)], type=pa.duration("s"))]) + s = pa.chunked_array([pa.array([timedelta(1)], type=dur_sec)], type=dur_sec) result = nw.from_native(s, series_only=True) assert result.dtype == nw.Duration("s") diff --git a/tests/expr_and_series/arithmetic_test.py b/tests/expr_and_series/arithmetic_test.py index 4260e40880..fd535c7acc 100644 --- a/tests/expr_and_series/arithmetic_test.py +++ b/tests/expr_and_series/arithmetic_test.py @@ -166,9 +166,9 @@ def test_floordiv(left: int, right: int) -> None: # test is a bit manual unfortunately assume(right != 0) expected = {"a": [left // right]} - result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select( - nw.col("a") // right - ) + result: nw.DataFrame[Any] = nw.from_native( + pd.DataFrame({"a": [left]}), eager_only=True + ).select(nw.col("a") // right) assert_equal_data(result, expected) if PANDAS_VERSION < (2, 2): # pragma: no cover # Bug in old version of pandas @@ -201,9 +201,9 @@ def test_mod(left: int, right: int) -> None: # test is a bit manual unfortunately assume(right != 0) expected = {"a": [left % right]} - result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select( - nw.col("a") % right - ) + result: nw.DataFrame[Any] = nw.from_native( + pd.DataFrame({"a": [left]}), eager_only=True + ).select(nw.col("a") % right) assert_equal_data(result, expected) result = nw.from_native( pd.DataFrame({"a": [left]}).convert_dtypes(), eager_only=True diff --git a/tests/expr_and_series/nth_test.py b/tests/expr_and_series/nth_test.py index 442b478bae..0283f2381d 100644 --- a/tests/expr_and_series/nth_test.py +++ b/tests/expr_and_series/nth_test.py @@ -1,5 +1,8 @@ from __future__ import annotations +from typing import Any +from typing import Mapping + import polars as pl import pytest @@ -8,7 +11,7 @@ from tests.utils import Constructor from tests.utils import assert_equal_data -data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} +data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} @pytest.mark.parametrize( diff --git a/tests/frame/getitem_test.py b/tests/frame/getitem_test.py index afab298145..bf64979ae8 100644 --- a/tests/frame/getitem_test.py +++ b/tests/frame/getitem_test.py @@ -1,6 +1,8 @@ from __future__ import annotations +from typing import TYPE_CHECKING from typing import Any +from typing import cast import numpy as np import pandas as pd @@ -12,7 +14,10 @@ from tests.utils import ConstructorEager from tests.utils import assert_equal_data -data = { +if TYPE_CHECKING: + from narwhals.typing import _1DArray + +data: dict[str, Any] = { "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], "b": [11, 12, 13, 14, 15, 16], } @@ -74,7 +79,8 @@ def test_gather(constructor_eager: ConstructorEager) -> None: "b": [11, 14, 12], } assert_equal_data(result, expected) - result = df[np.array([0, 3, 1])] + arr = cast("_1DArray", np.array([0, 3, 1])) + result = df[arr] assert_equal_data(result, expected) @@ -96,10 +102,10 @@ def test_gather_rows_cols(constructor_eager: ConstructorEager) -> None: expected = {"b": [11, 14, 12]} - result = {"b": df[[0, 3, 1], 1]} + result: Any = {"b": df[[0, 3, 1], 1]} assert_equal_data(result, expected) - - result = {"b": df[np.array([0, 3, 1]), "b"]} + arr = cast("_1DArray", np.array([0, 3, 1])) + result = {"b": df[arr, "b"]} assert_equal_data(result, expected) diff --git a/tests/frame/interchange_native_namespace_test.py b/tests/frame/interchange_native_namespace_test.py index 67fba1c119..9d79194428 100644 --- a/tests/frame/interchange_native_namespace_test.py +++ b/tests/frame/interchange_native_namespace_test.py @@ -1,11 +1,14 @@ from __future__ import annotations +from typing import Any +from typing import Mapping + import polars as pl import pytest import narwhals.stable.v1 as nw -data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} def test_interchange() -> None: diff --git a/tests/frame/interchange_select_test.py b/tests/frame/interchange_select_test.py index 4e1400fd78..88234ff83f 100644 --- a/tests/frame/interchange_select_test.py +++ b/tests/frame/interchange_select_test.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING from typing import Any +from typing import Mapping import polars as pl import pytest @@ -11,7 +12,7 @@ if TYPE_CHECKING: from typing_extensions import Self -data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} class InterchangeDataFrame: diff --git a/tests/frame/interchange_to_arrow_test.py b/tests/frame/interchange_to_arrow_test.py index 064d15834a..14172e5d57 100644 --- a/tests/frame/interchange_to_arrow_test.py +++ b/tests/frame/interchange_to_arrow_test.py @@ -1,12 +1,15 @@ from __future__ import annotations +from typing import Any +from typing import Mapping + import polars as pl import pyarrow as pa import pytest import narwhals.stable.v1 as nw -data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} +data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]} def test_interchange_to_arrow() -> None: diff --git a/tests/frame/invalid_test.py b/tests/frame/invalid_test.py index e5eee63b9a..9bdedbd4b9 100644 --- a/tests/frame/invalid_test.py +++ b/tests/frame/invalid_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import pandas as pd import polars as pl import pyarrow as pa @@ -8,10 +10,13 @@ import narwhals.stable.v1 as nw from tests.utils import NUMPY_VERSION +if TYPE_CHECKING: + from narwhals.typing import Frame + def test_invalid() -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} - df = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]})) + df: Frame = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]})) with pytest.raises(ValueError, match="Multi-output"): df.select(nw.all() + nw.all()) df = nw.from_native(pd.DataFrame(data)) @@ -24,14 +29,14 @@ def test_invalid() -> None: def test_native_vs_non_native() -> None: - s = pd.Series([1, 2, 3]) - df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + s_pd = pd.Series([1, 2, 3]) + df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) with pytest.raises(TypeError, match="Perhaps you forgot"): - nw.from_native(df).filter(s > 1) - s = pl.Series([1, 2, 3]) - df = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]}) + nw.from_native(df_pd).filter(s_pd > 1) # type: ignore[arg-type] + s_pl = pl.Series([1, 2, 3]) + df_pl = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]}) with pytest.raises(TypeError, match="Perhaps you\n- forgot"): - nw.from_native(df).filter(s > 1) + nw.from_native(df_pl).filter(s_pl > 1) def test_validate_laziness() -> None: diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 79c56886a1..fc2a422012 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -236,39 +236,39 @@ def test_validate_not_duplicated_columns_duckdb() -> None: ) def test_nested_dtypes() -> None: duckdb = pytest.importorskip("duckdb") - df = pl.DataFrame( + df_pd = pl.DataFrame( {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ).to_pandas(use_pyarrow_extension_array=True) - nwdf = nw.from_native(df) + nwdf: nw.DataFrame[Any] | nw.LazyFrame[Any] = nw.from_native(df_pd) assert nwdf.schema == { "a": nw.List(nw.Int64), "b": nw.Array(nw.Int64, 2), "c": nw.Struct({"a": nw.Int64}), } - df = pl.DataFrame( + df_pl = pl.DataFrame( {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ) - nwdf = nw.from_native(df) + nwdf = nw.from_native(df_pl) assert nwdf.schema == { "a": nw.List(nw.Int64), "b": nw.Array(nw.Int64, 2), "c": nw.Struct({"a": nw.Int64}), } - df = pl.DataFrame( + df_pa = pl.DataFrame( {"a": [[1, 2]], "b": [[1, 2]], "c": [{"a": 1, "b": "x", "c": 1.1}]}, schema_overrides={"b": pl.Array(pl.Int64, 2)}, ).to_arrow() - nwdf = nw.from_native(df) + nwdf = nw.from_native(df_pa) assert nwdf.schema == { "a": nw.List(nw.Int64), "b": nw.Array(nw.Int64, 2), "c": nw.Struct({"a": nw.Int64, "b": nw.String, "c": nw.Float64}), } - df = duckdb.sql("select * from df") - nwdf = nw.from_native(df) + rel = duckdb.sql("select * from df_pa") + nwdf = nw.from_native(rel) assert nwdf.schema == { "a": nw.List(nw.Int64), "b": nw.Array(nw.Int64, 2), diff --git a/tests/frame/to_arrow_test.py b/tests/frame/to_arrow_test.py index 651dfabad1..497b8c56b2 100644 --- a/tests/frame/to_arrow_test.py +++ b/tests/frame/to_arrow_test.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from typing import Any import pyarrow as pa import pytest @@ -21,7 +22,7 @@ def test_to_arrow( # pyarrow requires pandas>=1.0.0 request.applymarker(pytest.mark.xfail) - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} + data: dict[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} df_raw = constructor_eager(data) result = nw.from_native(df_raw, eager_only=True).to_arrow() diff --git a/tests/frame/to_pandas_test.py b/tests/frame/to_pandas_test.py index 07b35c5872..825602b2e8 100644 --- a/tests/frame/to_pandas_test.py +++ b/tests/frame/to_pandas_test.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING +from typing import cast import pandas as pd import pytest @@ -22,10 +23,10 @@ def test_convert_pandas( ) -> None: data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} df_raw = constructor_eager(data) - result = nw.from_native(df_raw).to_pandas() # type: ignore[union-attr] + result = nw.from_native(df_raw, eager_only=True).to_pandas() if constructor_eager.__name__.startswith("pandas"): - expected = constructor_eager(data) + expected = cast(pd.DataFrame, constructor_eager(data)) elif "modin_pyarrow" in str(constructor_eager): expected = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") else: diff --git a/tests/frame/to_polars_test.py b/tests/frame/to_polars_test.py index cc2f860d99..e09c3625f7 100644 --- a/tests/frame/to_polars_test.py +++ b/tests/frame/to_polars_test.py @@ -1,6 +1,8 @@ from __future__ import annotations from typing import TYPE_CHECKING +from typing import Any +from typing import Mapping import polars as pl import pytest @@ -14,7 +16,7 @@ @pytest.mark.filterwarnings("ignore:.*Passing a BlockManager.*:DeprecationWarning") def test_convert_polars(constructor_eager: ConstructorEager) -> None: - data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} + data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]} df_raw = constructor_eager(data) result = nw.from_native(df_raw).to_polars() # type: ignore[union-attr] diff --git a/tests/group_by_test.py b/tests/group_by_test.py index 09be5cab13..3ec5ecda11 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -1,6 +1,8 @@ from __future__ import annotations from contextlib import nullcontext +from typing import Any +from typing import Mapping import pandas as pd import polars as pl @@ -14,7 +16,7 @@ from tests.utils import ConstructorEager from tests.utils import assert_equal_data -data = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} +data: Mapping[str, Any] = {"a": [1, 1, 3], "b": [4, 4, 6], "c": [7.0, 8.0, 9.0]} df_pandas = pd.DataFrame(data) df_lazy = pl.LazyFrame(data) @@ -25,14 +27,14 @@ def test_group_by_complex() -> None: df = nw.from_native(df_pandas) with pytest.warns(UserWarning, match="complex group-by"): - result = nw.to_native( + result_pd = nw.to_native( df.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") ) - assert_equal_data(result, expected) + assert_equal_data(result_pd, expected) lf = nw.from_native(df_lazy).lazy() - result = lf.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") - assert_equal_data(result, expected) + result_pl = lf.group_by("a").agg((nw.col("b") - nw.col("c").mean()).mean()).sort("a") + assert_equal_data(result_pl, expected) def test_invalid_group_by_dask() -> None: diff --git a/tests/hypothesis/basic_arithmetic_test.py b/tests/hypothesis/basic_arithmetic_test.py index 5e9b18703f..6e16b71296 100644 --- a/tests/hypothesis/basic_arithmetic_test.py +++ b/tests/hypothesis/basic_arithmetic_test.py @@ -1,5 +1,8 @@ from __future__ import annotations +from typing import Any +from typing import Mapping + import pandas as pd import polars as pl import pytest @@ -26,8 +29,9 @@ def test_mean( integer: st.SearchStrategy[list[int]], floats: st.SearchStrategy[float] ) -> None: - df_pandas = pd.DataFrame({"integer": integer, "floats": floats}) - df_polars = pl.DataFrame({"integer": integer, "floats": floats}) + data: Mapping[str, Any] = {"integer": integer, "floats": floats} + df_pandas = pd.DataFrame(data) + df_polars = pl.DataFrame(data) df_nw1 = nw.from_native(df_pandas, eager_only=True) df_nw2 = nw.from_native(df_polars, eager_only=True) diff --git a/tests/hypothesis/getitem_test.py b/tests/hypothesis/getitem_test.py index f6cfd45897..05abdc9db2 100644 --- a/tests/hypothesis/getitem_test.py +++ b/tests/hypothesis/getitem_test.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from typing import Any from typing import Callable +from typing import cast import hypothesis.strategies as st import numpy as np @@ -233,7 +234,7 @@ def test_getitem( return df_other = nw.from_native(pandas_or_pyarrow_constructor(TEST_DATA)) - result_other = df_other[selector] + result_other = df_other[cast("Any", selector)] if isinstance(result_polars, nw.Series): assert_equal_data({"a": result_other}, {"a": result_polars.to_list()}) diff --git a/tests/hypothesis/join_test.py b/tests/hypothesis/join_test.py index 97830ab0ac..879d54ca05 100644 --- a/tests/hypothesis/join_test.py +++ b/tests/hypothesis/join_test.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any +from typing import Mapping from typing import cast import pandas as pd @@ -49,7 +51,7 @@ def test_join( # pragma: no cover floats: st.SearchStrategy[list[float]], cols: st.SearchStrategy[list[str]], ) -> None: - data = {"a": integers, "b": other_integers, "c": floats} + data: Mapping[str, Any] = {"a": integers, "b": other_integers, "c": floats} join_cols = cast(list[str], cols) df_polars = pl.DataFrame(data) @@ -96,7 +98,7 @@ def test_cross_join( # pragma: no cover integers: st.SearchStrategy[list[int]], other_integers: st.SearchStrategy[list[int]], ) -> None: - data = {"a": integers, "b": other_integers} + data: Mapping[str, Any] = {"a": integers, "b": other_integers} df_polars = pl.DataFrame(data) df_polars2 = pl.DataFrame(data) @@ -149,8 +151,12 @@ def test_left_join( # pragma: no cover right_key: list[str], ) -> None: assume(len(left_key) == len(right_key)) - data_left = {"a": a_left_data, "b": b_left_data, "c": c_left_data} - data_right = {"a": a_right_data, "b": b_right_data, "d": d_right_data} + data_left: dict[str, Any] = {"a": a_left_data, "b": b_left_data, "c": c_left_data} + data_right: dict[str, Any] = { + "a": a_right_data, + "b": b_right_data, + "d": d_right_data, + } result_pd = nw.from_native(pd.DataFrame(data_left), eager_only=True).join( nw.from_native(pd.DataFrame(data_right), eager_only=True), how="left", diff --git a/tests/pickle_test.py b/tests/pickle_test.py index 78c0debf95..677f6084d1 100644 --- a/tests/pickle_test.py +++ b/tests/pickle_test.py @@ -17,7 +17,7 @@ class Foo: a: Sequence[int] # dry-run to check that none of these error - asdict(Foo(pd.Series([1, 2, 3]))) + asdict(Foo(pd.Series([1, 2, 3]))) # type: ignore[arg-type] asdict(Foo(pl.Series([1, 2, 3]))) # type: ignore[arg-type] asdict(Foo(nw.from_native(pl.Series([1, 2, 3]), series_only=True))) # type: ignore[arg-type] asdict(Foo(nw.from_native(pd.Series([1, 2, 3]), series_only=True))) # type: ignore[arg-type] diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py index 55869b46b1..46c6d7324c 100644 --- a/tests/read_scan_test.py +++ b/tests/read_scan_test.py @@ -1,5 +1,8 @@ from __future__ import annotations +from typing import Any +from typing import Mapping + import pandas as pd import polars as pl import pytest @@ -11,7 +14,7 @@ from tests.utils import ConstructorEager from tests.utils import assert_equal_data -data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} +data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]} def test_read_csv( diff --git a/tests/series_only/cast_test.py b/tests/series_only/cast_test.py index b4051e503c..1bedf4874f 100644 --- a/tests/series_only/cast_test.py +++ b/tests/series_only/cast_test.py @@ -108,13 +108,13 @@ def test_cast_to_enum() -> None: # we don't yet support metadata in dtypes, so for now disallow this # seems like a very niche use case anyway, and allowing it later wouldn't be # backwards-incompatible - df = pl.DataFrame({"a": ["a", "b"]}, schema={"a": pl.Categorical}) + df_pl = pl.DataFrame({"a": ["a", "b"]}, schema={"a": pl.Categorical}) with pytest.raises( NotImplementedError, match=r"Converting to Enum is not \(yet\) supported" ): - nw.from_native(df).select(nw.col("a").cast(nw.Enum)) - df = pd.DataFrame({"a": ["a", "b"]}, dtype="category") + nw.from_native(df_pl).select(nw.col("a").cast(nw.Enum)) + df_pd = pd.DataFrame({"a": ["a", "b"]}, dtype="category") with pytest.raises( NotImplementedError, match=r"Converting to Enum is not \(yet\) supported" ): - nw.from_native(df).select(nw.col("a").cast(nw.Enum)) + nw.from_native(df_pd).select(nw.col("a").cast(nw.Enum)) diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index 498ed2349a..f736921c5a 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -2,6 +2,7 @@ import os from datetime import datetime +from typing import TYPE_CHECKING from unittest import mock import pandas as pd @@ -14,6 +15,9 @@ from tests.utils import PANDAS_VERSION from tests.utils import assert_equal_data +if TYPE_CHECKING: + from narwhals.stable.v1.typing import IntoFrame + @pytest.mark.parametrize( "library", @@ -26,7 +30,7 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: if library == "pandas" and PANDAS_VERSION < (1, 5): request.applymarker(pytest.mark.xfail) elif library == "pandas": - df_raw = pd.read_csv("tests/data/lineitem.csv") + df_raw: IntoFrame = pd.read_csv("tests/data/lineitem.csv") elif library == "polars": df_raw = pl.scan_csv("tests/data/lineitem.csv") elif library == "dask": @@ -105,7 +109,7 @@ def test_q1_w_generic_funcs(library: str, request: pytest.FixtureRequest) -> Non if library == "pandas" and PANDAS_VERSION < (1, 5): request.applymarker(pytest.mark.xfail) elif library == "pandas": - df_raw = pd.read_csv("tests/data/lineitem.csv") + df_raw: IntoFrame = pd.read_csv("tests/data/lineitem.csv") else: df_raw = pl.read_csv("tests/data/lineitem.csv") var_1 = datetime(1998, 9, 2) diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index b3269b8383..6bfb2314a6 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -19,7 +19,7 @@ from narwhals.utils import Version -data = {"a": [1, 2, 3]} +data: dict[str, Any] = {"a": [1, 2, 3]} df_pd = pd.DataFrame(data) df_pl = pl.DataFrame(data) diff --git a/tests/translate/get_native_namespace_test.py b/tests/translate/get_native_namespace_test.py index b269caef75..be10e8257f 100644 --- a/tests/translate/get_native_namespace_test.py +++ b/tests/translate/get_native_namespace_test.py @@ -19,11 +19,11 @@ def test_native_namespace() -> None: assert nw.get_native_namespace(df.to_native()) is pl assert nw.get_native_namespace(df.lazy().to_native()) is pl assert nw.get_native_namespace(df["a"].to_native()) is pl - df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]})) + df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}), eager_only=True) assert nw.get_native_namespace(df) is pd assert nw.get_native_namespace(df.to_native()) is pd assert nw.get_native_namespace(df["a"].to_native()) is pd - df = nw.from_native(pa.table({"a": [1, 2, 3]})) + df = nw.from_native(pa.table({"a": [1, 2, 3]}), eager_only=True) assert nw.get_native_namespace(df) is pa assert nw.get_native_namespace(df.to_native()) is pa assert nw.get_native_namespace(df["a"].to_native()) is pa diff --git a/tests/utils.py b/tests/utils.py index 947db4edbb..295729c310 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -7,6 +7,7 @@ from typing import Any from typing import Callable from typing import Iterator +from typing import Mapping from typing import Sequence import pandas as pd @@ -70,7 +71,7 @@ def _to_comparable_list(column_values: Any) -> Any: def _sort_dict_by_key( - data_dict: dict[str, list[Any]], key: str + data_dict: Mapping[str, list[Any]], key: str ) -> dict[str, list[Any]]: # pragma: no cover sort_list = data_dict[key] sorted_indices = sorted( @@ -84,7 +85,7 @@ def _sort_dict_by_key( return {key: [value[i] for i in sorted_indices] for key, value in data_dict.items()} -def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: +def assert_equal_data(result: Any, expected: Mapping[str, Any]) -> None: is_pyspark = ( hasattr(result, "_compliant_frame") and result.implementation is Implementation.PYSPARK diff --git a/tests/utils_test.py b/tests/utils_test.py index aec7c652a4..dd15d92063 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -4,6 +4,8 @@ import string from dataclasses import dataclass from typing import TYPE_CHECKING +from typing import Any +from typing import cast import hypothesis.strategies as st import pandas as pd @@ -23,7 +25,7 @@ if TYPE_CHECKING: from narwhals.series import Series - from narwhals.typing import IntoSeriesT + from narwhals.typing import IntoSeries from narwhals.utils import _SupportsVersion @@ -123,17 +125,17 @@ def test_maybe_set_index_polars_column_names( ], ) def test_maybe_set_index_pandas_direct_index( - narwhals_index: Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None, - pandas_index: pd.Series | list[pd.Series] | None, + narwhals_index: Series[IntoSeries] | list[Series[IntoSeries]], + pandas_index: pd.Series | list[pd.Series], native_df_or_series: pd.DataFrame | pd.Series, ) -> None: df = nw.from_native(native_df_or_series, allow_series=True) result = nw.maybe_set_index(df, index=narwhals_index) if isinstance(native_df_or_series, pd.Series): - native_df_or_series.index = pandas_index + native_df_or_series.index = pandas_index # type: ignore[assignment] assert_series_equal(nw.to_native(result), native_df_or_series) else: - expected = native_df_or_series.set_index(pandas_index) + expected = native_df_or_series.set_index(pandas_index) # type: ignore[type-var] assert_frame_equal(nw.to_native(result), expected) @@ -148,7 +150,7 @@ def test_maybe_set_index_pandas_direct_index( ], ) def test_maybe_set_index_polars_direct_index( - index: Series[IntoSeriesT] | list[Series[IntoSeriesT]] | None, + index: Series[IntoSeries] | list[Series[IntoSeries]] | None, ) -> None: df = nw.from_native(pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})) result = nw.maybe_set_index(df, index=index) @@ -179,10 +181,13 @@ def test_maybe_set_index_pandas_either_index_or_column_names() -> None: def test_maybe_get_index_pandas() -> None: pandas_df = pd.DataFrame({"a": [1, 2, 3]}, index=[1, 2, 0]) - result = nw.maybe_get_index(nw.from_native(pandas_df)) + result = cast("pd.Index[Any]", nw.maybe_get_index(nw.from_native(pandas_df))) assert_index_equal(result, pandas_df.index) pandas_series = pd.Series([1, 2, 3], index=[1, 2, 0]) - result_s = nw.maybe_get_index(nw.from_native(pandas_series, series_only=True)) + result_s = cast( + "pd.Index[Any]", + nw.maybe_get_index(nw.from_native(pandas_series, series_only=True)), + ) assert_index_equal(result_s, pandas_series.index)