Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: fix pandas-stubs issues #2008

Merged
merged 17 commits on
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions narwhals/_dask/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@


def n_unique() -> dd.Aggregation:
def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> int:
def chunk(s: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]:
Copy link
Member

@dangotbanned dangotbanned Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm getting quite a lot of warnings for pd.core from pyright

Screenshot

image

I think I resolved this before in altair by finding a more public import path

return s.nunique(dropna=False) # type: ignore[no-any-return]

def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> int:
def agg(s0: pd.core.groupby.generic.SeriesGroupBy) -> pd.Series[Any]:
return s0.sum() # type: ignore[no-any-return]

return dd.Aggregation(
Expand Down
18 changes: 6 additions & 12 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,12 @@ def nth(self: Self, *column_indices: int) -> DaskExpr:

def lit(self: Self, value: Any, dtype: DType | None) -> DaskExpr:
def func(df: DaskLazyFrame) -> list[dx.Series]:
return [
dd.from_pandas(
pd.Series(
[value],
dtype=narwhals_to_native_dtype(dtype, self._version)
if dtype is not None
else None,
name="literal",
),
npartitions=df._native_frame.npartitions,
)
]
if dtype is not None:
native_dtype = narwhals_to_native_dtype(dtype, self._version)
s = pd.Series([value], dtype=native_dtype)
else:
s = pd.Series([value])
return [dd.from_pandas(s, npartitions=df._native_frame.npartitions)]

return DaskExpr(
func,
Expand Down
20 changes: 17 additions & 3 deletions narwhals/_pandas_like/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Iterable
from typing import Sequence
from typing import TypeVar
from typing import cast

import pandas as pd

Expand Down Expand Up @@ -511,7 +512,11 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> DTypeBacken
if hasattr(pd, "ArrowDtype") and isinstance(dtype, pd.ArrowDtype):
return "pyarrow"
with suppress(AttributeError):
if isinstance(dtype, pd.core.dtypes.dtypes.BaseMaskedDtype):
sentinel = object()
if (
isinstance(dtype, pd.api.extensions.ExtensionDtype)
and getattr(dtype, "base", sentinel) is None
):
Comment on lines +515 to +519
Copy link
Member

@dangotbanned dangotbanned Feb 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to avoiding pd.core (#2008 (comment))

Available since 0.23.0

Pretty sure we don't need with suppress(AttributeError): with this?
@MarcoGorelli

return "numpy_nullable"
return None

Expand Down Expand Up @@ -722,8 +727,17 @@ def int_dtype_mapper(dtype: Any) -> str:
def convert_str_slice_to_int_slice(
str_slice: slice, columns: pd.Index
) -> tuple[int | None, int | None, int | None]:
start = columns.get_loc(str_slice.start) if str_slice.start is not None else None
stop = columns.get_loc(str_slice.stop) + 1 if str_slice.stop is not None else None
# We can safely cast to int because we know that `columns` doesn't contain duplicates.
start = (
cast(int, columns.get_loc(str_slice.start))
if str_slice.start is not None
else None
)
stop = (
cast(int, columns.get_loc(str_slice.stop)) + 1
if str_slice.stop is not None
else None
)
step = str_slice.step
return (start, stop, step)

Expand Down
2 changes: 1 addition & 1 deletion narwhals/_polars/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def hist(
version=self._version,
)
elif (self._backend_version < (1, 15)) and self._native_series.count() < 1:
data_dict: dict[str, list[int | float] | pl.Series | pl.Expr]
data_dict: dict[str, Sequence[Any] | pl.Series]
if bins is not None:
data_dict = {
"breakpoint": bins[1:],
Expand Down
2 changes: 1 addition & 1 deletion narwhals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -977,7 +977,7 @@ def is_ordered_categorical(series: Series[Any]) -> bool:
if is_polars_series(native_series):
return native_series.dtype.ordering == "physical" # type: ignore[attr-defined, no-any-return]
if is_pandas_series(native_series):
return native_series.cat.ordered # type: ignore[no-any-return]
return bool(native_series.cat.ordered)
if is_modin_series(native_series): # pragma: no cover
return native_series.cat.ordered # type: ignore[no-any-return]
if is_cudf_series(native_series): # pragma: no cover
Expand Down
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ tests = [
"typing_extensions",
]
typing = [
"pandas-stubs",
"typing_extensions",
"mypy~=1.15.0",
]
Expand Down Expand Up @@ -226,16 +227,13 @@ pretty = true

[[tool.mypy.overrides]]
module = [
# TODO: enable step by step when it makes sense
# e.g. the pandas API is just too inconsistent for type hinting to be useful.
"cudf.*",
"dask.*",
"dask_expr.*",
"duckdb.*",
"ibis.*",
"modin.*",
"numpy.*",
"pandas.*",
"pyspark.*",
"sklearn.*",
"sqlframe.*",
Expand Down
4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def dask_lazy_p2_constructor(obj: dict[str, list[Any]]) -> IntoFrame: # pragma:
return dd.from_dict(obj, npartitions=2) # type: ignore[no-any-return]


def pyarrow_table_constructor(obj: dict[str, list[Any]]) -> IntoDataFrame:
def pyarrow_table_constructor(obj: dict[str, Any]) -> IntoDataFrame:
return pa.table(obj) # type: ignore[no-any-return]


Expand Down Expand Up @@ -227,7 +227,7 @@ def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
if x not in GPU_CONSTRUCTORS and x != "modin" # too slow
]
else: # pragma: no cover
selected_constructors = metafunc.config.getoption("constructors").split(",")
selected_constructors = metafunc.config.getoption("constructors").split(",") # pyright: ignore[reportAttributeAccessIssue]

eager_constructors: list[Callable[[Any], IntoDataFrame]] = []
eager_constructors_ids: list[str] = []
Expand Down
5 changes: 3 additions & 2 deletions tests/dependencies/is_into_dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Mapping

import numpy as np
import pandas as pd
Expand All @@ -16,7 +17,7 @@


class DictDataFrame:
def __init__(self: Self, data: dict[str, list[Any]]) -> None:
def __init__(self: Self, data: Mapping[str, Any]) -> None:
self._data = data

def __len__(self) -> int: # pragma: no cover
Expand All @@ -27,7 +28,7 @@ def __narwhals_dataframe__(self) -> Self: # pragma: no cover


def test_is_into_dataframe() -> None:
data = {"a": [1, 2, 3], "b": [4, 5, 6]}
data: dict[str, Any] = {"a": [1, 2, 3], "b": [4, 5, 6]}
assert is_into_dataframe(pa.table(data))
assert is_into_dataframe(pl.DataFrame(data))
assert is_into_dataframe(pd.DataFrame(data))
Expand Down
12 changes: 9 additions & 3 deletions tests/dtypes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
import narwhals.stable.v1 as nw
from tests.utils import PANDAS_VERSION
from tests.utils import POLARS_VERSION
from tests.utils import PYARROW_VERSION

if TYPE_CHECKING:
from narwhals.typing import IntoSeries
from tests.utils import Constructor


Expand Down Expand Up @@ -135,6 +137,8 @@ def test_struct_hashes() -> None:
def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> None:
if any(x in str(constructor) for x in ("dask", "modin", "cudf", "pyspark")):
request.applymarker(pytest.mark.xfail)
if "pyarrow_table" in str(constructor) and PYARROW_VERSION < (14,):
request.applymarker(pytest.mark.xfail)
data = {"a": [[[1, 2], [3, 4], [5, 6]]]}
df = nw.from_native(constructor(data)).with_columns(
a=nw.col("a").cast(nw.Array(nw.Int64(), (3, 2)))
Expand All @@ -144,13 +148,15 @@ def test_2d_array(constructor: Constructor, request: pytest.FixtureRequest) -> N


def test_second_time_unit() -> None:
s = pd.Series(np.array([np.datetime64("2020-01-01", "s")]))
s: IntoSeries = pd.Series(np.array([np.datetime64("2020-01-01", "s")]))
result = nw.from_native(s, series_only=True)
if PANDAS_VERSION < (2,): # pragma: no cover
assert result.dtype == nw.Datetime("ns")
else:
assert result.dtype == nw.Datetime("s")
s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=pa.timestamp("s"))])
ts_sec = pa.timestamp("s")
dur_sec = pa.duration("s")
s = pa.chunked_array([pa.array([datetime(2020, 1, 1)], type=ts_sec)], type=ts_sec)
result = nw.from_native(s, series_only=True)
assert result.dtype == nw.Datetime("s")
s = pd.Series(np.array([np.timedelta64(1, "s")]))
Expand All @@ -159,7 +165,7 @@ def test_second_time_unit() -> None:
assert result.dtype == nw.Duration("ns")
else:
assert result.dtype == nw.Duration("s")
s = pa.chunked_array([pa.array([timedelta(1)], type=pa.duration("s"))])
s = pa.chunked_array([pa.array([timedelta(1)], type=dur_sec)], type=dur_sec)
result = nw.from_native(s, series_only=True)
assert result.dtype == nw.Duration("s")

Expand Down
12 changes: 6 additions & 6 deletions tests/expr_and_series/arithmetic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,9 @@ def test_floordiv(left: int, right: int) -> None:
# test is a bit manual unfortunately
assume(right != 0)
expected = {"a": [left // right]}
result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select(
nw.col("a") // right
)
result: nw.DataFrame[Any] = nw.from_native(
pd.DataFrame({"a": [left]}), eager_only=True
).select(nw.col("a") // right)
assert_equal_data(result, expected)
if PANDAS_VERSION < (2, 2): # pragma: no cover
# Bug in old version of pandas
Expand Down Expand Up @@ -201,9 +201,9 @@ def test_mod(left: int, right: int) -> None:
# test is a bit manual unfortunately
assume(right != 0)
expected = {"a": [left % right]}
result = nw.from_native(pd.DataFrame({"a": [left]}), eager_only=True).select(
nw.col("a") % right
)
result: nw.DataFrame[Any] = nw.from_native(
pd.DataFrame({"a": [left]}), eager_only=True
).select(nw.col("a") % right)
assert_equal_data(result, expected)
result = nw.from_native(
pd.DataFrame({"a": [left]}).convert_dtypes(), eager_only=True
Expand Down
5 changes: 4 additions & 1 deletion tests/expr_and_series/nth_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pytest

Expand All @@ -8,7 +11,7 @@
from tests.utils import Constructor
from tests.utils import assert_equal_data

data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]}
data: Mapping[str, Any] = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.1, 8.0, 9.0]}


@pytest.mark.parametrize(
Expand Down
16 changes: 11 additions & 5 deletions tests/frame/getitem_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Any
from typing import cast

import numpy as np
import pandas as pd
Expand All @@ -12,7 +14,10 @@
from tests.utils import ConstructorEager
from tests.utils import assert_equal_data

data = {
if TYPE_CHECKING:
from narwhals.typing import _1DArray

data: dict[str, Any] = {
"a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
"b": [11, 12, 13, 14, 15, 16],
}
Expand Down Expand Up @@ -74,7 +79,8 @@ def test_gather(constructor_eager: ConstructorEager) -> None:
"b": [11, 14, 12],
}
assert_equal_data(result, expected)
result = df[np.array([0, 3, 1])]
arr = cast("_1DArray", np.array([0, 3, 1]))
result = df[arr]
assert_equal_data(result, expected)


Expand All @@ -96,10 +102,10 @@ def test_gather_rows_cols(constructor_eager: ConstructorEager) -> None:

expected = {"b": [11, 14, 12]}

result = {"b": df[[0, 3, 1], 1]}
result: Any = {"b": df[[0, 3, 1], 1]}
assert_equal_data(result, expected)

result = {"b": df[np.array([0, 3, 1]), "b"]}
arr = cast("_1DArray", np.array([0, 3, 1]))
result = {"b": df[arr, "b"]}
assert_equal_data(result, expected)


Expand Down
5 changes: 4 additions & 1 deletion tests/frame/interchange_native_namespace_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pytest

import narwhals.stable.v1 as nw

data = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.5, 6.7, 8.9], "z": ["x", "y", "w"]}


def test_interchange() -> None:
Expand Down
3 changes: 2 additions & 1 deletion tests/frame/interchange_select_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Mapping

import polars as pl
import pytest
Expand All @@ -11,7 +12,7 @@
if TYPE_CHECKING:
from typing_extensions import Self

data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}


class InterchangeDataFrame:
Expand Down
5 changes: 4 additions & 1 deletion tests/frame/interchange_to_arrow_test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from __future__ import annotations

from typing import Any
from typing import Mapping

import polars as pl
import pyarrow as pa
import pytest

import narwhals.stable.v1 as nw

data = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}
data: Mapping[str, Any] = {"a": [1, 2, 3], "b": [4.0, 5.0, 6.1], "z": ["x", "y", "z"]}


def test_interchange_to_arrow() -> None:
Expand Down
19 changes: 12 additions & 7 deletions tests/frame/invalid_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import pandas as pd
import polars as pl
import pyarrow as pa
Expand All @@ -8,10 +10,13 @@
import narwhals.stable.v1 as nw
from tests.utils import NUMPY_VERSION

if TYPE_CHECKING:
from narwhals.typing import Frame


def test_invalid() -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}
df = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]}))
df: Frame = nw.from_native(pa.table({"a": [1, 2], "b": [3, 4]}))
with pytest.raises(ValueError, match="Multi-output"):
df.select(nw.all() + nw.all())
df = nw.from_native(pd.DataFrame(data))
Expand All @@ -24,14 +29,14 @@ def test_invalid() -> None:


def test_native_vs_non_native() -> None:
s = pd.Series([1, 2, 3])
df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
s_pd = pd.Series([1, 2, 3])
df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
with pytest.raises(TypeError, match="Perhaps you forgot"):
nw.from_native(df).filter(s > 1)
s = pl.Series([1, 2, 3])
df = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]})
nw.from_native(df_pd).filter(s_pd > 1) # type: ignore[arg-type]
s_pl = pl.Series([1, 2, 3])
df_pl = pl.DataFrame({"a": [2, 2, 3], "b": [4, 5, 6]})
with pytest.raises(TypeError, match="Perhaps you\n- forgot"):
nw.from_native(df).filter(s > 1)
nw.from_native(df_pl).filter(s_pl > 1)


def test_validate_laziness() -> None:
Expand Down
Loading
Loading