diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 04e49cab0..09a3adf8f 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -360,7 +360,7 @@ def shift(self, n: int) -> Self: ) def cum_sum(self: Self, *, reverse: bool) -> Self: - if reverse: + if reverse: # pragma: no cover msg = "`cum_sum(reverse=True)` is not supported with Dask backend" raise NotImplementedError(msg) @@ -371,7 +371,7 @@ def cum_sum(self: Self, *, reverse: bool) -> Self: ) def cum_count(self: Self, *, reverse: bool) -> Self: - if reverse: + if reverse: # pragma: no cover msg = "`cum_count(reverse=True)` is not supported with Dask backend" raise NotImplementedError(msg) @@ -382,7 +382,7 @@ def cum_count(self: Self, *, reverse: bool) -> Self: ) def cum_min(self: Self, *, reverse: bool) -> Self: - if reverse: + if reverse: # pragma: no cover msg = "`cum_min(reverse=True)` is not supported with Dask backend" raise NotImplementedError(msg) @@ -393,7 +393,7 @@ def cum_min(self: Self, *, reverse: bool) -> Self: ) def cum_max(self: Self, *, reverse: bool) -> Self: - if reverse: + if reverse: # pragma: no cover msg = "`cum_max(reverse=True)` is not supported with Dask backend" raise NotImplementedError(msg) @@ -404,7 +404,7 @@ def cum_max(self: Self, *, reverse: bool) -> Self: ) def cum_prod(self: Self, *, reverse: bool) -> Self: - if reverse: + if reverse: # pragma: no cover msg = "`cum_prod(reverse=True)` is not supported with Dask backend" raise NotImplementedError(msg) diff --git a/narwhals/_dask/group_by.py b/narwhals/_dask/group_by.py index 2fbb4edb6..b92cf471b 100644 --- a/narwhals/_dask/group_by.py +++ b/narwhals/_dask/group_by.py @@ -51,7 +51,7 @@ def var( try: import dask.dataframe.dask_expr as dx - except ModuleNotFoundError: + except ModuleNotFoundError: # pragma: no cover import dask_expr as dx return partial(dx._groupby.GroupBy.var, ddof=ddof) @@ -66,7 +66,7 @@ def std( try: import dask.dataframe.dask_expr as dx - except ModuleNotFoundError: + except ModuleNotFoundError: # pragma: no cover import dask_expr as dx return partial(dx._groupby.GroupBy.std, ddof=ddof) diff --git a/narwhals/_dask/utils.py b/narwhals/_dask/utils.py index cd303d8ec..d14383869 100644 --- a/narwhals/_dask/utils.py +++ b/narwhals/_dask/utils.py @@ -89,7 +89,7 @@ def add_row_index( def validate_comparand(lhs: dx.Series, rhs: dx.Series) -> None: try: import dask.dataframe.dask_expr as dx - except ModuleNotFoundError: + except ModuleNotFoundError: # pragma: no cover import dask_expr as dx if not dx._expr.are_co_aligned(lhs._expr, rhs._expr): # pragma: no cover diff --git a/narwhals/_expression_parsing.py b/narwhals/_expression_parsing.py index ac7e645f2..dc52da002 100644 --- a/narwhals/_expression_parsing.py +++ b/narwhals/_expression_parsing.py @@ -27,6 +27,7 @@ from narwhals.typing import CompliantNamespace from narwhals.typing import CompliantSeries from narwhals.typing import CompliantSeriesT_co + from narwhals.typing import IntoExpr IntoCompliantExpr: TypeAlias = ( CompliantExpr[CompliantSeriesT_co] | str | CompliantSeriesT_co @@ -334,3 +335,10 @@ def extract_compliant( if isinstance(other, Series): return other._compliant_series return other + + +def operation_is_order_dependent(*args: IntoExpr | Any) -> bool: + # If an arg is an Expr, we look at `_is_order_dependent`. If it isn't, + # it means that it was a scalar (e.g. nw.col('a') + 1) or a column name, + # neither of which is order-dependent, so we default to `False`. + return any(getattr(x, "_is_order_dependent", False) for x in args) diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index e45041679..b055fed12 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -1,5 +1,6 @@ from __future__ import annotations +from abc import abstractmethod from typing import TYPE_CHECKING from typing import Any from typing import Callable @@ -15,6 +16,7 @@ from narwhals.dependencies import get_polars from narwhals.dependencies import is_numpy_array +from narwhals.exceptions import OrderDependentExprError from narwhals.schema import Schema from narwhals.translate import to_native from narwhals.utils import find_stacklevel @@ -70,25 +72,9 @@ def _flatten_and_extract(self, *args: Any, **kwargs: Any) -> Any: kwargs = {k: self._extract_compliant(v) for k, v in kwargs.items()} return args, kwargs + @abstractmethod def _extract_compliant(self, arg: Any) -> Any: - from narwhals.expr import Expr - from narwhals.series import Series - - if isinstance(arg, BaseFrame): - return arg._compliant_frame - if isinstance(arg, Series): - return arg._compliant_series - if isinstance(arg, Expr): - return arg._to_compliant_expr(self.__narwhals_namespace__()) - if get_polars() is not None and "polars" in str(type(arg)): - msg = ( - f"Expected Narwhals object, got: {type(arg)}.\n\n" - "Perhaps you:\n" - "- Forgot a `nw.from_native` somewhere?\n" - "- Used `pl.col` instead of `nw.col`?" - ) - raise TypeError(msg) - return arg + raise NotImplementedError @property def schema(self) -> Schema: @@ -361,6 +347,26 @@ class DataFrame(BaseFrame[DataFrameT]): ``` """ + def _extract_compliant(self, arg: Any) -> Any: + from narwhals.expr import Expr + from narwhals.series import Series + + if isinstance(arg, BaseFrame): + return arg._compliant_frame + if isinstance(arg, Series): + return arg._compliant_series + if isinstance(arg, Expr): + return arg._to_compliant_expr(self.__narwhals_namespace__()) + if get_polars() is not None and "polars" in str(type(arg)): + msg = ( + f"Expected Narwhals object, got: {type(arg)}.\n\n" + "Perhaps you:\n" + "- Forgot a `nw.from_native` somewhere?\n" + "- Used `pl.col` instead of `nw.col`?" + ) + raise TypeError(msg) + return arg + @property def _series(self) -> type[Series[Any]]: from narwhals.series import Series @@ -3621,6 +3627,40 @@ class LazyFrame(BaseFrame[FrameT]): ``` """ + def _extract_compliant(self, arg: Any) -> Any: + from narwhals.expr import Expr + from narwhals.series import Series + + if isinstance(arg, BaseFrame): + return arg._compliant_frame + if isinstance(arg, Series): # pragma: no cover + msg = "Binary operations between Series and LazyFrame are not supported." + raise TypeError(msg) + if isinstance(arg, Expr): + if arg._is_order_dependent: + msg = ( + "Order-dependent expressions are not supported for use in LazyFrame.\n\n" + "Hints:\n" + "- Instead of `lf.select(nw.col('a').sort())`, use `lf.select('a').sort()\n" + "- Instead of `lf.select(nw.col('a').head())`, use `lf.select('a').head()\n" + "- `Expr.cum_sum`, and other such expressions, are not currently supported.\n" + " In a future version of Narwhals, a `order_by` argument will be added and \n" + " they will be supported." + ) + raise OrderDependentExprError(msg) + return arg._to_compliant_expr(self.__narwhals_namespace__()) + if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover + msg = ( + f"Expected Narwhals object, got: {type(arg)}.\n\n" + "Perhaps you:\n" + "- Forgot a `nw.from_native` somewhere?\n" + "- Used `pl.col` instead of `nw.col`?" + ) + raise TypeError(msg) + # TODO(unassigned): should this line even be reachable? Should we + # be raising here? + return arg # pragma: no cover + @property def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py index 43904a0ba..df671be3a 100644 --- a/narwhals/dependencies.py +++ b/narwhals/dependencies.py @@ -92,7 +92,7 @@ def get_ibis() -> Any: return sys.modules.get("ibis", None) -def get_dask_expr() -> Any: +def get_dask_expr() -> Any: # pragma: no cover """Get dask_expr module (if already imported - else return None).""" return sys.modules.get("dask_expr", None) diff --git a/narwhals/exceptions.py b/narwhals/exceptions.py index 61447e54f..9b05e3ba8 100644 --- a/narwhals/exceptions.py +++ b/narwhals/exceptions.py @@ -83,6 +83,14 @@ def from_expr_name(cls, expr_name: str) -> AnonymousExprError: return AnonymousExprError(message) +class OrderDependentExprError(ValueError): + """Exception raised when trying to use an order-dependent expressions with LazyFrames.""" + + def __init__(self, message: str) -> None: + self.message = message + super().__init__(self.message) + + class UnsupportedDTypeError(ValueError): """Exception raised when trying to convert to a DType which is not supported by the given backend.""" diff --git a/narwhals/expr.py b/narwhals/expr.py index 119189b59..69c2e7dcc 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -9,6 +9,7 @@ from typing import Sequence from narwhals._expression_parsing import extract_compliant +from narwhals._expression_parsing import operation_is_order_dependent from narwhals.dtypes import _validate_dtype from narwhals.expr_cat import ExprCatNamespace from narwhals.expr_dt import ExprDateTimeNamespace @@ -29,14 +30,21 @@ class Expr: - def __init__(self, to_compliant_expr: Callable[[Any], Any]) -> None: + def __init__( + self, + to_compliant_expr: Callable[[Any], Any], + is_order_dependent: bool, # noqa: FBT001 + ) -> None: # callable from CompliantNamespace to CompliantExpr self._to_compliant_expr = to_compliant_expr + self._is_order_dependent = is_order_dependent def _taxicab_norm(self) -> Self: # This is just used to test out the stable api feature in a realistic-ish way. # It's not intended to be used. - return self.__class__(lambda plx: self._to_compliant_expr(plx).abs().sum()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).abs().sum(), self._is_order_dependent + ) # --- convert --- def alias(self, name: str) -> Self: @@ -92,7 +100,10 @@ def alias(self, name: str) -> Self: c: [[14,15]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).alias(name)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).alias(name), + is_order_dependent=self._is_order_dependent, + ) def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self: """Pipe function call. @@ -213,24 +224,32 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: _validate_dtype(dtype) return self.__class__( lambda plx: self._to_compliant_expr(plx).cast(dtype), + is_order_dependent=self._is_order_dependent, ) # --- binary --- def __eq__(self, other: object) -> Self: # type: ignore[override] return self.__class__( - lambda plx: self._to_compliant_expr(plx).__eq__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__eq__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __ne__(self, other: object) -> Self: # type: ignore[override] return self.__class__( - lambda plx: self._to_compliant_expr(plx).__ne__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__ne__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __and__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__and__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rand__(self, other: Any) -> Self: @@ -239,11 +258,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __or__(self, other: Any) -> Self: return self.__class__( - lambda plx: self._to_compliant_expr(plx).__or__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__or__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __ror__(self, other: Any) -> Self: @@ -252,13 +277,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __add__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__add__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __radd__(self, other: Any) -> Self: @@ -267,13 +296,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __sub__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__sub__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rsub__(self, other: Any) -> Self: @@ -282,13 +315,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __truediv__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__truediv__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rtruediv__(self, other: Any) -> Self: @@ -297,13 +334,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __mul__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__mul__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rmul__(self, other: Any) -> Self: @@ -312,33 +353,49 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __le__(self, other: Any) -> Self: return self.__class__( - lambda plx: self._to_compliant_expr(plx).__le__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__le__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __lt__(self, other: Any) -> Self: return self.__class__( - lambda plx: self._to_compliant_expr(plx).__lt__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__lt__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __gt__(self, other: Any) -> Self: return self.__class__( - lambda plx: self._to_compliant_expr(plx).__gt__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__gt__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __ge__(self, other: Any) -> Self: return self.__class__( - lambda plx: self._to_compliant_expr(plx).__ge__(extract_compliant(plx, other)) + lambda plx: self._to_compliant_expr(plx).__ge__( + extract_compliant(plx, other) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __pow__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__pow__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rpow__(self, other: Any) -> Self: @@ -347,13 +404,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __floordiv__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__floordiv__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rfloordiv__(self, other: Any) -> Self: @@ -362,13 +423,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) def __mod__(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).__mod__( extract_compliant(plx, other) - ) + ), + is_order_dependent=operation_is_order_dependent(self, other), ) def __rmod__(self, other: Any) -> Self: @@ -377,11 +442,17 @@ def func(plx: CompliantNamespace[Any]) -> CompliantExpr[Any]: extract_compliant(plx, self) ) - return self.__class__(func) + return self.__class__( + func, + is_order_dependent=operation_is_order_dependent(self, other), + ) # --- unary --- def __invert__(self) -> Self: - return self.__class__(lambda plx: self._to_compliant_expr(plx).__invert__()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).__invert__(), + is_order_dependent=self._is_order_dependent, + ) def any(self) -> Self: """Return whether any of the values in the column are `True`. @@ -432,7 +503,10 @@ def any(self) -> Self: a: [[true]] b: [[true]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).any()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).any(), + is_order_dependent=self._is_order_dependent, + ) def all(self) -> Self: """Return whether all values in the column are `True`. @@ -483,7 +557,10 @@ def all(self) -> Self: a: [[false]] b: [[true]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).all()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).all(), + is_order_dependent=self._is_order_dependent, + ) def ewm_mean( self: Self, @@ -584,7 +661,8 @@ def ewm_mean( adjust=adjust, min_periods=min_periods, ignore_nulls=ignore_nulls, - ) + ), + is_order_dependent=self._is_order_dependent, ) def mean(self) -> Self: @@ -636,7 +714,10 @@ def mean(self) -> Self: a: [[0]] b: [[4]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).mean()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).mean(), + is_order_dependent=self._is_order_dependent, + ) def median(self) -> Self: """Get median value. @@ -690,7 +771,10 @@ def median(self) -> Self: a: [[3]] b: [[4]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).median()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).median(), + is_order_dependent=self._is_order_dependent, + ) def std(self, *, ddof: int = 1) -> Self: """Get standard deviation. @@ -744,7 +828,10 @@ def std(self, *, ddof: int = 1) -> Self: b: [[1.2657891697365016]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).std(ddof=ddof)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).std(ddof=ddof), + is_order_dependent=self._is_order_dependent, + ) def var(self, *, ddof: int = 1) -> Self: """Get variance. @@ -799,7 +886,10 @@ def var(self, *, ddof: int = 1) -> Self: a: [[316.6666666666667]] b: [[1.6022222222222222]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).var(ddof=ddof)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).var(ddof=ddof), + is_order_dependent=self._is_order_dependent, + ) def map_batches( self, @@ -873,7 +963,8 @@ def map_batches( return self.__class__( lambda plx: self._to_compliant_expr(plx).map_batches( function=function, return_dtype=return_dtype - ) + ), + is_order_dependent=True, # safest assumption ) def skew(self: Self) -> Self: @@ -925,7 +1016,10 @@ def skew(self: Self) -> Self: a: [[0]] b: [[1.4724267269058975]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).skew()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).skew(), + is_order_dependent=self._is_order_dependent, + ) def sum(self) -> Expr: """Return the sum value. @@ -974,7 +1068,10 @@ def sum(self) -> Expr: a: [[15]] b: [[150]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).sum()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).sum(), + is_order_dependent=self._is_order_dependent, + ) def min(self) -> Self: """Returns the minimum value(s) from a column(s). @@ -1025,7 +1122,10 @@ def min(self) -> Self: a: [[1]] b: [[3]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).min()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).min(), + is_order_dependent=self._is_order_dependent, + ) def max(self) -> Self: """Returns the maximum value(s) from a column(s). @@ -1076,7 +1176,10 @@ def max(self) -> Self: a: [[20]] b: [[100]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).max()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).max(), + is_order_dependent=self._is_order_dependent, + ) def arg_min(self) -> Self: """Returns the index of the minimum value. @@ -1129,7 +1232,9 @@ def arg_min(self) -> Self: a_arg_min: [[0]] b_arg_min: [[1]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_min()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).arg_min(), is_order_dependent=True + ) def arg_max(self) -> Self: """Returns the index of the maximum value. @@ -1182,7 +1287,9 @@ def arg_max(self) -> Self: a_arg_max: [[1]] b_arg_max: [[0]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_max()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).arg_max(), is_order_dependent=True + ) def count(self) -> Self: """Returns the number of non-null elements in the column. @@ -1233,7 +1340,9 @@ def count(self) -> Self: a: [[3]] b: [[2]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).count()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).count(), self._is_order_dependent + ) def n_unique(self) -> Self: """Returns count of unique values. @@ -1282,7 +1391,9 @@ def n_unique(self) -> Self: a: [[5]] b: [[3]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).n_unique()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).n_unique(), self._is_order_dependent + ) def unique(self, *, maintain_order: bool = False) -> Self: """Return unique values of this expression. @@ -1343,7 +1454,10 @@ def unique(self, *, maintain_order: bool = False) -> Self: b: [[2,4,6]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).unique(maintain_order=maintain_order) + lambda plx: self._to_compliant_expr(plx).unique( + maintain_order=maintain_order + ), + self._is_order_dependent, ) def abs(self) -> Self: @@ -1397,7 +1511,9 @@ def abs(self) -> Self: a: [[1,2]] b: [[3,4]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).abs()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).abs(), self._is_order_dependent + ) def cum_sum(self: Self, *, reverse: bool = False) -> Self: """Return cumulative sum. @@ -1458,7 +1574,8 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: b: [[2,6,10,16,22]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).cum_sum(reverse=reverse) + lambda plx: self._to_compliant_expr(plx).cum_sum(reverse=reverse), + is_order_dependent=True, ) def diff(self) -> Self: @@ -1525,7 +1642,9 @@ def diff(self) -> Self: ---- a_diff: [[null,0,2,2,0]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).diff()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).diff(), is_order_dependent=True + ) def shift(self, n: int) -> Self: """Shift values by `n` positions. @@ -1594,7 +1713,9 @@ def shift(self, n: int) -> Self: ---- a_shift: [[null,1,1,3,5]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).shift(n)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).shift(n), is_order_dependent=True + ) def replace_strict( self, @@ -1685,7 +1806,8 @@ def replace_strict( return self.__class__( lambda plx: self._to_compliant_expr(plx).replace_strict( old, new, return_dtype=return_dtype - ) + ), + self._is_order_dependent, ) def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: @@ -1715,7 +1837,8 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).sort( descending=descending, nulls_last=nulls_last - ) + ), + is_order_dependent=True, ) # --- transform --- @@ -1789,7 +1912,10 @@ def is_between( extract_compliant(plx, lower_bound), extract_compliant(plx, upper_bound), closed, - ) + ), + is_order_dependent=operation_is_order_dependent( + self, lower_bound, upper_bound + ), ) def is_in(self, other: Any) -> Self: @@ -1854,7 +1980,8 @@ def is_in(self, other: Any) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).is_in( extract_compliant(plx, other) - ) + ), + self._is_order_dependent, ) else: msg = "Narwhals `is_in` doesn't accept expressions as an argument, as opposed to Polars. You should provide an iterable instead." @@ -1919,10 +2046,12 @@ def filter(self, *predicates: Any) -> Self: a: [[5,6,7]] b: [[10,11,12]] """ + flat_predicates = flatten(predicates) return self.__class__( lambda plx: self._to_compliant_expr(plx).filter( - *[extract_compliant(plx, pred) for pred in flatten(predicates)], - ) + *[extract_compliant(plx, pred) for pred in flat_predicates], + ), + is_order_dependent=operation_is_order_dependent(*flat_predicates), ) def is_null(self) -> Self: @@ -2001,7 +2130,9 @@ def is_null(self) -> Self: a_is_null: [[false,false,true,false,false]] b_is_null: [[false,false,true,false,false]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_null()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_null(), self._is_order_dependent + ) def is_nan(self) -> Self: """Indicate which values are NaN. @@ -2066,7 +2197,9 @@ def is_nan(self) -> Self: divided: [[nan,null,1]] divided_is_nan: [[true,null,false]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_nan()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_nan(), self._is_order_dependent + ) def arg_true(self) -> Self: """Find elements where boolean expression is True. @@ -2117,7 +2250,9 @@ def arg_true(self) -> Self: ---- a: [[1,2]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).arg_true()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).arg_true(), is_order_dependent=True + ) def fill_null( self, @@ -2259,7 +2394,8 @@ def fill_null( return self.__class__( lambda plx: self._to_compliant_expr(plx).fill_null( value=value, strategy=strategy, limit=limit - ) + ), + self._is_order_dependent, ) # --- partial reduction --- @@ -2320,7 +2456,10 @@ def drop_nulls(self) -> Self: ---- a: [[2,4,3,5]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).drop_nulls()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).drop_nulls(), + self._is_order_dependent, + ) def sample( self: Self, @@ -2359,7 +2498,8 @@ def sample( return self.__class__( lambda plx: self._to_compliant_expr(plx).sample( n, fraction=fraction, with_replacement=with_replacement, seed=seed - ) + ), + self._is_order_dependent, ) def over(self, *keys: str | Iterable[str]) -> Self: @@ -2450,7 +2590,8 @@ def over(self, *keys: str | Iterable[str]) -> Self: └─────┴─────┴─────┘ """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).over(flatten(keys)) + lambda plx: self._to_compliant_expr(plx).over(flatten(keys)), + self._is_order_dependent, ) def is_duplicated(self) -> Self: @@ -2508,7 +2649,10 @@ def is_duplicated(self) -> Self: a: [[true,false,false,true]] b: [[true,true,false,false]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_duplicated()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_duplicated(), + self._is_order_dependent, + ) def is_unique(self) -> Self: r"""Return a boolean mask indicating unique values. @@ -2565,7 +2709,9 @@ def is_unique(self) -> Self: a: [[false,true,true,false]] b: [[false,false,true,true]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_unique()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_unique(), self._is_order_dependent + ) def null_count(self) -> Self: r"""Count null values. @@ -2621,7 +2767,10 @@ def null_count(self) -> Self: a: [[1]] b: [[2]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).null_count()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).null_count(), + self._is_order_dependent, + ) def is_first_distinct(self) -> Self: r"""Return a boolean mask indicating the first occurrence of each distinct value. @@ -2679,7 +2828,8 @@ def is_first_distinct(self) -> Self: b: [[true,false,true,true]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).is_first_distinct() + lambda plx: self._to_compliant_expr(plx).is_first_distinct(), + is_order_dependent=True, ) def is_last_distinct(self) -> Self: @@ -2737,7 +2887,10 @@ def is_last_distinct(self) -> Self: a: [[false,true,true,true]] b: [[false,true,true,true]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_last_distinct()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_last_distinct(), + is_order_dependent=True, + ) def quantile( self, @@ -2806,7 +2959,8 @@ def quantile( b: [[74.5]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).quantile(quantile, interpolation) + lambda plx: self._to_compliant_expr(plx).quantile(quantile, interpolation), + self._is_order_dependent, ) def head(self, n: int = 10) -> Self: @@ -2832,7 +2986,9 @@ def head(self, n: int = 10) -> Self: "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" ) issue_deprecation_warning(msg, _version="1.22.0") - return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).head(n), is_order_dependent=True + ) def tail(self, n: int = 10) -> Self: r"""Get the last `n` rows. @@ -2857,7 +3013,9 @@ def tail(self, n: int = 10) -> Self: "See https://narwhals-dev.github.io/narwhals/backcompat/ for more information.\n" ) issue_deprecation_warning(msg, _version="1.22.0") - return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).tail(n), is_order_dependent=True + ) def round(self, decimals: int = 0) -> Self: r"""Round underlying floating point data by `decimals` digits. @@ -2922,7 +3080,10 @@ def round(self, decimals: int = 0) -> Self: ---- a: [[1.1,2.6,3.9]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).round(decimals)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).round(decimals), + self._is_order_dependent, + ) def len(self) -> Self: r"""Return the number of elements in the column. @@ -2979,7 +3140,9 @@ def len(self) -> Self: a1: [[2]] a2: [[1]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).len()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).len(), self._is_order_dependent + ) def gather_every(self: Self, n: int, offset: int = 0) -> Self: r"""Take every nth value in the Series and return as new Series. @@ -3006,7 +3169,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: ) issue_deprecation_warning(msg, _version="1.22.0") return self.__class__( - lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) + lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset), + is_order_dependent=True, ) # need to allow numeric typing @@ -3153,7 +3317,10 @@ def clip( lambda plx: self._to_compliant_expr(plx).clip( extract_compliant(plx, lower_bound), extract_compliant(plx, upper_bound), - ) + ), + is_order_dependent=operation_is_order_dependent( + self, lower_bound, upper_bound + ), ) def mode(self: Self) -> Self: @@ -3208,7 +3375,9 @@ def mode(self: Self) -> Self: ---- a: [[1]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).mode()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).mode(), self._is_order_dependent + ) def is_finite(self: Self) -> Self: """Returns boolean values indicating which original values are finite. @@ -3268,7 +3437,9 @@ def is_finite(self: Self) -> Self: ---- a: [[false,false,true,null]] """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).is_finite()) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).is_finite(), self._is_order_dependent + ) def cum_count(self: Self, *, reverse: bool = False) -> Self: r"""Return the cumulative count of the non-null values in the column. @@ -3334,7 +3505,8 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: cum_count_reverse: [[3,2,1,1]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse) + lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse), + is_order_dependent=True, ) def cum_min(self: Self, *, reverse: bool = False) -> Self: @@ -3401,7 +3573,8 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: cum_min_reverse: [[1,1,null,2]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse) + lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse), + is_order_dependent=True, ) def cum_max(self: Self, *, reverse: bool = False) -> Self: @@ -3468,7 +3641,8 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: cum_max_reverse: [[3,3,null,2]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse) + lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse), + is_order_dependent=True, ) def cum_prod(self: Self, *, reverse: bool = False) -> Self: @@ -3535,7 +3709,8 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: cum_prod_reverse: [[6,6,null,2]] """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse) + lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse), + is_order_dependent=True, ) def rolling_sum( @@ -3629,7 +3804,8 @@ def rolling_sum( window_size=window_size, min_periods=min_periods, center=center, - ) + ), + is_order_dependent=True, ) def rolling_mean( @@ -3723,7 +3899,8 @@ def rolling_mean( window_size=window_size, min_periods=min_periods, center=center, - ) + ), + is_order_dependent=True, ) def rolling_var( @@ -3817,7 +3994,8 @@ def rolling_var( return self.__class__( lambda plx: self._to_compliant_expr(plx).rolling_var( window_size=window_size, min_periods=min_periods, center=center, ddof=ddof - ) + ), + is_order_dependent=True, ) def rolling_std( @@ -3914,7 +4092,8 @@ def rolling_std( min_periods=min_periods, center=center, ddof=ddof, - ) + ), + is_order_dependent=True, ) def rank( @@ -4011,7 +4190,8 @@ def rank( return self.__class__( lambda plx: self._to_compliant_expr(plx).rank( method=method, descending=descending - ) + ), + is_order_dependent=True, ) @property diff --git a/narwhals/expr_cat.py b/narwhals/expr_cat.py index ada8e3a45..baf467df3 100644 --- a/narwhals/expr_cat.py +++ b/narwhals/expr_cat.py @@ -62,5 +62,6 @@ def get_categories(self: Self) -> ExprT: └────────┘ """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).cat.get_categories() + lambda plx: self._expr._to_compliant_expr(plx).cat.get_categories(), + self._expr._is_order_dependent, ) diff --git a/narwhals/expr_dt.py b/narwhals/expr_dt.py index d0676dd9b..6b981315d 100644 --- a/narwhals/expr_dt.py +++ b/narwhals/expr_dt.py @@ -71,7 +71,8 @@ def date(self: Self) -> ExprT: a: [[2012-01-07,2023-03-10]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.date() + lambda plx: self._expr._to_compliant_expr(plx).dt.date(), + self._expr._is_order_dependent, ) def year(self: Self) -> ExprT: @@ -139,7 +140,8 @@ def year(self: Self) -> ExprT: year: [[1978,2024,2065]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.year() + lambda plx: self._expr._to_compliant_expr(plx).dt.year(), + self._expr._is_order_dependent, ) def month(self: Self) -> ExprT: @@ -207,7 +209,8 @@ def month(self: Self) -> ExprT: month: [[6,12,1]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.month() + lambda plx: self._expr._to_compliant_expr(plx).dt.month(), + self._expr._is_order_dependent, ) def day(self: Self) -> ExprT: @@ -275,7 +278,8 @@ def day(self: Self) -> ExprT: day: [[1,13,1]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.day() + lambda plx: self._expr._to_compliant_expr(plx).dt.day(), + self._expr._is_order_dependent, ) def hour(self: Self) -> ExprT: @@ -343,7 +347,8 @@ def hour(self: Self) -> ExprT: hour: [[1,5,10]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.hour() + lambda plx: self._expr._to_compliant_expr(plx).dt.hour(), + self._expr._is_order_dependent, ) def minute(self: Self) -> ExprT: @@ -411,7 +416,8 @@ def minute(self: Self) -> ExprT: minute: [[1,30,20]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.minute() + lambda plx: self._expr._to_compliant_expr(plx).dt.minute(), + self._expr._is_order_dependent, ) def second(self: Self) -> ExprT: @@ -477,7 +483,8 @@ def second(self: Self) -> ExprT: second: [[1,14,30]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.second() + lambda plx: self._expr._to_compliant_expr(plx).dt.second(), + self._expr._is_order_dependent, ) def millisecond(self: Self) -> ExprT: @@ -543,7 +550,8 @@ def millisecond(self: Self) -> ExprT: millisecond: [[0,505,67]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.millisecond() + lambda plx: self._expr._to_compliant_expr(plx).dt.millisecond(), + self._expr._is_order_dependent, ) def microsecond(self: Self) -> ExprT: @@ -609,7 +617,8 @@ def microsecond(self: Self) -> ExprT: microsecond: [[0,505000,67000]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.microsecond() + lambda plx: self._expr._to_compliant_expr(plx).dt.microsecond(), + self._expr._is_order_dependent, ) def nanosecond(self: Self) -> ExprT: @@ -675,7 +684,8 @@ def nanosecond(self: Self) -> ExprT: nanosecond: [[0,500000000,60000000]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.nanosecond() + lambda plx: self._expr._to_compliant_expr(plx).dt.nanosecond(), + self._expr._is_order_dependent, ) def ordinal_day(self: Self) -> ExprT: @@ -733,7 +743,8 @@ def ordinal_day(self: Self) -> ExprT: a_ordinal_day: [[1,216]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.ordinal_day() + lambda plx: self._expr._to_compliant_expr(plx).dt.ordinal_day(), + self._expr._is_order_dependent, ) def weekday(self: Self) -> ExprT: @@ -789,7 +800,8 @@ def weekday(self: Self) -> ExprT: a_weekday: [[3,1]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.weekday() + lambda plx: self._expr._to_compliant_expr(plx).dt.weekday(), + self._expr._is_order_dependent, ) def total_minutes(self: Self) -> ExprT: @@ -852,7 +864,8 @@ def total_minutes(self: Self) -> ExprT: a_total_minutes: [[10,20]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.total_minutes() + lambda plx: self._expr._to_compliant_expr(plx).dt.total_minutes(), + self._expr._is_order_dependent, ) def total_seconds(self: Self) -> ExprT: @@ -915,7 +928,8 @@ def total_seconds(self: Self) -> ExprT: a_total_seconds: [[10,20]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.total_seconds() + lambda plx: self._expr._to_compliant_expr(plx).dt.total_seconds(), + self._expr._is_order_dependent, ) def total_milliseconds(self: Self) -> ExprT: @@ -983,7 +997,8 @@ def total_milliseconds(self: Self) -> ExprT: a_total_milliseconds: [[10,20]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.total_milliseconds() + lambda plx: self._expr._to_compliant_expr(plx).dt.total_milliseconds(), + self._expr._is_order_dependent, ) def total_microseconds(self: Self) -> ExprT: @@ -1051,7 +1066,8 @@ def total_microseconds(self: Self) -> ExprT: a_total_microseconds: [[10,1200]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.total_microseconds() + lambda plx: self._expr._to_compliant_expr(plx).dt.total_microseconds(), + self._expr._is_order_dependent, ) def total_nanoseconds(self: Self) -> ExprT: @@ -1106,7 +1122,8 @@ def total_nanoseconds(self: Self) -> ExprT: └───────────────────────────────┴──────────────────────────┘ """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.total_nanoseconds() + lambda plx: self._expr._to_compliant_expr(plx).dt.total_nanoseconds(), + self._expr._is_order_dependent, ) def to_string(self: Self, format: str) -> ExprT: # noqa: A002 @@ -1204,7 +1221,8 @@ def to_string(self: Self, format: str) -> ExprT: # noqa: A002 a: [["2020/03/01 00:00:00.000000","2020/04/01 00:00:00.000000","2020/05/01 00:00:00.000000"]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.to_string(format) + lambda plx: self._expr._to_compliant_expr(plx).dt.to_string(format), + self._expr._is_order_dependent, ) def replace_time_zone(self: Self, time_zone: str | None) -> ExprT: @@ -1268,7 +1286,10 @@ def replace_time_zone(self: Self, time_zone: str | None) -> ExprT: a: [[2023-12-31 18:15:00.000000Z,2024-01-01 18:15:00.000000Z]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.replace_time_zone(time_zone) + lambda plx: self._expr._to_compliant_expr(plx).dt.replace_time_zone( + time_zone + ), + self._expr._is_order_dependent, ) def convert_time_zone(self: Self, time_zone: str) -> ExprT: @@ -1338,7 +1359,10 @@ def convert_time_zone(self: Self, time_zone: str) -> ExprT: msg = "Target `time_zone` cannot be `None` in `convert_time_zone`. Please use `replace_time_zone(None)` if you want to remove the time zone." raise TypeError(msg) return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.convert_time_zone(time_zone) + lambda plx: self._expr._to_compliant_expr(plx).dt.convert_time_zone( + time_zone + ), + self._expr._is_order_dependent, ) def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ExprT: @@ -1411,5 +1435,6 @@ def timestamp(self: Self, time_unit: Literal["ns", "us", "ms"] = "us") -> ExprT: ) raise ValueError(msg) return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).dt.timestamp(time_unit) + lambda plx: self._expr._to_compliant_expr(plx).dt.timestamp(time_unit), + self._expr._is_order_dependent, ) diff --git a/narwhals/expr_list.py b/narwhals/expr_list.py index 17efeaf29..c64defda8 100644 --- a/narwhals/expr_list.py +++ b/narwhals/expr_list.py @@ -73,5 +73,6 @@ def len(self: Self) -> ExprT: a_len: [[2,3,null,0]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).list.len() + lambda plx: self._expr._to_compliant_expr(plx).list.len(), + self._expr._is_order_dependent, ) diff --git a/narwhals/expr_name.py b/narwhals/expr_name.py index 312a2bc9c..0d428cea1 100644 --- a/narwhals/expr_name.py +++ b/narwhals/expr_name.py @@ -59,7 +59,8 @@ def keep(self: Self) -> ExprT: ['foo'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.keep() + lambda plx: self._expr._to_compliant_expr(plx).name.keep(), + self._expr._is_order_dependent, ) def map(self: Self, function: Callable[[str], str]) -> ExprT: @@ -108,7 +109,8 @@ def map(self: Self, function: Callable[[str], str]) -> ExprT: ['oof', 'RAB'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.map(function) + lambda plx: self._expr._to_compliant_expr(plx).name.map(function), + self._expr._is_order_dependent, ) def prefix(self: Self, prefix: str) -> ExprT: @@ -156,7 +158,8 @@ def prefix(self: Self, prefix: str) -> ExprT: ['with_prefix_foo', 'with_prefix_BAR'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.prefix(prefix) + lambda plx: self._expr._to_compliant_expr(plx).name.prefix(prefix), + self._expr._is_order_dependent, ) def suffix(self: Self, suffix: str) -> ExprT: @@ -204,7 +207,8 @@ def suffix(self: Self, suffix: str) -> ExprT: ['foo_with_suffix', 'BAR_with_suffix'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.suffix(suffix) + lambda plx: self._expr._to_compliant_expr(plx).name.suffix(suffix), + self._expr._is_order_dependent, ) def to_lowercase(self: Self) -> ExprT: @@ -249,7 +253,8 @@ def to_lowercase(self: Self) -> ExprT: ['foo', 'bar'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.to_lowercase() + lambda plx: self._expr._to_compliant_expr(plx).name.to_lowercase(), + self._expr._is_order_dependent, ) def to_uppercase(self: Self) -> ExprT: @@ -294,5 +299,6 @@ def to_uppercase(self: Self) -> ExprT: ['FOO', 'BAR'] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).name.to_uppercase() + lambda plx: self._expr._to_compliant_expr(plx).name.to_uppercase(), + self._expr._is_order_dependent, ) diff --git a/narwhals/expr_str.py b/narwhals/expr_str.py index 91d355c66..0ea89ceb4 100644 --- a/narwhals/expr_str.py +++ b/narwhals/expr_str.py @@ -76,7 +76,8 @@ def len_chars(self: Self) -> ExprT: words_len: [[3,4,3,2,null]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.len_chars() + lambda plx: self._expr._to_compliant_expr(plx).str.len_chars(), + self._expr._is_order_dependent, ) def replace( @@ -142,7 +143,8 @@ def replace( return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.replace( pattern, value, literal=literal, n=n - ) + ), + self._expr._is_order_dependent, ) def replace_all( @@ -207,7 +209,8 @@ def replace_all( return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.replace_all( pattern, value, literal=literal - ) + ), + self._expr._is_order_dependent, ) def strip_chars(self: Self, characters: str | None = None) -> ExprT: @@ -255,7 +258,8 @@ def strip_chars(self: Self, characters: str | None = None) -> ExprT: {'fruits': ['apple', '\nmango'], 'stripped': ['apple', 'mango']} """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.strip_chars(characters) + lambda plx: self._expr._to_compliant_expr(plx).str.strip_chars(characters), + self._expr._is_order_dependent, ) def starts_with(self: Self, prefix: str) -> ExprT: @@ -317,7 +321,8 @@ def starts_with(self: Self, prefix: str) -> ExprT: has_prefix: [[true,false,null]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.starts_with(prefix) + lambda plx: self._expr._to_compliant_expr(plx).str.starts_with(prefix), + self._expr._is_order_dependent, ) def ends_with(self: Self, suffix: str) -> ExprT: @@ -379,7 +384,8 @@ def ends_with(self: Self, suffix: str) -> ExprT: has_suffix: [[false,true,null]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.ends_with(suffix) + lambda plx: self._expr._to_compliant_expr(plx).str.ends_with(suffix), + self._expr._is_order_dependent, ) def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT: @@ -457,7 +463,8 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT: return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.contains( pattern, literal=literal - ) + ), + self._expr._is_order_dependent, ) def slice(self: Self, offset: int, length: int | None = None) -> ExprT: @@ -559,7 +566,8 @@ def slice(self: Self, offset: int, length: int | None = None) -> ExprT: return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.slice( offset=offset, length=length - ) + ), + self._expr._is_order_dependent, ) def head(self: Self, n: int = 5) -> ExprT: @@ -626,7 +634,8 @@ def head(self: Self, n: int = 5) -> ExprT: lyrics_head: [["Atata","taata","taata","zukky"]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.slice(0, n) + lambda plx: self._expr._to_compliant_expr(plx).str.slice(0, n), + self._expr._is_order_dependent, ) def tail(self: Self, n: int = 5) -> ExprT: @@ -695,7 +704,8 @@ def tail(self: Self, n: int = 5) -> ExprT: return self._expr.__class__( lambda plx: self._expr._to_compliant_expr(plx).str.slice( offset=-n, length=None - ) + ), + self._expr._is_order_dependent, ) def to_datetime(self: Self, format: str | None = None) -> ExprT: # noqa: A002 @@ -764,7 +774,8 @@ def to_datetime(self: Self, format: str | None = None) -> ExprT: # noqa: A002 a: [[2020-01-01 00:00:00.000000,2020-01-02 00:00:00.000000]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.to_datetime(format=format) + lambda plx: self._expr._to_compliant_expr(plx).str.to_datetime(format=format), + self._expr._is_order_dependent, ) def to_uppercase(self: Self) -> ExprT: @@ -828,7 +839,8 @@ def to_uppercase(self: Self) -> ExprT: upper_col: [["APPLE","MANGO",null]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.to_uppercase() + lambda plx: self._expr._to_compliant_expr(plx).str.to_uppercase(), + self._expr._is_order_dependent, ) def to_lowercase(self: Self) -> ExprT: @@ -887,5 +899,6 @@ def to_lowercase(self: Self) -> ExprT: lower_col: [["apple","mango",null]] """ return self._expr.__class__( - lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase() + lambda plx: self._expr._to_compliant_expr(plx).str.to_lowercase(), + self._expr._is_order_dependent, ) diff --git a/narwhals/functions.py b/narwhals/functions.py index f10a397f7..5071d1bae 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -13,6 +13,7 @@ from typing import overload from narwhals._expression_parsing import extract_compliant +from narwhals._expression_parsing import operation_is_order_dependent from narwhals._pandas_like.utils import broadcast_align_and_extract_native from narwhals.dataframe import DataFrame from narwhals.dataframe import LazyFrame @@ -1382,7 +1383,7 @@ def col(*names: str | Iterable[str]) -> Expr: def func(plx: Any) -> Any: return plx.col(*flatten(names)) - return Expr(func) + return Expr(func, is_order_dependent=False) def nth(*indices: int | Sequence[int]) -> Expr: @@ -1444,7 +1445,7 @@ def nth(*indices: int | Sequence[int]) -> Expr: def func(plx: Any) -> Any: return plx.nth(*flatten(indices)) - return Expr(func) + return Expr(func, is_order_dependent=False) # Add underscore so it doesn't conflict with builtin `all` @@ -1501,7 +1502,7 @@ def all_() -> Expr: a: [[2,4,6]] b: [[8,10,12]] """ - return Expr(lambda plx: plx.all()) + return Expr(lambda plx: plx.all(), is_order_dependent=False) # Add underscore so it doesn't conflict with builtin `len` @@ -1554,7 +1555,7 @@ def len_() -> Expr: def func(plx: Any) -> Any: return plx.len() - return Expr(func) + return Expr(func, is_order_dependent=False) def sum(*columns: str) -> Expr: @@ -1610,7 +1611,7 @@ def sum(*columns: str) -> Expr: ---- a: [[3]] """ - return Expr(lambda plx: plx.col(*columns).sum()) + return Expr(lambda plx: plx.col(*columns).sum(), is_order_dependent=False) def mean(*columns: str) -> Expr: @@ -1666,7 +1667,7 @@ def mean(*columns: str) -> Expr: ---- a: [[4]] """ - return Expr(lambda plx: plx.col(*columns).mean()) + return Expr(lambda plx: plx.col(*columns).mean(), is_order_dependent=False) def median(*columns: str) -> Expr: @@ -1724,7 +1725,7 @@ def median(*columns: str) -> Expr: ---- a: [[4]] """ - return Expr(lambda plx: plx.col(*columns).median()) + return Expr(lambda plx: plx.col(*columns).median(), is_order_dependent=False) def min(*columns: str) -> Expr: @@ -1780,7 +1781,7 @@ def min(*columns: str) -> Expr: ---- b: [[5]] """ - return Expr(lambda plx: plx.col(*columns).min()) + return Expr(lambda plx: plx.col(*columns).min(), is_order_dependent=False) def max(*columns: str) -> Expr: @@ -1836,7 +1837,7 @@ def max(*columns: str) -> Expr: ---- a: [[2]] """ - return Expr(lambda plx: plx.col(*columns).max()) + return Expr(lambda plx: plx.col(*columns).max(), is_order_dependent=False) def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: @@ -1899,10 +1900,10 @@ def sum_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `sum_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.sum_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.sum_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -1969,10 +1970,10 @@ def min_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `min_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.min_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.min_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -2039,10 +2040,10 @@ def max_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `max_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.max_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.max_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -2056,20 +2057,22 @@ def __init__(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> None: def _extract_predicates(self, plx: Any) -> Any: return [extract_compliant(plx, v) for v in self._predicates] - def then(self, value: Any) -> Then: + def then(self, value: IntoExpr | Any) -> Then: return Then( lambda plx: plx.when(*self._extract_predicates(plx)).then( extract_compliant(plx, value) - ) + ), + is_order_dependent=operation_is_order_dependent(*self._predicates, value), ) class Then(Expr): - def otherwise(self, value: Any) -> Expr: + def otherwise(self, value: IntoExpr | Any) -> Expr: return Expr( lambda plx: self._to_compliant_expr(plx).otherwise( extract_compliant(plx, value) - ) + ), + is_order_dependent=operation_is_order_dependent(self, value), ) @@ -2219,10 +2222,10 @@ def all_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `all_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.all_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.all_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -2293,7 +2296,7 @@ def lit(value: Any, dtype: DType | type[DType] | None = None) -> Expr: msg = f"Nested datatypes are not supported yet. Got {value}" raise NotImplementedError(msg) - return Expr(lambda plx: plx.lit(value, dtype)) + return Expr(lambda plx: plx.lit(value, dtype), is_order_dependent=False) def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: @@ -2367,10 +2370,10 @@ def any_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `any_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.any_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.any_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -2437,10 +2440,10 @@ def mean_horizontal(*exprs: IntoExpr | Iterable[IntoExpr]) -> Expr: if not exprs: msg = "At least one expression must be passed to `mean_horizontal`" raise ValueError(msg) + flat_exprs = flatten(exprs) return Expr( - lambda plx: plx.mean_horizontal( - *[extract_compliant(plx, v) for v in flatten(exprs)] - ) + lambda plx: plx.mean_horizontal(*[extract_compliant(plx, v) for v in flat_exprs]), + is_order_dependent=operation_is_order_dependent(*flat_exprs), ) @@ -2522,11 +2525,13 @@ def concat_str( ---- full_sentence: [["2 dogs play","4 cats swim",null]] """ + flat_exprs = flatten([exprs]) return Expr( lambda plx: plx.concat_str( - [extract_compliant(plx, v) for v in flatten([exprs])], + [extract_compliant(plx, v) for v in flat_exprs], *[extract_compliant(plx, v) for v in more_exprs], separator=separator, ignore_nulls=ignore_nulls, - ) + ), + is_order_dependent=operation_is_order_dependent(*flat_exprs, *more_exprs), ) diff --git a/narwhals/selectors.py b/narwhals/selectors.py index 31a5f80e8..664fdc9ca 100644 --- a/narwhals/selectors.py +++ b/narwhals/selectors.py @@ -52,7 +52,9 @@ def by_dtype(*dtypes: Any) -> Expr: │ 4 ┆ 4.6 │ └─────┴─────┘ """ - return Selector(lambda plx: plx.selectors.by_dtype(flatten(dtypes))) + return Selector( + lambda plx: plx.selectors.by_dtype(flatten(dtypes)), is_order_dependent=False + ) def numeric() -> Expr: @@ -95,7 +97,7 @@ def numeric() -> Expr: │ 4 ┆ 4.6 │ └─────┴─────┘ """ - return Selector(lambda plx: plx.selectors.numeric()) + return Selector(lambda plx: plx.selectors.numeric(), is_order_dependent=False) def boolean() -> Expr: @@ -138,7 +140,7 @@ def boolean() -> Expr: │ true │ └───────┘ """ - return Selector(lambda plx: plx.selectors.boolean()) + return Selector(lambda plx: plx.selectors.boolean(), is_order_dependent=False) def string() -> Expr: @@ -181,7 +183,7 @@ def string() -> Expr: │ y │ └─────┘ """ - return Selector(lambda plx: plx.selectors.string()) + return Selector(lambda plx: plx.selectors.string(), is_order_dependent=False) def categorical() -> Expr: @@ -224,7 +226,7 @@ def categorical() -> Expr: │ y │ └─────┘ """ - return Selector(lambda plx: plx.selectors.categorical()) + return Selector(lambda plx: plx.selectors.categorical(), is_order_dependent=False) def all() -> Expr: @@ -267,7 +269,7 @@ def all() -> Expr: │ 2 ┆ y ┆ true │ └─────┴─────┴───────┘ """ - return Selector(lambda plx: plx.selectors.all()) + return Selector(lambda plx: plx.selectors.all(), is_order_dependent=False) __all__ = [ diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index b4b3540ab..941f16377 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -17,6 +17,7 @@ from narwhals import selectors from narwhals.dataframe import DataFrame as NwDataFrame from narwhals.dataframe import LazyFrame as NwLazyFrame +from narwhals.dependencies import get_polars from narwhals.expr import Expr as NwExpr from narwhals.functions import Then as NwThen from narwhals.functions import When as NwWhen @@ -238,6 +239,31 @@ class LazyFrame(NwLazyFrame[IntoFrameT]): def _dataframe(self) -> type[DataFrame[Any]]: return DataFrame + def _extract_compliant(self, arg: Any) -> Any: + # After v1, we raise when passing order-dependent + # expressions to LazyFrame + from narwhals.dataframe import BaseFrame + from narwhals.expr import Expr + from narwhals.series import Series + + if isinstance(arg, BaseFrame): + return arg._compliant_frame + if isinstance(arg, Series): # pragma: no cover + msg = "Mixing Series with LazyFrame is not supported." + raise TypeError(msg) + if isinstance(arg, Expr): + # After stable.v1, we raise if arg._is_order_dependent + return arg._to_compliant_expr(self.__narwhals_namespace__()) + if get_polars() is not None and "polars" in str(type(arg)): # pragma: no cover + msg = ( + f"Expected Narwhals object, got: {type(arg)}.\n\n" + "Perhaps you:\n" + "- Forgot a `nw.from_native` somewhere?\n" + "- Used `pl.col` instead of `nw.col`?" + ) + raise TypeError(msg) + return arg + def collect(self) -> DataFrame[Any]: r"""Materialize this LazyFrame into a DataFrame. @@ -845,7 +871,9 @@ def head(self, n: int = 10) -> Self: Returns: A new expression. """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).head(n)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).head(n), is_order_dependent=True + ) def tail(self, n: int = 10) -> Self: r"""Get the last `n` rows. @@ -856,7 +884,9 @@ def tail(self, n: int = 10) -> Self: Returns: A new expression. """ - return self.__class__(lambda plx: self._to_compliant_expr(plx).tail(n)) + return self.__class__( + lambda plx: self._to_compliant_expr(plx).tail(n), is_order_dependent=True + ) def gather_every(self: Self, n: int, offset: int = 0) -> Self: r"""Take every nth value in the Series and return as new Series. @@ -869,7 +899,8 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self: A new expression. """ return self.__class__( - lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset) + lambda plx: self._to_compliant_expr(plx).gather_every(n=n, offset=offset), + is_order_dependent=True, ) def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: @@ -885,7 +916,8 @@ def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Self: return self.__class__( lambda plx: self._to_compliant_expr(plx).sort( descending=descending, nulls_last=nulls_last - ) + ), + is_order_dependent=True, ) def sample( @@ -918,7 +950,8 @@ def sample( return self.__class__( lambda plx: self._to_compliant_expr(plx).sample( n, fraction=fraction, with_replacement=with_replacement, seed=seed - ) + ), + is_order_dependent=True, ) @@ -963,7 +996,7 @@ def _stableify( level=obj._level, ) if isinstance(obj, NwExpr): - return Expr(obj._to_compliant_expr) + return Expr(obj._to_compliant_expr, is_order_dependent=obj._is_order_dependent) return obj @@ -1921,7 +1954,7 @@ def then(self, value: Any) -> Then: class Then(NwThen, Expr): @classmethod def from_then(cls, then: NwThen) -> Self: - return cls(then._to_compliant_expr) + return cls(then._to_compliant_expr, is_order_dependent=then._is_order_dependent) def otherwise(self, value: Any) -> Expr: return _stableify(super().otherwise(value)) diff --git a/tests/expr_and_series/cum_count_test.py b/tests/expr_and_series/cum_count_test.py index dab77ebbc..22a983445 100644 --- a/tests/expr_and_series/cum_count_test.py +++ b/tests/expr_and_series/cum_count_test.py @@ -3,7 +3,6 @@ import pytest import narwhals.stable.v1 as nw -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -16,16 +15,9 @@ @pytest.mark.parametrize("reverse", [True, False]) -def test_cum_count_expr( - request: pytest.FixtureRequest, constructor: Constructor, *, reverse: bool -) -> None: - if "dask" in str(constructor) and reverse: - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - +def test_cum_count_expr(constructor_eager: ConstructorEager, *, reverse: bool) -> None: name = "reverse_cum_count" if reverse else "cum_count" - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.select( nw.col("a").cum_count(reverse=reverse).alias(name), ) diff --git a/tests/expr_and_series/cum_max_test.py b/tests/expr_and_series/cum_max_test.py index 3df5a6ad4..c5ec38b42 100644 --- a/tests/expr_and_series/cum_max_test.py +++ b/tests/expr_and_series/cum_max_test.py @@ -5,7 +5,6 @@ import narwhals.stable.v1 as nw from tests.utils import PANDAS_VERSION from tests.utils import PYARROW_VERSION -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -19,23 +18,18 @@ @pytest.mark.parametrize("reverse", [True, False]) def test_cum_max_expr( - request: pytest.FixtureRequest, constructor: Constructor, *, reverse: bool + request: pytest.FixtureRequest, constructor_eager: ConstructorEager, *, reverse: bool ) -> None: - if "dask" in str(constructor) and reverse: - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - - if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): + if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( - constructor + constructor_eager ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_max" if reverse else "cum_max" - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.select( nw.col("a").cum_max(reverse=reverse).alias(name), ) diff --git a/tests/expr_and_series/cum_min_test.py b/tests/expr_and_series/cum_min_test.py index a758dc8b4..137914806 100644 --- a/tests/expr_and_series/cum_min_test.py +++ b/tests/expr_and_series/cum_min_test.py @@ -5,7 +5,6 @@ import narwhals.stable.v1 as nw from tests.utils import PANDAS_VERSION from tests.utils import PYARROW_VERSION -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -19,23 +18,18 @@ @pytest.mark.parametrize("reverse", [True, False]) def test_cum_min_expr( - request: pytest.FixtureRequest, constructor: Constructor, *, reverse: bool + request: pytest.FixtureRequest, constructor_eager: ConstructorEager, *, reverse: bool ) -> None: - if "dask" in str(constructor) and reverse: - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - - if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): + if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( - constructor + constructor_eager ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_min" if reverse else "cum_min" - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.select( nw.col("a").cum_min(reverse=reverse).alias(name), ) diff --git a/tests/expr_and_series/cum_prod_test.py b/tests/expr_and_series/cum_prod_test.py index 2d6861b8d..0257d9284 100644 --- a/tests/expr_and_series/cum_prod_test.py +++ b/tests/expr_and_series/cum_prod_test.py @@ -5,7 +5,6 @@ import narwhals.stable.v1 as nw from tests.utils import PANDAS_VERSION from tests.utils import PYARROW_VERSION -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -19,23 +18,18 @@ @pytest.mark.parametrize("reverse", [True, False]) def test_cum_prod_expr( - request: pytest.FixtureRequest, constructor: Constructor, *, reverse: bool + request: pytest.FixtureRequest, constructor_eager: ConstructorEager, *, reverse: bool ) -> None: - if "dask" in str(constructor) and reverse: - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - - if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor): + if PYARROW_VERSION < (13, 0, 0) and "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) if (PANDAS_VERSION < (2, 1) or PYARROW_VERSION < (13,)) and "pandas_pyarrow" in str( - constructor + constructor_eager ): request.applymarker(pytest.mark.xfail) name = "reverse_cum_prod" if reverse else "cum_prod" - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.select( nw.col("a").cum_prod(reverse=reverse).alias(name), ) diff --git a/tests/expr_and_series/cum_sum_test.py b/tests/expr_and_series/cum_sum_test.py index 8a419c9a9..eb1b985b8 100644 --- a/tests/expr_and_series/cum_sum_test.py +++ b/tests/expr_and_series/cum_sum_test.py @@ -3,7 +3,6 @@ import pytest import narwhals.stable.v1 as nw -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -15,16 +14,9 @@ @pytest.mark.parametrize("reverse", [True, False]) -def test_cum_sum_expr( - request: pytest.FixtureRequest, constructor: Constructor, *, reverse: bool -) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - if "dask" in str(constructor) and reverse: - request.applymarker(pytest.mark.xfail) - +def test_cum_sum_expr(constructor_eager: ConstructorEager, *, reverse: bool) -> None: name = "reverse_cum_sum" if reverse else "cum_sum" - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.select( nw.col("a").cum_sum(reverse=reverse).alias(name), ) diff --git a/tests/expr_and_series/diff_test.py b/tests/expr_and_series/diff_test.py index f7730a2d4..0114ba19a 100644 --- a/tests/expr_and_series/diff_test.py +++ b/tests/expr_and_series/diff_test.py @@ -4,7 +4,6 @@ import narwhals.stable.v1 as nw from tests.utils import PYARROW_VERSION -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -16,15 +15,15 @@ def test_diff( - constructor: Constructor, + constructor_eager: ConstructorEager, request: pytest.FixtureRequest, ) -> None: - if "pyarrow_table_constructor" in str(constructor) and PYARROW_VERSION < (13,): + if "pyarrow_table_constructor" in str(constructor_eager) and PYARROW_VERSION < (13,): # pc.pairwisediff is available since pyarrow 13.0.0 request.applymarker(pytest.mark.xfail) - if any(x in str(constructor) for x in ("duckdb", "pyspark")): + if any(x in str(constructor_eager) for x in ("duckdb", "pyspark")): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) result = df.with_columns(c_diff=nw.col("c").diff()).filter(nw.col("i") > 0) expected = { "i": [1, 2, 3, 4], diff --git a/tests/expr_and_series/is_first_distinct_test.py b/tests/expr_and_series/is_first_distinct_test.py index 6870c3394..21ded6aa8 100644 --- a/tests/expr_and_series/is_first_distinct_test.py +++ b/tests/expr_and_series/is_first_distinct_test.py @@ -1,9 +1,6 @@ from __future__ import annotations -import pytest - import narwhals.stable.v1 as nw -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -13,12 +10,8 @@ } -def test_is_first_distinct_expr( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) +def test_is_first_distinct_expr(constructor_eager: ConstructorEager) -> None: + df = nw.from_native(constructor_eager(data)) result = df.select(nw.all().is_first_distinct()) expected = { "a": [True, False, True, True, False], diff --git a/tests/expr_and_series/is_last_distinct_test.py b/tests/expr_and_series/is_last_distinct_test.py index 9362cd02a..4bfa7e13c 100644 --- a/tests/expr_and_series/is_last_distinct_test.py +++ b/tests/expr_and_series/is_last_distinct_test.py @@ -1,9 +1,6 @@ from __future__ import annotations -import pytest - import narwhals.stable.v1 as nw -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -13,12 +10,8 @@ } -def test_is_last_distinct_expr( - constructor: Constructor, request: pytest.FixtureRequest -) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) +def test_is_last_distinct_expr(constructor_eager: ConstructorEager) -> None: + df = nw.from_native(constructor_eager(data)) result = df.select(nw.all().is_last_distinct()) expected = { "a": [False, True, False, True, True], diff --git a/tests/expr_and_series/mode_test.py b/tests/expr_and_series/mode_test.py index a78a442fa..3a9c1eee3 100644 --- a/tests/expr_and_series/mode_test.py +++ b/tests/expr_and_series/mode_test.py @@ -4,7 +4,6 @@ import narwhals.stable.v1 as nw from tests.utils import POLARS_VERSION -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -14,7 +13,7 @@ } -def test_mode_single_expr(constructor_eager: Constructor) -> None: +def test_mode_single_expr(constructor_eager: ConstructorEager) -> None: df = nw.from_native(constructor_eager(data)) result = df.select(nw.col("a").mode()).sort("a") expected = {"a": [1, 2]} diff --git a/tests/expr_and_series/order_dependent_lazy_test.py b/tests/expr_and_series/order_dependent_lazy_test.py new file mode 100644 index 000000000..769f508a5 --- /dev/null +++ b/tests/expr_and_series/order_dependent_lazy_test.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd +import pytest + +import narwhals as nw +import narwhals.stable.v1 as nw_v1 +from narwhals.exceptions import OrderDependentExprError +from tests.utils import assert_equal_data + +if TYPE_CHECKING: + from tests.utils import Constructor + + +def test_order_dependent_raises_in_lazy(constructor: Constructor) -> None: + lf = nw.from_native(constructor({"a": [1, 2, 3]})).lazy() + with pytest.raises(OrderDependentExprError, match="Order-dependent expressions"): + lf.select(nw.col("a").diff()) + + +def test_dask_order_dependent_ops() -> None: + # Preserve these for narwhals.stable.v1, even though they + # raise after stable.v1. + pytest.importorskip("dask") + import dask.dataframe as dd + + df = nw_v1.from_native(dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}))) + result = df.select( + a=nw.col("a").cum_sum(), + b=nw.col("a").cum_count(), + c=nw.col("a").cum_prod(), + d=nw.col("a").cum_max(), + e=nw.col("a").cum_min(), + f=nw.col("a").shift(1), + g=nw.col("a").diff(), + h=nw.col("a").is_first_distinct(), + i=nw.col("a").is_last_distinct(), + ) + expected = { + "a": [1, 3, 6], + "b": [1, 2, 3], + "c": [1, 2, 6], + "d": [1, 2, 3], + "e": [1, 1, 1], + "f": [None, 1.0, 2.0], + "g": [None, 1.0, 1.0], + "h": [True, True, True], + "i": [True, True, True], + } + assert_equal_data(result, expected) diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index 45b64eba0..57ab4118f 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -6,6 +6,7 @@ import narwhals.stable.v1 as nw from tests.utils import PANDAS_VERSION from tests.utils import Constructor +from tests.utils import ConstructorEager from tests.utils import assert_equal_data data = { @@ -68,15 +69,15 @@ def test_over_invalid(request: pytest.FixtureRequest, constructor: Constructor) df.with_columns(c_min=nw.all().min().over("a", "b")) -def test_over_cumsum(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if "pyarrow_table" in str(constructor) or "dask_lazy_p2" in str(constructor): +def test_over_cumsum( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1): - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data_cum)) + df = nw.from_native(constructor_eager(data_cum)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, None, 5, 3], @@ -89,13 +90,13 @@ def test_over_cumsum(request: pytest.FixtureRequest, constructor: Constructor) - assert_equal_data(result, expected) -def test_over_cumcount(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if "pyarrow_table" in str(constructor) or "dask_lazy_p2" in str(constructor): - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): +def test_over_cumcount( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data_cum)) + df = nw.from_native(constructor_eager(data_cum)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, None, 5, 3], @@ -110,14 +111,14 @@ def test_over_cumcount(request: pytest.FixtureRequest, constructor: Constructor) assert_equal_data(result, expected) -def test_over_cummax(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(x in str(constructor) for x in ("pyarrow_table", "dask_lazy_p2", "duckdb")): - request.applymarker(pytest.mark.xfail) - if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1): +def test_over_cummax( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(x in str(constructor_eager) for x in ("pyarrow_table",)): request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data_cum)) + df = nw.from_native(constructor_eager(data_cum)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, None, 5, 3], @@ -129,15 +130,15 @@ def test_over_cummax(request: pytest.FixtureRequest, constructor: Constructor) - assert_equal_data(result, expected) -def test_over_cummin(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if "pyarrow_table" in str(constructor) or "dask_lazy_p2" in str(constructor): +def test_over_cummin( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1): - request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data_cum)) + df = nw.from_native(constructor_eager(data_cum)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, None, 5, 3], @@ -150,15 +151,15 @@ def test_over_cummin(request: pytest.FixtureRequest, constructor: Constructor) - assert_equal_data(result, expected) -def test_over_cumprod(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if any(x in str(constructor) for x in ("pyarrow_table", "dask_lazy_p2", "duckdb")): - request.applymarker(pytest.mark.xfail) - if "pandas_pyarrow" in str(constructor) and PANDAS_VERSION < (2, 1): +def test_over_cumprod( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if any(x in str(constructor_eager) for x in ("pyarrow_table",)): request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if "pandas_pyarrow" in str(constructor_eager) and PANDAS_VERSION < (2, 1): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data_cum)) + df = nw.from_native(constructor_eager(data_cum)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, None, 5, 3], @@ -179,15 +180,15 @@ def test_over_anonymous() -> None: nw.from_native(df).select(nw.all().cum_max().over("a")) -def test_over_shift(request: pytest.FixtureRequest, constructor: Constructor) -> None: - if "pyarrow_table_constructor" in str( - constructor - ) or "dask_lazy_p2_constructor" in str(constructor): +def test_over_shift( + request: pytest.FixtureRequest, constructor_eager: ConstructorEager +) -> None: + if "pyarrow_table_constructor" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): + if ("pyspark" in str(constructor_eager)) or "duckdb" in str(constructor_eager): request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) + df = nw.from_native(constructor_eager(data)) expected = { "a": ["a", "a", "b", "b", "b"], "b": [1, 2, 3, 5, 3], diff --git a/tests/expr_and_series/sample_test.py b/tests/expr_and_series/sample_test.py index eaea6a527..af1f5665f 100644 --- a/tests/expr_and_series/sample_test.py +++ b/tests/expr_and_series/sample_test.py @@ -9,13 +9,15 @@ def test_expr_sample(constructor_eager: ConstructorEager) -> None: - df = nw.from_native(constructor_eager({"a": [1, 2, 3], "b": [4, 5, 6]})).lazy() + df = nw.from_native( + constructor_eager({"a": [1, 2, 3], "b": [4, 5, 6]}), eager_only=True + ) - result_expr = df.select(nw.col("a").sample(n=2)).collect().shape + result_expr = df.select(nw.col("a").sample(n=2)).shape expected_expr = (2, 1) assert result_expr == expected_expr - result_series = df.collect()["a"].sample(n=2).shape + result_series = df["a"].sample(n=2).shape expected_series = (2,) assert result_series == expected_series @@ -39,7 +41,7 @@ def test_expr_sample_fraction(constructor_eager: ConstructorEager) -> None: def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: size, n = 100, 10 - df = nw.from_native(constructor_eager({"a": list(range(size))})).lazy() + df = nw.from_native(constructor_eager({"a": list(range(size))})) expected = {"res1": [True], "res2": [False]} result = df.select( seed1=nw.col("a").sample(n=n, seed=123), @@ -52,7 +54,7 @@ def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: assert_equal_data(result, expected) - series = df.collect()["a"] + series = df["a"] seed1 = series.sample(n=n, seed=123) seed2 = series.sample(n=n, seed=123) seed3 = series.sample(n=n, seed=42) diff --git a/tests/expr_and_series/shift_test.py b/tests/expr_and_series/shift_test.py index 4f7894939..d6f0274f5 100644 --- a/tests/expr_and_series/shift_test.py +++ b/tests/expr_and_series/shift_test.py @@ -1,10 +1,8 @@ from __future__ import annotations import pyarrow as pa -import pytest import narwhals.stable.v1 as nw -from tests.utils import Constructor from tests.utils import ConstructorEager from tests.utils import assert_equal_data @@ -16,10 +14,8 @@ } -def test_shift(constructor: Constructor, request: pytest.FixtureRequest) -> None: - if ("pyspark" in str(constructor)) or "duckdb" in str(constructor): - request.applymarker(pytest.mark.xfail) - df = nw.from_native(constructor(data)) +def test_shift(constructor_eager: ConstructorEager) -> None: + df = nw.from_native(constructor_eager(data)) result = df.with_columns(nw.col("a", "b", "c").shift(2)).filter(nw.col("i") > 1) expected = { "i": [2, 3, 4], diff --git a/tests/frame/join_test.py b/tests/frame/join_test.py index 5ff112f31..4c3c1419b 100644 --- a/tests/frame/join_test.py +++ b/tests/frame/join_test.py @@ -8,6 +8,7 @@ import pandas as pd import pytest +import narwhals as nw_main # use nw_main in some tests for coverage import narwhals.stable.v1 as nw from narwhals.utils import Implementation from tests.utils import DUCKDB_VERSION @@ -23,7 +24,7 @@ def test_inner_join_two_keys(constructor: Constructor) -> None: "zor ro": [7.0, 8, 9], "idx": [0, 1, 2], } - df = nw.from_native(constructor(data)) + df = nw_main.from_native(constructor(data)) df_right = df result = df.join( df_right, # type: ignore[arg-type] diff --git a/tests/frame/sample_test.py b/tests/frame/sample_test.py index ff3591fdd..19cbf0cb3 100644 --- a/tests/frame/sample_test.py +++ b/tests/frame/sample_test.py @@ -5,10 +5,10 @@ import narwhals.stable.v1 as nw if TYPE_CHECKING: - from tests.utils import Constructor + from tests.utils import ConstructorEager -def test_sample_n(constructor_eager: Constructor) -> None: +def test_sample_n(constructor_eager: ConstructorEager) -> None: df = nw.from_native( constructor_eager({"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}), eager_only=True ) @@ -18,7 +18,7 @@ def test_sample_n(constructor_eager: Constructor) -> None: assert result_expr == expected_expr -def test_sample_fraction(constructor_eager: Constructor) -> None: +def test_sample_fraction(constructor_eager: ConstructorEager) -> None: df = nw.from_native( constructor_eager({"a": [1, 2, 3, 4], "b": ["x", "y", "x", "y"]}), eager_only=True ) @@ -28,7 +28,7 @@ def test_sample_fraction(constructor_eager: Constructor) -> None: assert result_expr == expected_expr -def test_sample_with_seed(constructor_eager: Constructor) -> None: +def test_sample_with_seed(constructor_eager: ConstructorEager) -> None: size, n = 100, 10 df = nw.from_native(constructor_eager({"a": range(size)}), eager_only=True) diff --git a/tests/frame/schema_test.py b/tests/frame/schema_test.py index 9de397748..565bf0159 100644 --- a/tests/frame/schema_test.py +++ b/tests/frame/schema_test.py @@ -274,7 +274,6 @@ def test_nested_dtypes_ibis(request: pytest.FixtureRequest) -> None: # pragma: ) def test_nested_dtypes_dask() -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd df = dd.from_pandas( diff --git a/tests/group_by_test.py b/tests/group_by_test.py index d446f8003..eeaf18750 100644 --- a/tests/group_by_test.py +++ b/tests/group_by_test.py @@ -38,7 +38,6 @@ def test_group_by_complex() -> None: def test_invalid_group_by_dask() -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd df_dask = dd.from_pandas(df_pandas) diff --git a/tests/new_series_test.py b/tests/new_series_test.py index 0d635c853..416972ff8 100644 --- a/tests/new_series_test.py +++ b/tests/new_series_test.py @@ -47,7 +47,6 @@ def test_new_series_v1(constructor_eager: ConstructorEager) -> None: def test_new_series_dask() -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd df = nw.from_native(dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}))) diff --git a/tests/no_imports_test.py b/tests/no_imports_test.py index c89a92567..4f2e75e6f 100644 --- a/tests/no_imports_test.py +++ b/tests/no_imports_test.py @@ -51,7 +51,6 @@ def test_pandas(monkeypatch: pytest.MonkeyPatch) -> None: def test_dask(monkeypatch: pytest.MonkeyPatch) -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd monkeypatch.delitem(sys.modules, "polars") diff --git a/tests/series_only/alias_rename_test.py b/tests/series_only/alias_rename_test.py index 87143a574..8b02b3894 100644 --- a/tests/series_only/alias_rename_test.py +++ b/tests/series_only/alias_rename_test.py @@ -1,11 +1,11 @@ from __future__ import annotations import narwhals as nw -from tests.utils import Constructor +from tests.utils import ConstructorEager from tests.utils import assert_equal_data -def test_alias_rename(constructor_eager: Constructor) -> None: +def test_alias_rename(constructor_eager: ConstructorEager) -> None: data = [1, 2, 3] expected = {"bar": data} series = nw.from_native(constructor_eager({"foo": data}), eager_only=True)["foo"] diff --git a/tests/tpch_q1_test.py b/tests/tpch_q1_test.py index cb6d48548..498ed2349 100644 --- a/tests/tpch_q1_test.py +++ b/tests/tpch_q1_test.py @@ -31,7 +31,6 @@ def test_q1(library: str, request: pytest.FixtureRequest) -> None: df_raw = pl.scan_csv("tests/data/lineitem.csv") elif library == "dask": pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd df_raw = dd.from_pandas( diff --git a/tests/translate/from_native_test.py b/tests/translate/from_native_test.py index a95674456..c99287951 100644 --- a/tests/translate/from_native_test.py +++ b/tests/translate/from_native_test.py @@ -202,7 +202,6 @@ def test_init_already_narwhals_unstable() -> None: def test_series_only_dask() -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd dframe = dd.from_pandas(df_pd) @@ -221,7 +220,6 @@ def test_series_only_dask() -> None: ) def test_eager_only_lazy_dask(eager_only: Any, context: Any) -> None: pytest.importorskip("dask") - pytest.importorskip("dask_expr", exc_type=ImportError) import dask.dataframe as dd dframe = dd.from_pandas(df_pd) diff --git a/utils/check_api_reference.py b/utils/check_api_reference.py index 1b1867744..8df16e88b 100644 --- a/utils/check_api_reference.py +++ b/utils/check_api_reference.py @@ -160,7 +160,9 @@ # Expr methods expr_methods = [ - i for i in nw.Expr(lambda: 0).__dir__() if not i[0].isupper() and i[0] != "_" + i + for i in nw.Expr(lambda: 0, is_order_dependent=False).__dir__() + if not i[0].isupper() and i[0] != "_" ] with open("docs/api-reference/expr.md") as fd: content = fd.read() @@ -182,7 +184,9 @@ for namespace in NAMESPACES: expr_methods = [ i - for i in getattr(nw.Expr(lambda: 0), namespace).__dir__() + for i in getattr( + nw.Expr(lambda: 0, is_order_dependent=False), namespace + ).__dir__() if not i[0].isupper() and i[0] != "_" ] with open(f"docs/api-reference/expr_{namespace}.md") as fd: @@ -222,7 +226,11 @@ ret = 1 # Check Expr vs Series -expr = [i for i in nw.Expr(lambda: 0).__dir__() if not i[0].isupper() and i[0] != "_"] +expr = [ + i + for i in nw.Expr(lambda: 0, is_order_dependent=False).__dir__() + if not i[0].isupper() and i[0] != "_" +] series = [ i for i in nw.from_native(pl.Series(), series_only=True).__dir__() @@ -241,7 +249,9 @@ for namespace in NAMESPACES.difference({"name"}): expr_internal = [ i - for i in getattr(nw.Expr(lambda: 0), namespace).__dir__() + for i in getattr( + nw.Expr(lambda: 0, is_order_dependent=False), namespace + ).__dir__() if not i[0].isupper() and i[0] != "_" ] series_internal = [