From 37f6315f52541e39fa35b8a6177d85ebb9c218fa Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 26 Jan 2025 10:35:42 +0000 Subject: [PATCH] loudly raise for over with key overlap --- narwhals/_dask/expr.py | 10 +++++++++- tests/expr_and_series/over_test.py | 14 +++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py index 565deca16..25b1cf893 100644 --- a/narwhals/_dask/expr.py +++ b/narwhals/_dask/expr.py @@ -661,7 +661,15 @@ def null_count(self: Self) -> Self: def over(self: Self, keys: list[str]) -> Self: def func(df: DaskLazyFrame) -> list[Any]: - _, aliases = evaluate_output_names_and_aliases(self, df, []) + output_names, aliases = evaluate_output_names_and_aliases(self, df, []) + if overlap := set(output_names).intersection(keys): + # E.g. `df.select(nw.all().sum().over('a'))`. This is well-defined, + # we just don't support it yet. + msg = ( + f"Column names {overlap} appear in both expression output names and in `over` keys.\n" + "This is not yet supported." + ) + raise NotImplementedError(msg) if df._native_frame.npartitions == 1: # pragma: no cover tmp = df.group_by(*keys, drop_null_keys=False).agg(self) tmp_native = ( diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py index 5473b79df..993326d02 100644 --- a/tests/expr_and_series/over_test.py +++ b/tests/expr_and_series/over_test.py @@ -188,11 +188,19 @@ def test_over_anonymous_cumulative(constructor_eager: ConstructorEager) -> None: assert_equal_data(result, expected) -def test_over_anonymous_reduction(constructor_eager: ConstructorEager) -> None: - df = nw.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]})) +def test_over_anonymous_reduction( + constructor: Constructor, request: pytest.FixtureRequest +) -> None: + if "duckdb" in str(constructor) or "pyspark" in str(constructor): + # TODO(unassigned): we should be able to support these + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor({"a": [1, 1, 2], "b": [4, 5, 6]})) context = ( pytest.raises(NotImplementedError) - if df.implementation.is_pyarrow() or df.implementation.is_pandas_like() + if df.implementation.is_pyarrow() + or df.implementation.is_pandas_like() + or df.implementation.is_dask() else does_not_raise() ) with context: