From 37f6315f52541e39fa35b8a6177d85ebb9c218fa Mon Sep 17 00:00:00 2001
From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Sun, 26 Jan 2025 10:35:42 +0000
Subject: [PATCH] loudly raise for over with key overlap

---
 narwhals/_dask/expr.py             | 10 +++++++++-
 tests/expr_and_series/over_test.py | 14 +++++++++++---
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/narwhals/_dask/expr.py b/narwhals/_dask/expr.py
index 565deca16..25b1cf893 100644
--- a/narwhals/_dask/expr.py
+++ b/narwhals/_dask/expr.py
@@ -661,7 +661,15 @@ def null_count(self: Self) -> Self:
 
     def over(self: Self, keys: list[str]) -> Self:
         def func(df: DaskLazyFrame) -> list[Any]:
-            _, aliases = evaluate_output_names_and_aliases(self, df, [])
+            output_names, aliases = evaluate_output_names_and_aliases(self, df, [])
+            if overlap := set(output_names).intersection(keys):
+                # E.g. `df.select(nw.all().sum().over('a'))`. This is well-defined,
+                # we just don't support it yet.
+                msg = (
+                    f"Column names {overlap} appear in both expression output names and in `over` keys.\n"
+                    "This is not yet supported."
+                )
+                raise NotImplementedError(msg)
             if df._native_frame.npartitions == 1:  # pragma: no cover
                 tmp = df.group_by(*keys, drop_null_keys=False).agg(self)
                 tmp_native = (
diff --git a/tests/expr_and_series/over_test.py b/tests/expr_and_series/over_test.py
index 5473b79df..993326d02 100644
--- a/tests/expr_and_series/over_test.py
+++ b/tests/expr_and_series/over_test.py
@@ -188,11 +188,19 @@ def test_over_anonymous_cumulative(constructor_eager: ConstructorEager) -> None:
         assert_equal_data(result, expected)
 
 
-def test_over_anonymous_reduction(constructor_eager: ConstructorEager) -> None:
-    df = nw.from_native(constructor_eager({"a": [1, 1, 2], "b": [4, 5, 6]}))
+def test_over_anonymous_reduction(
+    constructor: Constructor, request: pytest.FixtureRequest
+) -> None:
+    if "duckdb" in str(constructor) or "pyspark" in str(constructor):
+        # TODO(unassigned): we should be able to support these
+        request.applymarker(pytest.mark.xfail)
+
+    df = nw.from_native(constructor({"a": [1, 1, 2], "b": [4, 5, 6]}))
     context = (
         pytest.raises(NotImplementedError)
-        if df.implementation.is_pyarrow() or df.implementation.is_pandas_like()
+        if df.implementation.is_pyarrow()
+        or df.implementation.is_pandas_like()
+        or df.implementation.is_dask()
         else does_not_raise()
     )
     with context: