narwhals-dev · MarcoGorelli · Jan 28, 2025 · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/narwhals/_duckdb/dataframe.py b/narwhals/_duckdb/dataframe.py
@@ -104,19 +104,27 @@ def select(
         *exprs: DuckDBExpr,
         **named_exprs: DuckDBExpr,
     ) -> Self:
-        new_columns_map = parse_exprs_and_named_exprs(self)(*exprs, **named_exprs)
+        new_columns_map, returns_scalar = parse_exprs_and_named_exprs(self)(
+            *exprs, **named_exprs
+        )
         if not new_columns_map:
             # TODO(marco): return empty relation with 0 columns?
             return self._from_native_frame(self._native_frame.limit(0))
 
-        if all(getattr(x, "_returns_scalar", False) for x in exprs) and all(
-            getattr(x, "_returns_scalar", False) for x in named_exprs.values()
-        ):
+        if all(returns_scalar):
             return self._from_native_frame(
                 self._native_frame.aggregate(
                     [val.alias(col) for col, val in new_columns_map.items()]
                 )
             )
+        if any(returns_scalar):
+            msg = (
+                "Mixing expressions which aggregate and expressions which don't\n"
+                "is not yet supported by the DuckDB backend. Once they introduce\n"
+                "duckdb.WindowExpression to their Python API, we'll be able to\n"
+                "support this."
+            )
+            raise NotImplementedError(msg)
 
         return self._from_native_frame(
             self._native_frame.select(
@@ -139,7 +147,18 @@ def with_columns(
         *exprs: DuckDBExpr,
         **named_exprs: DuckDBExpr,
     ) -> Self:
-        new_columns_map = parse_exprs_and_named_exprs(self)(*exprs, **named_exprs)
+        new_columns_map, returns_scalar = parse_exprs_and_named_exprs(self)(
+            *exprs, **named_exprs
+        )
+
+        if any(returns_scalar):
+            msg = (
+                "Expressions which return scalars are not yet supported in `with_columns`\n"
+                "for the DuckDB backend. Once they introduce duckdb.WindowExpression to \n"
+                "their Python API, we'll be able to support this."
+            )
+            raise NotImplementedError(msg)
+
         result = []
         for col in self._native_frame.columns:
             if col in new_columns_map:

diff --git a/narwhals/_duckdb/expr.py b/narwhals/_duckdb/expr.py
@@ -126,7 +126,7 @@ def _from_call(
         def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]:
             native_series_list = self._call(df)
             other_native_series = {
-                key: maybe_evaluate(df, value)
+                key: maybe_evaluate(df, value, returns_scalar=returns_scalar)
                 for key, value in expressifiable_args.items()
             }
             return [

diff --git a/narwhals/_duckdb/utils.py b/narwhals/_duckdb/utils.py
@@ -18,7 +18,7 @@
     from narwhals.utils import Version
 
 
-def maybe_evaluate(df: DuckDBLazyFrame, obj: Any) -> Any:
+def maybe_evaluate(df: DuckDBLazyFrame, obj: Any, *, returns_scalar: bool) -> Any:
     from narwhals._duckdb.expr import DuckDBExpr
 
     if isinstance(obj, DuckDBExpr):
@@ -27,20 +27,30 @@ def maybe_evaluate(df: DuckDBLazyFrame, obj: Any) -> Any:
             msg = "Multi-output expressions (e.g. `nw.all()` or `nw.col('a', 'b')`) not supported in this context"
             raise NotImplementedError(msg)
         column_result = column_results[0]
-        if obj._returns_scalar:
-            msg = "Reductions are not yet supported for DuckDB, at least until they implement duckdb.WindowExpression"
+        if obj._returns_scalar and not returns_scalar:
+            # Returns scalar, but overall expression doesn't.
+            # Not yet supported.
+            msg = (
+                "Mixing expressions which aggregate and expressions which don't\n"
+                "is not yet supported by the DuckDB backend. Once they introduce\n"
+                "duckdb.WindowExpression to their Python API, we'll be able to\n"
+                "support this."
+            )
             raise NotImplementedError(msg)
         return column_result
     return duckdb.ConstantExpression(obj)
 
 
 def parse_exprs_and_named_exprs(
     df: DuckDBLazyFrame,
-) -> Callable[..., dict[str, duckdb.Expression]]:
+) -> Callable[..., tuple[dict[str, duckdb.Expression], list[bool]]]:
     def func(
         *exprs: DuckDBExpr, **named_exprs: DuckDBExpr
-    ) -> dict[str, duckdb.Expression]:
+    ) -> tuple[dict[str, duckdb.Expression], list[bool]]:
         native_results: dict[str, list[duckdb.Expression]] = {}
+
+        # `returns_scalar` tracks, per output, whether the expression returns a scalar.
+        returns_scalar: list[bool] = []
         for expr in exprs:
             native_series_list = expr._call(df)
             output_names = expr._evaluate_output_names(df)
@@ -50,13 +60,15 @@ def func(
                 msg = f"Internal error: got output names {output_names}, but only got {len(native_series_list)} results"
                 raise AssertionError(msg)
             native_results.update(zip(output_names, native_series_list))
+            returns_scalar.extend([expr._returns_scalar] * len(output_names))
         for col_alias, expr in named_exprs.items():
             native_series_list = expr._call(df)
             if len(native_series_list) != 1:  # pragma: no cover
                 msg = "Named expressions must return a single column"
                 raise ValueError(msg)
             native_results[col_alias] = native_series_list[0]
-        return native_results
+            returns_scalar.append(expr._returns_scalar)
+        return native_results, returns_scalar
 
     return func
 

diff --git a/narwhals/utils.py b/narwhals/utils.py
@@ -1035,11 +1035,11 @@ def generate_repr(header: str, native_repr: str) -> str:
     try:
         terminal_width = os.get_terminal_size().columns
     except OSError:
-        terminal_width = 80
+        terminal_width = int(os.getenv("COLUMNS", 80))  # noqa: PLW1508
     native_lines = native_repr.splitlines()
     max_native_width = max(len(line) for line in native_lines)
 
-    if max_native_width + 2 < terminal_width:
+    if max_native_width + 2 <= terminal_width:
         length = max(max_native_width, len(header))
         output = f"┌{'─'*length}┐\n"
         header_extra = length - len(header)

diff --git a/tests/expr_and_series/lit_test.py b/tests/expr_and_series/lit_test.py
@@ -95,8 +95,6 @@ def test_lit_operation_in_select(
     if "duckdb" in str(constructor) and col_name in (
         "left_scalar_with_agg",
         "left_lit_with_agg",
-        "right_lit",
-        "right_lit_with_agg",
     ):
         request.applymarker(pytest.mark.xfail)
     if (
@@ -126,10 +124,7 @@ def test_lit_operation_in_with_columns(
     col_name: str,
     expr: nw.Expr,
     expected_result: list[int],
-    request: pytest.FixtureRequest,
 ) -> None:
-    if "duckdb" in str(constructor) and col_name == "scalar_and_lit":
-        request.applymarker(pytest.mark.xfail)
     data = {"a": [1, 3, 2]}
     df_raw = constructor(data)
     df = nw.from_native(df_raw).lazy()

diff --git a/tpch/execute.py b/tpch/execute.py
@@ -40,7 +40,7 @@
     "dask": lambda x: x.compute(),
 }
 
-DUCKDB_SKIPS = ["q14", "q15"]
+DUCKDB_SKIPS = ["q15"]
 
 QUERY_DATA_PATH_MAP = {
     "q1": (LINEITEM_PATH,),