From 41e13c6547ae77c6cd75048a6e83d57e8f6ffdaa Mon Sep 17 00:00:00 2001 From: Stijn Date: Mon, 25 Nov 2024 15:49:48 +0100 Subject: [PATCH] fix: Fix lazy frame join expression (#19974) --- .../optimizer/projection_pushdown/joins.rs | 18 ++++++++---------- py-polars/tests/unit/operations/test_join.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/joins.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/joins.rs index f8ee048c2464..4fe80a8b92c5 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/joins.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/joins.rs @@ -19,16 +19,14 @@ fn add_keys_to_accumulated_state( // that means we don't want to execute the projection as that is already done by // the JOIN executor if add_local { - // take the left most name as output name - let mut iter = aexpr_to_leaf_names_iter(expr, expr_arena); - if let Some(name) = iter.next() { - drop(iter); - let node = expr_arena.add(AExpr::Column(name.clone())); + // return the left most name as output name + let names = aexpr_to_leaf_names_iter(expr, expr_arena).collect::>(); + let output_name = names.first().cloned(); + for name in names { + let node = expr_arena.add(AExpr::Column(name)); local_projection.push(ColumnNode(node)); - Some(name) - } else { - None } + output_name } else { None } @@ -43,7 +41,7 @@ pub(super) fn process_asof_join( right_on: Vec, options: Arc, acc_projections: Vec, - _projected_names: PlHashSet, + projected_names: PlHashSet, projections_seen: usize, lp_arena: &mut Arena, expr_arena: &mut Arena, @@ -76,7 +74,7 @@ pub(super) fn process_asof_join( // make sure that the asof join 'by' columns are projected if let (Some(left_by), Some(right_by)) = (&asof_options.left_by, &asof_options.right_by) { for name in left_by { - let add = _projected_names.contains(name.as_str()); + let add = projected_names.contains(name.as_str()); let node = expr_arena.add(AExpr::Column(name.clone())); add_keys_to_accumulated_state( diff --git a/py-polars/tests/unit/operations/test_join.py b/py-polars/tests/unit/operations/test_join.py index b32a7633e749..27cba18e18d5 100644 --- a/py-polars/tests/unit/operations/test_join.py +++ b/py-polars/tests/unit/operations/test_join.py @@ -158,6 +158,24 @@ def test_join_on_expressions() -> None: ).to_dict(as_series=False) == {"a": [1, 2, 3, 3], "b": [1, 4, 9, 9]} +def test_join_lazy_frame_on_expression() -> None: + # Tests a lazy frame projection pushdown bug + # https://github.com/pola-rs/polars/issues/19822 + + df = pl.DataFrame(data={"a": [0, 1], "b": [2, 3]}) + + lazy_join = ( + df.lazy() + .join(df.lazy(), left_on=pl.coalesce("b", "a"), right_on="a") + .select("a") + .collect() + ) + + eager_join = df.join(df, left_on=pl.coalesce("b", "a"), right_on="a").select("a") + + assert lazy_join.shape == eager_join.shape + + def test_join() -> None: df_left = pl.DataFrame( {