Skip to content

Commit

Permalink
fix: Fix lazy frame join expression (#19974)
Browse files Browse the repository at this point in the history
  • Loading branch information
stijnherfst authored Nov 25, 2024
1 parent ac9d3c6 commit 41e13c6
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,14 @@ fn add_keys_to_accumulated_state(
// that means we don't want to execute the projection as that is already done by
// the JOIN executor
if add_local {
// take the left most name as output name
let mut iter = aexpr_to_leaf_names_iter(expr, expr_arena);
if let Some(name) = iter.next() {
drop(iter);
let node = expr_arena.add(AExpr::Column(name.clone()));
// return the left most name as output name
let names = aexpr_to_leaf_names_iter(expr, expr_arena).collect::<Vec<_>>();
let output_name = names.first().cloned();
for name in names {
let node = expr_arena.add(AExpr::Column(name));
local_projection.push(ColumnNode(node));
Some(name)
} else {
None
}
output_name
} else {
None
}
Expand All @@ -43,7 +41,7 @@ pub(super) fn process_asof_join(
right_on: Vec<ExprIR>,
options: Arc<JoinOptions>,
acc_projections: Vec<ColumnNode>,
_projected_names: PlHashSet<PlSmallStr>,
projected_names: PlHashSet<PlSmallStr>,
projections_seen: usize,
lp_arena: &mut Arena<IR>,
expr_arena: &mut Arena<AExpr>,
Expand Down Expand Up @@ -76,7 +74,7 @@ pub(super) fn process_asof_join(
// make sure that the asof join 'by' columns are projected
if let (Some(left_by), Some(right_by)) = (&asof_options.left_by, &asof_options.right_by) {
for name in left_by {
let add = _projected_names.contains(name.as_str());
let add = projected_names.contains(name.as_str());

let node = expr_arena.add(AExpr::Column(name.clone()));
add_keys_to_accumulated_state(
Expand Down
18 changes: 18 additions & 0 deletions py-polars/tests/unit/operations/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,24 @@ def test_join_on_expressions() -> None:
).to_dict(as_series=False) == {"a": [1, 2, 3, 3], "b": [1, 4, 9, 9]}


def test_join_lazy_frame_on_expression() -> None:
    """Check that a lazy join keyed on an expression matches the eager join.

    Regression test for a lazy frame projection pushdown bug:
    https://github.com/pola-rs/polars/issues/19822

    The left join key is the expression ``coalesce("b", "a")`` while the
    final projection keeps only column "a"; the lazy result must have the
    same shape as the eager result.
    """
    frame = pl.DataFrame(data={"a": [0, 1], "b": [2, 3]})

    # Eager reference result.
    expected = frame.join(
        frame, left_on=pl.coalesce("b", "a"), right_on="a"
    ).select("a")

    # Same query expressed lazily, so it runs through the query optimizer.
    lazy_query = frame.lazy().join(
        frame.lazy(), left_on=pl.coalesce("b", "a"), right_on="a"
    )
    actual = lazy_query.select("a").collect()

    assert actual.shape == expected.shape


def test_join() -> None:
df_left = pl.DataFrame(
{
Expand Down

0 comments on commit 41e13c6

Please sign in to comment.