Skip to content

Commit

Permalink
fix: Ensure Function name correctness in cse (#20929)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jan 27, 2025
1 parent 176268e commit 6a3fc59
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 5 deletions.
43 changes: 39 additions & 4 deletions crates/polars-plan/src/plans/optimizer/cse/cse_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,6 @@ fn skip_pre_visit(ae: &AExpr, is_groupby: bool) -> bool {
match ae {
AExpr::Window { .. } => true,
#[cfg(feature = "dtype-struct")]
AExpr::Function {
function: FunctionExpr::AsStruct,
..
} => true,
AExpr::Ternary { .. } => is_groupby,
_ => false,
}
Expand Down Expand Up @@ -724,6 +720,45 @@ impl CommonSubExprOptimizer {
} else {
out_e.set_node(out_node);

// Ensure the function ExprIR's have the proper names.
let mut scratch = vec![];
let mut stack = vec![(e.node(), out_node)];
while let Some((original, new)) = stack.pop() {
let aes = expr_arena.get_many_mut([original, new]);

aes[0].inputs_rev(&mut scratch);
aes[1].inputs_rev(&mut scratch);

for i in 0..scratch.len() / 2 {
stack.push((scratch[i], scratch[i + 1]));
}
scratch.clear();

match expr_arena.get_many_mut([original, new]) {
[AExpr::Function {
input: input_original,
..
}, AExpr::Function {
input: input_new, ..
}] => {
for (new, original) in input_new.iter_mut().zip(input_original) {
new.set_alias(original.output_name().clone());
}
},
[AExpr::AnonymousFunction {
input: input_original,
..
}, AExpr::AnonymousFunction {
input: input_new, ..
}] => {
for (new, original) in input_new.iter_mut().zip(input_original) {
new.set_alias(original.output_name().clone());
}
},
_ => {},
}
}

// If we don't end with an alias we add an alias. Because the normal left-hand
// rule we apply for determining the name will not work we now refer to
// intermediate temporary names starting with the `CSE_REPLACED` constant.
Expand Down
1 change: 1 addition & 0 deletions crates/polars-plan/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ pub fn aexpr_output_name(node: Node, arena: &Arena<AExpr>) -> PolarsResult<PlSma
AExpr::Alias(_, name) => return Ok(name.clone()),
AExpr::Len => return Ok(get_len_name()),
AExpr::Literal(val) => return Ok(val.output_column_name().clone()),
AExpr::Ternary { truthy, .. } => return aexpr_output_name(*truthy, arena),
_ => {},
}
}
Expand Down
12 changes: 11 additions & 1 deletion py-polars/tests/unit/test_cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,7 @@ def test_eager_cse_during_struct_expansion_18411() -> None:
)["foo"].all()


def test_cse_skip_as_struct_19253() -> None:
def test_cse_as_struct_19253() -> None:
df = pl.LazyFrame({"x": [1, 2], "y": [4, 5]})

assert (
Expand All @@ -807,6 +807,16 @@ def test_cse_skip_as_struct_19253() -> None:
}


@pytest.mark.may_fail_auto_streaming
def test_cse_as_struct_value_counts_20927() -> None:
assert pl.DataFrame({"x": [i for i in range(1, 6) for _ in range(i)]}).select(
pl.struct("x").value_counts().struct.unnest()
).sort("count").to_dict(as_series=False) == {
"x": [{"x": 1}, {"x": 2}, {"x": 3}, {"x": 4}, {"x": 5}],
"count": [1, 2, 3, 4, 5],
}


def test_cse_union_19227() -> None:
lf = pl.LazyFrame({"A": [1], "B": [2]})
lf_1 = lf.select(C="A", B="B")
Expand Down

0 comments on commit 6a3fc59

Please sign in to comment.