diff --git a/datafusion/core/benches/dataframe.rs b/datafusion/core/benches/dataframe.rs index 087764883a33..03078e05e105 100644 --- a/datafusion/core/benches/dataframe.rs +++ b/datafusion/core/benches/dataframe.rs @@ -56,8 +56,7 @@ fn run(column_count: u32, ctx: Arc) { data_frame = data_frame .with_column_renamed(field_name, new_field_name) - .unwrap(); - data_frame = data_frame + .unwrap() .with_column(new_field_name, btrim(vec![col(new_field_name)])) .unwrap(); } @@ -68,8 +67,7 @@ fn run(column_count: u32, ctx: Arc) { } fn criterion_benchmark(c: &mut Criterion) { - // 500 takes far too long right now - for column_count in [10, 100, 200 /* 500 */] { + for column_count in [10, 100, 200, 500] { let ctx = create_context(column_count).unwrap(); c.bench_function(&format!("with_column_{column_count}"), |b| { diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 0f77217f86ef..debbf96d3566 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1700,7 +1700,7 @@ impl DataFrame { .await } - /// Add an additional column to the DataFrame. + /// Add or replace a column in the DataFrame. /// /// # Example /// ``` @@ -1728,6 +1728,14 @@ impl DataFrame { let mut col_exists = false; let new_column = expr.alias(name); + // if the existing plan is a projection we can skip validation + // this is only really true as long as only the dataframe api + // is used. 
If the logical plan is built first and then .with_column + // is called, that wouldn't be a valid assumption + let existing_requires_validation = match plan { + LogicalPlan::Projection(_) => false, + _ => true, + }; let mut fields: Vec<(Expr, bool)> = plan .schema() .iter() @@ -1741,7 +1749,7 @@ impl DataFrame { .as_ref() .filter(|s| *s == &e.to_string()) .is_none() - .then_some((e, false)) + .then_some((e, existing_requires_validation)) } }) .collect(); @@ -1813,9 +1821,11 @@ impl DataFrame { .iter() .map(|(qualifier, field)| { if qualifier.eq(&qualifier_rename) && field.as_ref() == field_rename { - (col(Column::from((qualifier, field))) - .alias_qualified(qualifier.cloned(), new_name), - false) + ( + col(Column::from((qualifier, field))) + .alias_qualified(qualifier.cloned(), new_name), + false, + ) } else { (col(Column::from((qualifier, field))), false) } diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 2d2c471bd1ea..10c5a2c24908 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -499,6 +499,7 @@ impl LogicalPlanBuilder { } /// Apply a projection without alias with optional validation + /// (true to validate, false to not validate) pub fn project_with_validation( self, expr: Vec<(impl Into, bool)>, @@ -1626,7 +1627,7 @@ pub fn project( /// Create Projection. Similar to project except that the expressions /// passed in have a flag to indicate if that expression requires -/// validation (normalize & columnize) or not +/// validation (normalize & columnize) (true) or not (false) /// # Errors /// This function errors under any of the following conditions: /// * Two or more expressions have the same name