From ce060294b2e14f95f848be95e1ddb95ca6e2aa33 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 4 Oct 2022 07:13:50 -0400 Subject: [PATCH 1/2] Fix `DataFrame::with_column` to handle creating column names with a period --- datafusion/core/src/dataframe.rs | 48 ++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs index e683455b30dc..f0373d8463aa 100644 --- a/datafusion/core/src/dataframe.rs +++ b/datafusion/core/src/dataframe.rs @@ -38,6 +38,7 @@ use crate::physical_plan::{execute_stream, execute_stream_partitioned, Execution use crate::prelude::SessionContext; use crate::scalar::ScalarValue; use async_trait::async_trait; +use datafusion_common::Column; use parking_lot::RwLock; use parquet::file::properties::WriterProperties; use std::any::Any; @@ -672,7 +673,10 @@ impl DataFrame { col_exists = true; new_column.clone() } else { - col(f.name()) + Expr::Column(Column { + relation: None, + name: f.name().into(), + }) } }) .collect(); @@ -827,6 +831,7 @@ mod tests { use crate::physical_plan::ColumnarValue; use crate::test_util; use crate::{assert_batches_sorted_eq, execution::context::SessionContext}; + use arrow::array::Int32Array; use arrow::datatypes::DataType; use datafusion_expr::{ avg, cast, count, count_distinct, create_udf, lit, max, min, sum, @@ -1365,9 +1370,9 @@ mod tests { ctx.register_batch("test", data)?; let sql = r#" - SELECT + SELECT COUNT(1) - FROM + FROM test GROUP BY column_1"#; @@ -1378,6 +1383,43 @@ mod tests { Ok(()) } + #[tokio::test] + async fn with_column_name() -> Result<()> { + // define data with a column name that has a "." in it: + let array: Int32Array = [1, 10].into_iter().collect(); + let batch = + RecordBatch::try_from_iter(vec![("f.c1", Arc::new(array) as _)]).unwrap(); + + let ctx = SessionContext::new(); + ctx.register_batch("t", batch).unwrap(); + + let df = ctx + .table("t") + .unwrap() + // try and create a column with a '.' in it + .with_column("f.c2", lit("hello")) + .unwrap(); + // Note trying to select causes an error (todo file a separate ticket) + //.select_columns(&["f.c1", "f.c2"]) + //.unwrap(); + + let df_results = df.collect().await.unwrap(); + + assert_batches_sorted_eq!( + vec![ + "+------+-------+", + "| f.c1 | f.c2 |", + "+------+-------+", + "| 1 | hello |", + "| 10 | hello |", + "+------+-------+", + ], + &df_results + ); + + Ok(()) + } + #[tokio::test] async fn cache_test() -> Result<()> { let df = test_table() From e48b14d8b087c7ad88f33151eb056a2d91e85b68 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 6 Oct 2022 09:12:16 -0400 Subject: [PATCH 2/2] Remove old comments --- datafusion/core/src/dataframe.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/datafusion/core/src/dataframe.rs b/datafusion/core/src/dataframe.rs index c45fff44fb95..cced61eb2721 100644 --- a/datafusion/core/src/dataframe.rs +++ b/datafusion/core/src/dataframe.rs @@ -1399,9 +1399,6 @@ mod tests { // try and create a column with a '.' in it .with_column("f.c2", lit("hello")) .unwrap(); - // Note trying to select causes an error (todo file a separate ticket) - //.select_columns(&["f.c1", "f.c2"]) - //.unwrap(); let df_results = df.collect().await.unwrap();