Skip to content

Commit 34a1e95

Browse files
Dandandan and Daniël Heres authored
Update DataFusion to 26 (apache#798) (#83)
* Enable all tests
* Adapt
* Update DataFusion to 26
* Add physical_round_trip test
* Fmt
* Add cfg again
* Do not enable q15 just yet
* fmt
* Fix
* Fix
* Fix
* Fix
* Fix
* Schema fix
* Undo some

---------

Co-authored-by: Daniël Heres <[email protected]>
1 parent bb0dece commit 34a1e95

File tree

7 files changed: +129 additions, -102 deletions.

Cargo.toml

+7-6
Original file line number | Diff line number | Diff line change
@@ -20,15 +20,16 @@ exclude = ["python"]
2020
members = ["ballista-cli", "ballista/client", "ballista/core", "ballista/executor", "ballista/scheduler", "ballista/tests", "benchmarks", "examples"]
2121

2222
[workspace.dependencies]
23-
arrow = { version = "39.0.0" }
24-
arrow-flight = { version = "39.0.0", features = ["flight-sql-experimental"] }
23+
arrow = { version = "40.0.0" }
24+
arrow-flight = { version = "40.0.0", features = ["flight-sql-experimental"] }
25+
arrow-schema = { version = "40.0.0", default-features = false }
2526
configure_me = { version = "0.4.0" }
2627
configure_me_codegen = { version = "0.4.4" }
27-
datafusion = "25.0.0"
28-
datafusion-cli = "25.0.0"
29-
datafusion-proto = "25.0.0"
28+
datafusion = "26.0.0"
29+
datafusion-cli = "26.0.0"
30+
datafusion-proto = "26.0.0"
3031
object_store = "0.5.6"
31-
sqlparser = "0.33.0"
32+
sqlparser = "0.34.0"
3233
tonic = { version = "0.9" }
3334
tonic-build = { version = "0.9", default-features = false, features = ["transport", "prost"] }
3435

ballista-cli/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ dirs = "4.0.0"
3939
env_logger = "0.10"
4040
mimalloc = { version = "0.1", default-features = false }
4141
num_cpus = "1.13.0"
42-
rustyline = "10.0"
42+
rustyline = "11.0"
4343
tokio = { version = "1.0", features = [
4444
"macros",
4545
"rt",

ballista-cli/src/exec.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ pub async fn exec_from_files(
9090

9191
/// run and execute SQL statements and commands against a context with the given print options
9292
pub async fn exec_from_repl(ctx: &BallistaContext, print_options: &mut PrintOptions) {
93-
let mut rl = Editor::<CliHelper>::new().expect("created editor");
93+
let mut rl = Editor::new().expect("created editor");
9494
rl.set_helper(Some(CliHelper::default()));
9595
rl.load_history(".history").ok();
9696

@@ -99,7 +99,7 @@ pub async fn exec_from_repl(ctx: &BallistaContext, print_options: &mut PrintOpti
9999
loop {
100100
match rl.readline("❯ ") {
101101
Ok(line) if line.starts_with('\\') => {
102-
rl.add_history_entry(line.trim_end());
102+
rl.add_history_entry(line.trim_end()).unwrap();
103103
let command = line.split_whitespace().collect::<Vec<_>>().join(" ");
104104
if let Ok(cmd) = &command[1..].parse::<Command>() {
105105
match cmd {
@@ -133,7 +133,7 @@ pub async fn exec_from_repl(ctx: &BallistaContext, print_options: &mut PrintOpti
133133
}
134134
}
135135
Ok(line) => {
136-
rl.add_history_entry(line.trim_end());
136+
rl.add_history_entry(line.trim_end()).unwrap();
137137
match exec_and_print(ctx, &print_options, line).await {
138138
Ok(_) => {}
139139
Err(err) => eprintln!("{err:?}"),

ballista/client/src/context.rs

+22-22
Original file line number | Diff line number | Diff line change
@@ -617,7 +617,7 @@ mod tests {
617617
table_partition_cols: x.table_partition_cols.clone(),
618618
collect_stat: x.collect_stat,
619619
target_partitions: x.target_partitions,
620-
file_sort_order: None,
620+
file_sort_order: vec![],
621621
infinite_source: false,
622622
};
623623

@@ -814,11 +814,11 @@ mod tests {
814814
.unwrap();
815815
let res = df.collect().await.unwrap();
816816
let expected = vec![
817-
"+-------------------------+",
818-
"| APPROXDISTINCT(test.id) |",
819-
"+-------------------------+",
820-
"| 8 |",
821-
"+-------------------------+",
817+
"+--------------------------+",
818+
"| APPROX_DISTINCT(test.id) |",
819+
"+--------------------------+",
820+
"| 8 |",
821+
"+--------------------------+",
822822
];
823823
assert_result_eq(expected, &res);
824824

@@ -829,7 +829,7 @@ mod tests {
829829
let res = df.collect().await.unwrap();
830830
let expected = vec![
831831
"+--------------------------+",
832-
"| ARRAYAGG(test.id) |",
832+
"| ARRAY_AGG(test.id) |",
833833
"+--------------------------+",
834834
"| [4, 5, 6, 7, 2, 3, 0, 1] |",
835835
"+--------------------------+",
@@ -853,11 +853,11 @@ mod tests {
853853
.unwrap();
854854
let res = df.collect().await.unwrap();
855855
let expected = vec![
856-
"+----------------------+",
857-
"| VARIANCEPOP(test.id) |",
858-
"+----------------------+",
859-
"| 5.250000000000001 |",
860-
"+----------------------+",
856+
"+-----------------------+",
857+
"| VARIANCE_POP(test.id) |",
858+
"+-----------------------+",
859+
"| 5.250000000000001 |",
860+
"+-----------------------+",
861861
];
862862
assert_result_eq(expected, &res);
863863

@@ -937,11 +937,11 @@ mod tests {
937937
.unwrap();
938938
let res = df.collect().await.unwrap();
939939
let expected = vec![
940-
"+---------------------------------------------------------------+",
941-
"| APPROXPERCENTILECONTWITHWEIGHT(test.id,Int64(2),Float64(0.5)) |",
942-
"+---------------------------------------------------------------+",
943-
"| 1 |",
944-
"+---------------------------------------------------------------+",
940+
"+-------------------------------------------------------------------+",
941+
"| APPROX_PERCENTILE_CONT_WITH_WEIGHT(test.id,Int64(2),Float64(0.5)) |",
942+
"+-------------------------------------------------------------------+",
943+
"| 1 |",
944+
"+-------------------------------------------------------------------+",
945945
];
946946
assert_result_eq(expected, &res);
947947

@@ -951,11 +951,11 @@ mod tests {
951951
.unwrap();
952952
let res = df.collect().await.unwrap();
953953
let expected = vec![
954-
"+----------------------------------------------------+",
955-
"| APPROXPERCENTILECONT(test.double_col,Float64(0.5)) |",
956-
"+----------------------------------------------------+",
957-
"| 7.574999999999999 |",
958-
"+----------------------------------------------------+",
954+
"+------------------------------------------------------+",
955+
"| APPROX_PERCENTILE_CONT(test.double_col,Float64(0.5)) |",
956+
"+------------------------------------------------------+",
957+
"| 7.574999999999999 |",
958+
"+------------------------------------------------------+",
959959
];
960960

961961
assert_result_eq(expected, &res);

ballista/scheduler/src/test_utils.rs

+24-24
Original file line number | Diff line number | Diff line change
@@ -154,51 +154,51 @@ pub fn get_tpch_schema(table: &str) -> Schema {
154154

155155
match table {
156156
"part" => Schema::new(vec![
157-
Field::new("p_partkey", DataType::Int32, false),
157+
Field::new("p_partkey", DataType::Int64, false),
158158
Field::new("p_name", DataType::Utf8, false),
159159
Field::new("p_mfgr", DataType::Utf8, false),
160160
Field::new("p_brand", DataType::Utf8, false),
161161
Field::new("p_type", DataType::Utf8, false),
162162
Field::new("p_size", DataType::Int32, false),
163163
Field::new("p_container", DataType::Utf8, false),
164-
Field::new("p_retailprice", DataType::Float64, false),
164+
Field::new("p_retailprice", DataType::Decimal128(15, 2), false),
165165
Field::new("p_comment", DataType::Utf8, false),
166166
]),
167167

168168
"supplier" => Schema::new(vec![
169-
Field::new("s_suppkey", DataType::Int32, false),
169+
Field::new("s_suppkey", DataType::Int64, false),
170170
Field::new("s_name", DataType::Utf8, false),
171171
Field::new("s_address", DataType::Utf8, false),
172-
Field::new("s_nationkey", DataType::Int32, false),
172+
Field::new("s_nationkey", DataType::Int64, false),
173173
Field::new("s_phone", DataType::Utf8, false),
174-
Field::new("s_acctbal", DataType::Float64, false),
174+
Field::new("s_acctbal", DataType::Decimal128(15, 2), false),
175175
Field::new("s_comment", DataType::Utf8, false),
176176
]),
177177

178178
"partsupp" => Schema::new(vec![
179-
Field::new("ps_partkey", DataType::Int32, false),
180-
Field::new("ps_suppkey", DataType::Int32, false),
179+
Field::new("ps_partkey", DataType::Int64, false),
180+
Field::new("ps_suppkey", DataType::Int64, false),
181181
Field::new("ps_availqty", DataType::Int32, false),
182-
Field::new("ps_supplycost", DataType::Float64, false),
182+
Field::new("ps_supplycost", DataType::Decimal128(15, 2), false),
183183
Field::new("ps_comment", DataType::Utf8, false),
184184
]),
185185

186186
"customer" => Schema::new(vec![
187-
Field::new("c_custkey", DataType::Int32, false),
187+
Field::new("c_custkey", DataType::Int64, false),
188188
Field::new("c_name", DataType::Utf8, false),
189189
Field::new("c_address", DataType::Utf8, false),
190-
Field::new("c_nationkey", DataType::Int32, false),
190+
Field::new("c_nationkey", DataType::Int64, false),
191191
Field::new("c_phone", DataType::Utf8, false),
192-
Field::new("c_acctbal", DataType::Float64, false),
192+
Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
193193
Field::new("c_mktsegment", DataType::Utf8, false),
194194
Field::new("c_comment", DataType::Utf8, false),
195195
]),
196196

197197
"orders" => Schema::new(vec![
198-
Field::new("o_orderkey", DataType::Int32, false),
199-
Field::new("o_custkey", DataType::Int32, false),
198+
Field::new("o_orderkey", DataType::Int64, false),
199+
Field::new("o_custkey", DataType::Int64, false),
200200
Field::new("o_orderstatus", DataType::Utf8, false),
201-
Field::new("o_totalprice", DataType::Float64, false),
201+
Field::new("o_totalprice", DataType::Decimal128(15, 2), false),
202202
Field::new("o_orderdate", DataType::Date32, false),
203203
Field::new("o_orderpriority", DataType::Utf8, false),
204204
Field::new("o_clerk", DataType::Utf8, false),
@@ -207,14 +207,14 @@ pub fn get_tpch_schema(table: &str) -> Schema {
207207
]),
208208

209209
"lineitem" => Schema::new(vec![
210-
Field::new("l_orderkey", DataType::Int32, false),
211-
Field::new("l_partkey", DataType::Int32, false),
212-
Field::new("l_suppkey", DataType::Int32, false),
210+
Field::new("l_orderkey", DataType::Int64, false),
211+
Field::new("l_partkey", DataType::Int64, false),
212+
Field::new("l_suppkey", DataType::Int64, false),
213213
Field::new("l_linenumber", DataType::Int32, false),
214-
Field::new("l_quantity", DataType::Float64, false),
215-
Field::new("l_extendedprice", DataType::Float64, false),
216-
Field::new("l_discount", DataType::Float64, false),
217-
Field::new("l_tax", DataType::Float64, false),
214+
Field::new("l_quantity", DataType::Decimal128(15, 2), false),
215+
Field::new("l_extendedprice", DataType::Decimal128(15, 2), false),
216+
Field::new("l_discount", DataType::Decimal128(15, 2), false),
217+
Field::new("l_tax", DataType::Decimal128(15, 2), false),
218218
Field::new("l_returnflag", DataType::Utf8, false),
219219
Field::new("l_linestatus", DataType::Utf8, false),
220220
Field::new("l_shipdate", DataType::Date32, false),
@@ -226,14 +226,14 @@ pub fn get_tpch_schema(table: &str) -> Schema {
226226
]),
227227

228228
"nation" => Schema::new(vec![
229-
Field::new("n_nationkey", DataType::Int32, false),
229+
Field::new("n_nationkey", DataType::Int64, false),
230230
Field::new("n_name", DataType::Utf8, false),
231-
Field::new("n_regionkey", DataType::Int32, false),
231+
Field::new("n_regionkey", DataType::Int64, false),
232232
Field::new("n_comment", DataType::Utf8, false),
233233
]),
234234

235235
"region" => Schema::new(vec![
236-
Field::new("r_regionkey", DataType::Int32, false),
236+
Field::new("r_regionkey", DataType::Int64, false),
237237
Field::new("r_name", DataType::Utf8, false),
238238
Field::new("r_comment", DataType::Utf8, false),
239239
]),

benchmarks/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ simd = ["datafusion/simd"]
3434
snmalloc = ["snmalloc-rs"]
3535

3636
[dependencies]
37+
arrow-schema = { workspace = true }
3738
ballista = { path = "../ballista/client", version = "0.11.0" }
3839
datafusion = { workspace = true }
3940
datafusion-proto = { workspace = true }

0 commit comments

Comments
 (0)