@@ -29,6 +29,7 @@ use crate::physical_plan::{
29
29
} ;
30
30
use arrow:: array:: ArrayRef ;
31
31
use arrow:: datatypes:: { Field , Schema , SchemaRef } ;
32
+ use arrow:: record_batch:: RecordBatch ;
32
33
use datafusion_common:: Result ;
33
34
use datafusion_expr:: Accumulator ;
34
35
use datafusion_physical_expr:: expressions:: Column ;
@@ -40,9 +41,13 @@ use std::sync::Arc;
40
41
41
42
mod hash;
42
43
mod no_grouping;
44
+ mod row_hash;
43
45
46
+ use crate :: physical_plan:: aggregates:: row_hash:: GroupedHashAggregateStreamV2 ;
44
47
pub use datafusion_expr:: AggregateFunction ;
48
+ use datafusion_physical_expr:: aggregate:: row_accumulator:: RowAccumulator ;
45
49
pub use datafusion_physical_expr:: expressions:: create_aggregate_expr;
50
+ use datafusion_row:: { row_supported, RowType } ;
46
51
47
52
/// Hash aggregate modes
48
53
#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
@@ -142,6 +147,12 @@ impl AggregateExec {
142
147
pub fn input_schema ( & self ) -> SchemaRef {
143
148
self . input_schema . clone ( )
144
149
}
150
+
151
+ fn row_aggregate_supported ( & self ) -> bool {
152
+ let group_schema = group_schema ( & self . schema , self . group_expr . len ( ) ) ;
153
+ row_supported ( & group_schema, RowType :: Compact )
154
+ && accumulator_v2_supported ( & self . aggr_expr )
155
+ }
145
156
}
146
157
147
158
impl ExecutionPlan for AggregateExec {
@@ -212,6 +223,15 @@ impl ExecutionPlan for AggregateExec {
212
223
input,
213
224
baseline_metrics,
214
225
) ?) )
226
+ } else if self . row_aggregate_supported ( ) {
227
+ Ok ( Box :: pin ( GroupedHashAggregateStreamV2 :: new (
228
+ self . mode ,
229
+ self . schema . clone ( ) ,
230
+ group_expr,
231
+ self . aggr_expr . clone ( ) ,
232
+ input,
233
+ baseline_metrics,
234
+ ) ?) )
215
235
} else {
216
236
Ok ( Box :: pin ( GroupedHashAggregateStream :: new (
217
237
self . mode ,
@@ -315,6 +335,11 @@ fn create_schema(
315
335
Ok ( Schema :: new ( fields) )
316
336
}
317
337
338
+ fn group_schema ( schema : & Schema , group_count : usize ) -> SchemaRef {
339
+ let group_fields = schema. fields ( ) [ 0 ..group_count] . to_vec ( ) ;
340
+ Arc :: new ( Schema :: new ( group_fields) )
341
+ }
342
+
318
343
/// returns physical expressions to evaluate against a batch
319
344
/// The expressions are different depending on `mode`:
320
345
/// * Partial: AggregateExpr::expressions
@@ -362,6 +387,7 @@ fn merge_expressions(
362
387
}
363
388
364
389
/// A boxed, dynamically dispatched [`Accumulator`].
pub ( crate ) type AccumulatorItem = Box < dyn Accumulator > ;
390
/// A boxed, dynamically dispatched [`RowAccumulator`].
+ pub ( crate ) type AccumulatorItemV2 = Box < dyn RowAccumulator > ;
365
391
366
392
fn create_accumulators (
367
393
aggr_expr : & [ Arc < dyn AggregateExpr > ] ,
@@ -372,6 +398,26 @@ fn create_accumulators(
372
398
. collect :: < datafusion_common:: Result < Vec < _ > > > ( )
373
399
}
374
400
401
+ fn accumulator_v2_supported ( aggr_expr : & [ Arc < dyn AggregateExpr > ] ) -> bool {
402
+ aggr_expr
403
+ . iter ( )
404
+ . all ( |expr| expr. row_accumulator_supported ( ) )
405
+ }
406
+
407
+ fn create_accumulators_v2 (
408
+ aggr_expr : & [ Arc < dyn AggregateExpr > ] ,
409
+ ) -> datafusion_common:: Result < Vec < AccumulatorItemV2 > > {
410
+ let mut state_index = 0 ;
411
+ aggr_expr
412
+ . iter ( )
413
+ . map ( |expr| {
414
+ let result = expr. create_row_accumulator ( state_index) ;
415
+ state_index += expr. state_fields ( ) . unwrap ( ) . len ( ) ;
416
+ result
417
+ } )
418
+ . collect :: < datafusion_common:: Result < Vec < _ > > > ( )
419
+ }
420
+
375
421
/// returns a vector of ArrayRefs, where each entry corresponds to either the
376
422
/// final value (mode = Final) or states (mode = Partial)
377
423
fn finalize_aggregation (
@@ -402,6 +448,27 @@ fn finalize_aggregation(
402
448
}
403
449
}
404
450
451
+ /// Evaluates expressions against a record batch.
452
+ fn evaluate (
453
+ expr : & [ Arc < dyn PhysicalExpr > ] ,
454
+ batch : & RecordBatch ,
455
+ ) -> Result < Vec < ArrayRef > > {
456
+ expr. iter ( )
457
+ . map ( |expr| expr. evaluate ( batch) )
458
+ . map ( |r| r. map ( |v| v. into_array ( batch. num_rows ( ) ) ) )
459
+ . collect :: < Result < Vec < _ > > > ( )
460
+ }
461
+
462
+ /// Evaluates expressions against a record batch.
463
+ fn evaluate_many (
464
+ expr : & [ Vec < Arc < dyn PhysicalExpr > > ] ,
465
+ batch : & RecordBatch ,
466
+ ) -> Result < Vec < Vec < ArrayRef > > > {
467
+ expr. iter ( )
468
+ . map ( |expr| evaluate ( expr, batch) )
469
+ . collect :: < Result < Vec < _ > > > ( )
470
+ }
471
+
405
472
#[ cfg( test) ]
406
473
mod tests {
407
474
use crate :: execution:: context:: TaskContext ;
0 commit comments