Skip to content

Commit

Permalink
Add StatisticsType in expr.poto
Browse files Browse the repository at this point in the history
  • Loading branch information
Huaxin Gao committed Apr 24, 2024
1 parent bddbc71 commit c250bea
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 32 deletions.
40 changes: 21 additions & 19 deletions core/src/execution/datafusion/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1220,27 +1220,29 @@ impl PhysicalPlanner {
StatsType::Population,
)))
}
AggExprStruct::VarianceSample(expr) => {
AggExprStruct::Variance(expr) => {
let child = self.create_expr(expr.child.as_ref().unwrap(), schema.clone())?;
let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap());
Ok(Arc::new(Variance::new(
child,
"variance",
datatype,
StatsType::Sample,
expr.null_on_divide_by_zero,
)))
}
AggExprStruct::VariancePopulation(expr) => {
let child = self.create_expr(expr.child.as_ref().unwrap(), schema.clone())?;
let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap());
Ok(Arc::new(Variance::new(
child,
"variance_pop",
datatype,
StatsType::Population,
expr.null_on_divide_by_zero,
)))
match expr.stats_type {
0 => Ok(Arc::new(Variance::new(
child,
"variance",
datatype,
StatsType::Sample,
expr.null_on_divide_by_zero,
))),
1 => Ok(Arc::new(Variance::new(
child,
"variance_pop",
datatype,
StatsType::Population,
expr.null_on_divide_by_zero,
))),
stats_type => Err(ExecutionError::GeneralError(format!(
"Unknown StatisticsType {:?} for Variance",
stats_type
))),
}
}
}
}
Expand Down
17 changes: 8 additions & 9 deletions core/src/execution/proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,15 @@ message AggExpr {
BitXorAgg bitXorAgg = 11;
CovSample covSample = 12;
CovPopulation covPopulation = 13;
VarianceSample varianceSample = 14;
VariancePopulation variancePopulation = 15;
Variance variance = 14;
}
}

enum StatisticsType {
SAMPLE = 0;
POPULATION = 1;
}

message Count {
repeated Expr children = 1;
}
Expand Down Expand Up @@ -167,16 +171,11 @@ message CovPopulation {
DataType datatype = 4;
}

message VarianceSample {
Expr child = 1;
bool null_on_divide_by_zero = 2;
DataType datatype = 3;
}

message VariancePopulation {
message Variance {
Expr child = 1;
bool null_on_divide_by_zero = 2;
DataType datatype = 3;
StatisticsType stats_type = 4;
}

message Literal {
Expand Down
10 changes: 6 additions & 4 deletions spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
Original file line number Diff line number Diff line change
Expand Up @@ -431,15 +431,16 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
val dataType = serializeDataType(variance.dataType)

if (childExpr.isDefined && dataType.isDefined) {
val varBuilder = ExprOuterClass.VarianceSample.newBuilder()
val varBuilder = ExprOuterClass.Variance.newBuilder()
varBuilder.setChild(childExpr.get)
varBuilder.setNullOnDivideByZero(nullOnDivideByZero)
varBuilder.setDatatype(dataType.get)
varBuilder.setStatsTypeValue(0)

Some(
ExprOuterClass.AggExpr
.newBuilder()
.setVarianceSample(varBuilder)
.setVariance(varBuilder)
.build())
} else {
None
Expand All @@ -449,15 +450,16 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde {
val dataType = serializeDataType(variancePop.dataType)

if (childExpr.isDefined && dataType.isDefined) {
val varBuilder = ExprOuterClass.VariancePopulation.newBuilder()
val varBuilder = ExprOuterClass.Variance.newBuilder()
varBuilder.setChild(childExpr.get)
varBuilder.setNullOnDivideByZero(nullOnDivideByZero)
varBuilder.setDatatype(dataType.get)
varBuilder.setStatsTypeValue(1)

Some(
ExprOuterClass.AggExpr
.newBuilder()
.setVariancePopulation(varBuilder)
.setVariance(varBuilder)
.build())
} else {
None
Expand Down

0 comments on commit c250bea

Please sign in to comment.