Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add t_statistic(), paired_t_statistic() agg functions (#15798) #15817

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions presto-docs/src/main/sphinx/functions/aggregate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,15 @@ Statistical Aggregate Functions

Returns the sample variance of all input values.

.. function:: t_statistic(x) -> double

Returns the one-sample Student's t-statistic for all
input values.

.. function:: paired_t_statistic(x, y) -> double

Returns the paired Student's t-statistic for the
difference ``y - x`` for all input values.

Classification Metrics Aggregate Functions
------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import com.facebook.presto.operator.aggregation.MergeHyperLogLogAggregation;
import com.facebook.presto.operator.aggregation.MergeQuantileDigestFunction;
import com.facebook.presto.operator.aggregation.MergeTDigestFunction;
import com.facebook.presto.operator.aggregation.PairedTStatisticAggregation;
import com.facebook.presto.operator.aggregation.RealCorrelationAggregation;
import com.facebook.presto.operator.aggregation.RealCovarianceAggregation;
import com.facebook.presto.operator.aggregation.RealGeometricMeanAggregations;
Expand All @@ -75,6 +76,7 @@
import com.facebook.presto.operator.aggregation.RealSumAggregation;
import com.facebook.presto.operator.aggregation.ReduceAggregationFunction;
import com.facebook.presto.operator.aggregation.SumDataSizeForStats;
import com.facebook.presto.operator.aggregation.TStatisticAggregation;
import com.facebook.presto.operator.aggregation.VarianceAggregation;
import com.facebook.presto.operator.aggregation.approxmostfrequent.BigintApproximateMostFrequent;
import com.facebook.presto.operator.aggregation.approxmostfrequent.VarcharApproximateMostFrequent;
Expand Down Expand Up @@ -591,6 +593,8 @@ private List<? extends SqlFunction> getBuildInFunctions(FeaturesConfig featuresC
.aggregates(CentralMomentsAggregation.class)
.aggregates(ApproximateLongPercentileAggregations.class)
.aggregates(ApproximateLongPercentileArrayAggregations.class)
.aggregates(TStatisticAggregation.class)
.aggregates(PairedTStatisticAggregation.class)
.aggregates(ApproximateDoublePercentileAggregations.class)
.aggregates(ApproximateDoublePercentileArrayAggregations.class)
.aggregates(ApproximateRealPercentileAggregations.class)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation;

import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.operator.aggregation.state.CentralMomentsState;
import com.facebook.presto.spi.function.AggregationFunction;
import com.facebook.presto.spi.function.AggregationState;
import com.facebook.presto.spi.function.CombineFunction;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.InputFunction;
import com.facebook.presto.spi.function.OutputFunction;
import com.facebook.presto.spi.function.SqlType;

import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.operator.aggregation.AggregationUtils.mergeCentralMomentsState;
import static com.facebook.presto.operator.aggregation.AggregationUtils.updateCentralMomentsState;

@AggregationFunction
@Description("Returns the paired t-statistic")
public final class PairedTStatisticAggregation
{
private PairedTStatisticAggregation() {}

@InputFunction
public static void doubleInput(@AggregationState CentralMomentsState state, @SqlType(StandardTypes.DOUBLE) double x, @SqlType(StandardTypes.DOUBLE) double y)
{
double value = y - x;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the local variable is not necessary, just use updateCentralMomentsState(state, y - x); Generally speaking, if a variable is only used once, it's not necessary. Please update other places as well.

updateCentralMomentsState(state, value);
}

@InputFunction
public static void intInput(@AggregationState CentralMomentsState state, @SqlType(StandardTypes.INTEGER) long x, @SqlType(StandardTypes.INTEGER) long y)
{
double value = y - x;
updateCentralMomentsState(state, (double) value);
}

@CombineFunction
public static void combine(@AggregationState CentralMomentsState state, @AggregationState CentralMomentsState otherState)
{
mergeCentralMomentsState(state, otherState);
}

@AggregationFunction(value = "paired_t_statistic")
@Description("Returns the Student's t-statistic for the paired t-test")
@OutputFunction(StandardTypes.DOUBLE)
public static void paired_t_statistic(@AggregationState CentralMomentsState state, BlockBuilder out)
{
long n = state.getCount();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can either remove n and use state.getCount() through out, or update the other places using state.getCount to use n instead.


if (n < 2) {
out.appendNull();
}
else {
double sampStdDev = Math.sqrt(state.getM2() / (state.getCount() - 1));
double result = state.getM1() / (sampStdDev / Math.sqrt(state.getCount()));
DOUBLE.writeDouble(out, result);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.aggregation;

import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.operator.aggregation.state.CentralMomentsState;
import com.facebook.presto.spi.function.AggregationFunction;
import com.facebook.presto.spi.function.AggregationState;
import com.facebook.presto.spi.function.CombineFunction;
import com.facebook.presto.spi.function.Description;
import com.facebook.presto.spi.function.InputFunction;
import com.facebook.presto.spi.function.OutputFunction;
import com.facebook.presto.spi.function.SqlType;

import static com.facebook.presto.common.type.DoubleType.DOUBLE;
import static com.facebook.presto.operator.aggregation.AggregationUtils.mergeCentralMomentsState;
import static com.facebook.presto.operator.aggregation.AggregationUtils.updateCentralMomentsState;

@AggregationFunction
@Description("Returns the Student's t-statistic")
public final class TStatisticAggregation
{
private TStatisticAggregation() {}

@InputFunction
public static void input(@AggregationState CentralMomentsState state, @SqlType(StandardTypes.DOUBLE) double value)
{
updateCentralMomentsState(state, value);
}

@InputFunction
public static void input(@AggregationState CentralMomentsState state, @SqlType(StandardTypes.BIGINT) long value)
{
updateCentralMomentsState(state, (double) value);
}

@CombineFunction
public static void combine(@AggregationState CentralMomentsState state, @AggregationState CentralMomentsState otherState)
{
mergeCentralMomentsState(state, otherState);
}

@AggregationFunction(value = "t_statistic")
@Description("Returns the Student's t-statistic for the one-sample t-test")
@OutputFunction(StandardTypes.DOUBLE)
public static void t_statistic(@AggregationState CentralMomentsState state, BlockBuilder out)
{
long n = state.getCount();

if (n < 2) {
out.appendNull();
}
else {
double sampStdDev = Math.sqrt(state.getM2() / (state.getCount() - 1));
double result = state.getM1() / (sampStdDev / Math.sqrt(state.getCount()));
DOUBLE.writeDouble(out, result);
}
}
}