colexec: optimize distinct hash aggregation

This commit optimizes the handling of DISTINCT clauses by the hash aggregator. Previously, we would encode the combination of grouping and aggregation columns for each tuple to check whether it had already been seen. Now the grouping columns are encoded once for the whole bucket, and that encoded value is reused for every tuple that is checked. Additionally, this commit splits the custom hash aggregator helper into two implementations: one that handles only DISTINCT clauses with no FILTERs, and another that handles any combination of DISTINCT and FILTER clauses. The former can be optimized by converting the aggregation columns of all aggregate functions at once, before the new batch is aggregated. For the latter, however, we need to perform the conversion to datums after the filter has been applied. Release note: None
cockroachdb · Jun 28, 2020 · 696dd3e · 696dd3e
1 parent 787fa3f
commit 696dd3e
Show file tree

Hide file tree

Showing 2 changed files with 270 additions and 101 deletions.
diff --git a/pkg/sql/colexec/hash_aggregator.go b/pkg/sql/colexec/hash_aggregator.go
@@ -360,7 +360,7 @@ func (op *hashAggregator) onlineAgg(ctx context.Context, b coldata.Batch) {
 					op.scratch.diff[:len(remaining)], false, /* firstDefiniteMatch */
 				)
 				if anyMatched {
-					op.aggHelper.performAggregation(ctx, b, aggFunc.fns)
+					op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.encodedGroupCols)
 				}
 			}
 		} else {
@@ -408,7 +408,8 @@ func (op *hashAggregator) onlineAgg(ctx context.Context, b coldata.Batch) {
 			)
 
 			aggFunc.init(op.output.Batch)
-			op.aggHelper.performAggregation(ctx, b, aggFunc.fns)
+			aggFunc.encodedGroupCols = op.aggHelper.encodeGroupCols(ctx, b)
+			op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.encodedGroupCols)
 		}
 
 		// We have processed all tuples with this hashCode, so we should reset
@@ -443,6 +444,9 @@ type hashAggFuncs struct {
 	keyIdx int
 
 	fns []aggregateFunc
+	// encodedGroupCols contains the encoded "signature" of the corresponding
+	// aggregating group. It should not be modified once set.
+	encodedGroupCols []byte
 }
 
 const (