Skip to content

Commit

Permalink
colexec: use separate maps for each hash bucket in distinct hash agg
Browse files Browse the repository at this point in the history
This commit changes how we verify whether a tuple is distinct.
Previously, there was a single map for each aggregate function with a
DISTINCT clause, shared among all groups. This required that we encode
both the grouping and the aggregation columns of a tuple. Now each group
(i.e. bucket) has a separate map for each distinct aggregate function.
Initially I thought that this would incur too much overhead, but as it
turns out, the "global" shared map grows much larger and actually
worsens performance.

Release note: None
  • Loading branch information
yuzefovich committed Jun 28, 2020
1 parent 696dd3e commit 8b9c8b4
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 124 deletions.
13 changes: 7 additions & 6 deletions pkg/sql/colexec/hash_aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ func (op *hashAggregator) onlineAgg(ctx context.Context, b coldata.Batch) {
op.scratch.diff[:len(remaining)], false, /* firstDefiniteMatch */
)
if anyMatched {
op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.encodedGroupCols)
op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.seen)
}
}
} else {
Expand All @@ -378,6 +378,7 @@ func (op *hashAggregator) onlineAgg(ctx context.Context, b coldata.Batch) {
keyIdx := op.keyMapping.Length()
aggFunc := op.hashAlloc.newHashAggFuncs()
aggFunc.keyIdx = keyIdx
aggFunc.seen = op.aggHelper.makeSeenMaps()

// Store the key of the current aggregating group into keyMapping.
op.allocator.PerformOperation(op.keyMapping.ColVecs(), func() {
Expand Down Expand Up @@ -408,8 +409,7 @@ func (op *hashAggregator) onlineAgg(ctx context.Context, b coldata.Batch) {
)

aggFunc.init(op.output.Batch)
aggFunc.encodedGroupCols = op.aggHelper.encodeGroupCols(ctx, b)
op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.encodedGroupCols)
op.aggHelper.performAggregation(ctx, b, aggFunc.fns, aggFunc.seen)
}

// We have processed all tuples with this hashCode, so we should reset
Expand Down Expand Up @@ -444,9 +444,10 @@ type hashAggFuncs struct {
keyIdx int

fns []aggregateFunc
// encodedGroupCols contains the encoded "signature" of the corresponding
// aggregating group. It should not be modified once set.
encodedGroupCols []byte
// seen is a dense slice of maps used to handle distinct aggregation (it is
// of the same length as the number of functions with DISTINCT clause). It
// will be nil whenever no aggregate function has a DISTINCT clause.
seen []map[string]struct{}
}

const (
Expand Down
Loading

0 comments on commit 8b9c8b4

Please sign in to comment.