temporalio · yycptt · May 26, 2023 · May 18, 2023 · May 18, 2023 · May 18, 2023
diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go
@@ -0,0 +1,47 @@
+// The MIT License
+//
+// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
+//
+// Copyright (c) 2020 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package aggregate
+
+import (
+	"math/rand"
+	"testing"
+	"time"
+)
+
+// Sample output of BenchmarkRingMovingWindowAvg:
+// BenchmarkRingMovingWindowAvg-10    		12283236	        94.76 ns/op
+
+// testWindowSize is the window duration handed to the averager under benchmark.
+const testWindowSize = 3 * time.Second
+
+// testBufferSize is the ring-buffer capacity handed to the averager under benchmark.
+const testBufferSize = 200
+
+// BenchmarkRingMovingWindowAvg measures one Record call followed by one
+// Average call per iteration. The random sample is generated inside the timed
+// loop, so its cost is part of the reported ns/op.
+func BenchmarkRingMovingWindowAvg(b *testing.B) {
+	window := NewMovingWindowAvgImpl(testWindowSize, testBufferSize)
+	for iter := 0; iter < b.N; iter++ {
+		sample := rand.Int63()
+		window.Record(sample)
+		window.Average()
+	}
+}
diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go
@@ -0,0 +1,108 @@
+// The MIT License
+//
+// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
+//
+// Copyright (c) 2020 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package aggregate
+
+import (
+	"container/ring"
+	"sync"
+	"time"
+)
+
+// MovingWindowAverage accumulates int64 samples and reports their average.
+type MovingWindowAverage interface {
+	// Record adds one sample.
+	Record(val int64)
+	// Average reports the average of the samples currently held.
+	Average() float64
+}
+
+// timestampedData is one buffered sample together with the time it was recorded.
+type timestampedData struct {
+	value     int64
+	timestamp time.Time
+}
+
+// MovingWindowAvgImpl is a MovingWindowAverage backed by a fixed-size ring of
+// timestamped samples. The embedded RWMutex guards every other field.
+type MovingWindowAvgImpl struct {
+	sync.RWMutex
+	// windowSize is the age beyond which a sample is considered expired.
+	windowSize time.Duration
+	// maxBufferSize is the number of slots in the ring.
+	maxBufferSize int
+	// head points at the oldest sample slot; tail at the next slot to write.
+	head *ring.Ring
+	tail *ring.Ring
+	// sum and count are the running total and number of samples counted.
+	sum   int64
+	count int
+}
+
+// NewMovingWindowAvgImpl builds an empty moving-window averager whose ring has
+// maxBufferSize slots and whose samples expire after windowSize.
+//
+// NOTE(review): ring.New returns nil for a non-positive size and Record
+// dereferences the ring, so maxBufferSize is expected to be positive.
+func NewMovingWindowAvgImpl(
+	windowSize time.Duration,
+	maxBufferSize int,
+) *MovingWindowAvgImpl {
+	avg := new(MovingWindowAvgImpl)
+	avg.windowSize = windowSize
+	avg.maxBufferSize = maxBufferSize
+	// head and tail start on the same slot, which represents an empty buffer.
+	avg.head = ring.New(maxBufferSize)
+	avg.tail = avg.head
+	return avg
+}
+
+// Record adds val to the window, stamped with the current time. It holds the
+// write lock for the whole call.
+//
+// Expiry by age runs first; if the buffer still counts maxBufferSize samples
+// afterwards, the sample at head is dropped to make room for the new one.
+func (a *MovingWindowAvgImpl) Record(val int64) {
+	a.Lock()
+	defer a.Unlock()
+
+	a.expireOldValuesLocked()
+	if a.count == a.maxBufferSize {
+		a.expireOneLocked()
+	}
+
+	// Write into the slot at tail, then move tail to the next free slot.
+	slot := a.tail
+	slot.Value = timestampedData{timestamp: time.Now(), value: val}
+	a.tail = slot.Next()
+
+	a.count++
+	a.sum += val
+}
+
+// Average returns the arithmetic mean of the samples currently counted in the
+// window, or 0 when no samples are counted.
+//
+// The division is carried out in floating point so the fractional part is
+// kept; dividing the int64 sum by the count first would truncate toward zero
+// (for example, samples 0, 0, 0, 1 would average to 0 instead of 0.25).
+//
+// Average only takes the read lock and does not expire samples itself, so the
+// result reflects the state left by the most recent Record call.
+func (a *MovingWindowAvgImpl) Average() float64 {
+	a.RLock()
+	defer a.RUnlock()
+
+	if a.count == 0 {
+		return 0
+	}
+	return float64(a.sum) / float64(a.count)
+}
+
+// expireOldValuesLocked drops samples from the head of the buffer while they
+// are older than windowSize, subtracting each from sum and count. The caller
+// must hold the write lock.
+//
+// Samples are written in time order, so the scan stops at the first sample
+// that is still inside the window; head is left on that sample so a later
+// call can expire it. The loop is bounded by count rather than by
+// head != tail because head and tail coincide both when the buffer is empty
+// and when it is completely full.
+func (a *MovingWindowAvgImpl) expireOldValuesLocked() {
+	for a.count > 0 {
+		data, ok := a.head.Value.(timestampedData)
+		if !ok || time.Since(data.timestamp) <= a.windowSize {
+			// Either the slot holds no sample (not expected while
+			// count > 0) or the oldest sample is still live.
+			break
+		}
+		a.sum -= data.value
+		a.count--
+		a.head = a.head.Next()
+	}
+}
+
+// expireOneLocked advances head by one slot. If the slot it leaves held a
+// sample, that sample is subtracted from sum and count. The caller must hold
+// the write lock.
+func (a *MovingWindowAvgImpl) expireOneLocked() {
+	oldest := a.head
+	a.head = oldest.Next()
+
+	entry, isSample := oldest.Value.(timestampedData)
+	if !isSample {
+		return
+	}
+	a.sum -= entry.value
+	a.count--
+}
diff --git a/common/aggregate/noop_signal_aggregator.go b/common/aggregate/noop_signal_aggregator.go
@@ -0,0 +1,47 @@
+// The MIT License
+//
+// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
+//
+// Copyright (c) 2020 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package aggregate
+
+import "go.temporal.io/server/common/quotas"
+
+// NoopPersistenceHealthSignalAggregator is a package-level SignalAggregator for
+// quotas.Request backed by noopSignalAggregator: its record function does
+// nothing and its AverageLatency and ErrorRatio always return 0.
+var NoopPersistenceHealthSignalAggregator SignalAggregator[quotas.Request] = newNoopSignalAggregator[quotas.Request]()
+
+// noopSignalAggregator is a stateless aggregator over requests of type T that
+// records nothing.
+type noopSignalAggregator[T any] struct{}
+
+// newNoopSignalAggregator returns a pointer to a new no-op aggregator for T.
+func newNoopSignalAggregator[T any]() *noopSignalAggregator[T] {
+	return new(noopSignalAggregator[T])
+}
+
+// GetRecordFn ignores the request and returns a callback that discards the
+// error it is given.
+func (a *noopSignalAggregator[T]) GetRecordFn(T) func(error) {
+	discard := func(error) {}
+	return discard
+}
+
+// AverageLatency ignores the request and always reports 0.
+func (a *noopSignalAggregator[T]) AverageLatency(T) float64 {
+	var latency float64 // zero value: this aggregator never records anything
+	return latency
+}
+
+// ErrorRatio ignores the request and always reports 0.
+//
+// The receiver is named a to match the other methods of noopSignalAggregator.
+func (a *noopSignalAggregator[T]) ErrorRatio(T) float64 {
+	return 0
+}
diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go
@@ -0,0 +1,180 @@
+// The MIT License
+//
+// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
+//
+// Copyright (c) 2020 Uber Technologies, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package aggregate
+
+import (
+	"sync"
+	"time"
+
+	"go.temporal.io/server/common/dynamicconfig"
+	"go.temporal.io/server/common/metrics"
+	"go.temporal.io/server/common/quotas"
+)
+
+// PersistenceHealthSignalAggregator keeps one moving-window latency average
+// and one moving-window error ratio per key K. Keys are derived from each
+// quotas.Request by keyMapper.
+type PersistenceHealthSignalAggregator[K SignalKey] struct {
+	// Embedded interface; the constructor in this file leaves it unset.
+	SignalAggregator[quotas.Request]
+
+	// keyMapper maps a request to the key its samples are stored under.
+	keyMapper SignalKeyMapperFn[quotas.Request, K]
+
+	// latencyAverages holds the per-key latency windows; guarded by latencyLock.
+	latencyAverages map[K]MovingWindowAverage
+	latencyLock     sync.RWMutex
+
+	// errorRatios holds the per-key error windows; guarded by errorLock.
+	errorRatios map[K]MovingWindowAverage
+	errorLock   sync.RWMutex
+
+	// windowSize and maxBufferSize configure every window created lazily.
+	windowSize    time.Duration
+	maxBufferSize int
+
+	// metricsHandler receives the gauges emitted on each emitMetricsTimer tick.
+	metricsHandler   metrics.Handler
+	emitMetricsTimer *time.Ticker
+}
+
+// perShardPerNsHealthSignalKey identifies a (namespace, shard) pair.
+type perShardPerNsHealthSignalKey struct {
+	namespace string
+	shardID   int32
+}
+
+// NewPersistenceHealthSignalAggregator creates an aggregator with empty
+// latency and error maps and starts a goroutine running emitMetricsLoop,
+// driven by a ticker that fires every windowSize.
+//
+// Nothing in this function stops the ticker or the goroutine.
+// NOTE(review): time.NewTicker panics on a non-positive duration, so
+// windowSize is expected to be positive.
+func NewPersistenceHealthSignalAggregator[K SignalKey](
+	keyMapper SignalKeyMapperFn[quotas.Request, K],
+	windowSize time.Duration,
+	maxBufferSize int,
+	metricsHandler metrics.Handler,
+) *PersistenceHealthSignalAggregator[K] {
+	agg := new(PersistenceHealthSignalAggregator[K])
+	agg.keyMapper = keyMapper
+	agg.latencyAverages = make(map[K]MovingWindowAverage)
+	agg.errorRatios = make(map[K]MovingWindowAverage)
+	agg.windowSize = windowSize
+	agg.maxBufferSize = maxBufferSize
+	agg.metricsHandler = metricsHandler
+	agg.emitMetricsTimer = time.NewTicker(windowSize)
+
+	go agg.emitMetricsLoop()
+	return agg
+}
+
+// NewPerShardPerNsHealthSignalAggregator creates an aggregator keyed by
+// (namespace, shard). Both dynamic-config functions are called once, here, and
+// the values they return are passed to the underlying constructor.
+func NewPerShardPerNsHealthSignalAggregator(
+	windowSize dynamicconfig.DurationPropertyFn,
+	maxBufferSize dynamicconfig.IntPropertyFn,
+	metricsHandler metrics.Handler,
+) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] {
+	window := windowSize()
+	bufferSize := maxBufferSize()
+	return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey](
+		perShardPerNsKeyMapperFn,
+		window,
+		bufferSize,
+		metricsHandler,
+	)
+}
+
+// perShardPerNsKeyMapperFn builds the aggregation key for a request, using
+// req.Caller as the namespace and req.CallerSegment as the shard ID.
+func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey {
+	var key perShardPerNsHealthSignalKey
+	key.namespace = req.Caller
+	key.shardID = req.CallerSegment
+	return key
+}
+
+// GetMetricTags returns the namespace tag followed by the shard tag for this key.
+func (k perShardPerNsHealthSignalKey) GetMetricTags() []metrics.Tag {
+	return []metrics.Tag{
+		metrics.NamespaceTag(k.namespace),
+		metrics.ShardTag(k.shardID),
+	}
+}
+
+// GetRecordFn captures the current time and returns a callback to invoke when
+// the request finishes. The callback records the elapsed time in milliseconds
+// into the request's latency window, then records 1 into its error window if
+// err is non-nil and 0 otherwise.
+func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) {
+	startedAt := time.Now()
+	return func(err error) {
+		latency := s.getOrInitLatencyAverage(req)
+		latency.Record(time.Since(startedAt).Milliseconds())
+
+		ratio := s.getOrInitErrorRatio(req)
+		var outcome int64 // 0 = success, 1 = failure
+		if err != nil {
+			outcome = 1
+		}
+		ratio.Record(outcome)
+	}
+}
+
+// AverageLatency returns the current average of the latency window for req's
+// key, creating an empty window if the key has none yet.
+func (s *PersistenceHealthSignalAggregator[_]) AverageLatency(req quotas.Request) float64 {
+	latency := s.getOrInitLatencyAverage(req)
+	return latency.Average()
+}
+
+// ErrorRatio returns the current average of the error window for req's key,
+// creating an empty window if the key has none yet.
+func (s *PersistenceHealthSignalAggregator[_]) ErrorRatio(req quotas.Request) float64 {
+	ratio := s.getOrInitErrorRatio(req)
+	return ratio.Average()
+}
+
+// getOrInitLatencyAverage looks up, or lazily creates, the latency window for
+// req's key in latencyAverages under latencyLock.
+func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) MovingWindowAverage {
+	table, guard := &s.latencyAverages, &s.latencyLock
+	return s.getOrInitAverage(req, table, guard)
+}
+
+// getOrInitErrorRatio looks up, or lazily creates, the error window for req's
+// key in errorRatios under errorLock.
+func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) MovingWindowAverage {
+	table, guard := &s.errorRatios, &s.errorLock
+	return s.getOrInitAverage(req, table, guard)
+}
+
+// getOrInitAverage returns the window stored in *averages for req's key,
+// creating and storing a new one if the key is absent. lock guards *averages.
+//
+// The lookup first runs under the read lock. On a miss a new window is built
+// before the write lock is taken, and the map is checked again under the
+// write lock; if another caller stored a window in between, that one is
+// returned and the freshly built window is discarded.
+func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage(
+	req quotas.Request,
+	averages *map[K]MovingWindowAverage,
+	lock *sync.RWMutex,
+) MovingWindowAverage {
+	key := s.keyMapper(req)
+
+	lock.RLock()
+	existing, found := (*averages)[key]
+	lock.RUnlock()
+	if found {
+		return existing
+	}
+
+	candidate := NewMovingWindowAvgImpl(s.windowSize, s.maxBufferSize)
+
+	lock.Lock()
+	defer lock.Unlock()
+
+	if winner, raced := (*averages)[key]; raced {
+		return winner
+	}
+	(*averages)[key] = candidate
+	return candidate
+}
+
+// emitMetricsLoop blocks on emitMetricsTimer and, on every tick, records one
+// gauge value per key: first the latency averages (under latencyLock's read
+// lock), then the error ratios (under errorLock's read lock). The loop has no
+// exit condition of its own.
+func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() {
+	for range s.emitMetricsTimer.C {
+		s.latencyLock.RLock()
+		for key, avg := range s.latencyAverages {
+			latencyGauge := s.metricsHandler.Gauge(metrics.PersistenceAvgLatencyPerShardPerNamespace.GetMetricName())
+			latencyGauge.Record(avg.Average(), key.GetMetricTags()...)
+		}
+		s.latencyLock.RUnlock()
+
+		s.errorLock.RLock()
+		for key, ratio := range s.errorRatios {
+			errorGauge := s.metricsHandler.Gauge(metrics.PersistenceErrPerShardPerNamespace.GetMetricName())
+			errorGauge.Record(ratio.Average(), key.GetMetricTags()...)
+		}
+		s.errorLock.RUnlock()
+	}
+}