From 0045fb783c36a1b1aa27379760e5aec0655dd7c1 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 18 May 2023 12:07:14 -0700 Subject: [PATCH 01/36] moving window average --- .../aggregate/bench_moving_window_avg_test.go | 56 +++++++++++ common/aggregate/moving_window_average.go | 39 ++++++++ common/aggregate/moving_window_avg_chan.go | 95 ++++++++++++++++++ common/aggregate/moving_window_avg_ring.go | 98 +++++++++++++++++++ 4 files changed, 288 insertions(+) create mode 100644 common/aggregate/bench_moving_window_avg_test.go create mode 100644 common/aggregate/moving_window_average.go create mode 100644 common/aggregate/moving_window_avg_chan.go create mode 100644 common/aggregate/moving_window_avg_ring.go diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go new file mode 100644 index 00000000000..979461ff64d --- /dev/null +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -0,0 +1,56 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package aggregate + +import ( + "math/rand" + "testing" + "time" +) + +// BenchmarkChannelMovingWindowAvg +// BenchmarkChannelMovingWindowAvg-10 1846735 657.0 ns/op +// BenchmarkRingMovingWindowAvg +// BenchmarkRingMovingWindowAvg-10 12283236 94.76 ns/op + +const ( + testWindowSize = 3 * time.Second + testBufferSize = 200 +) + +func BenchmarkChannelMovingWindowAvg(b *testing.B) { + benchmarkMovingWindowAvg(b, NewMovingWindowAvgChanImpl(testWindowSize, testBufferSize)) +} + +func BenchmarkRingMovingWindowAvg(b *testing.B) { + benchmarkMovingWindowAvg(b, NewMovingWindowAvgRingImpl(testWindowSize, testBufferSize)) +} + +func benchmarkMovingWindowAvg(b *testing.B, avg MovingWindowAverage) { + for i := 0; i < b.N; i++ { + avg.Record(rand.Int63()) + avg.Average() + } +} diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go new file mode 100644 index 00000000000..7031f658f6c --- /dev/null +++ b/common/aggregate/moving_window_average.go @@ -0,0 +1,39 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package aggregate + +import "time" + +type ( + MovingWindowAverage interface { + Record(val int64) + Average() float64 + } + + timestampedData struct { + value int64 + timestamp time.Time + } +) diff --git a/common/aggregate/moving_window_avg_chan.go b/common/aggregate/moving_window_avg_chan.go new file mode 100644 index 00000000000..113dc93228a --- /dev/null +++ b/common/aggregate/moving_window_avg_chan.go @@ -0,0 +1,95 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package aggregate + +import ( + "sync/atomic" + "time" +) + +type ( + MovingWindowAvgChanImpl struct { + windowSize time.Duration + buffer chan timestampedData + forceExpireCh chan interface{} + sum atomic.Int64 + count atomic.Int64 + } +) + +func NewMovingWindowAvgChanImpl( + windowSize time.Duration, + maxBufferSize int, +) *MovingWindowAvgChanImpl { + ret := &MovingWindowAvgChanImpl{ + windowSize: windowSize, + buffer: make(chan timestampedData, maxBufferSize), + forceExpireCh: make(chan interface{}), + } + go ret.expireLoop() // TODO: need a mechanism to cleanup this goroutine + return ret +} + +func (a *MovingWindowAvgChanImpl) expireLoop() { + for { + select { + case toExpire := <-a.buffer: + if time.Since(toExpire.timestamp) > a.windowSize { + // element already outside of window, remove from average + a.sum.Add(-toExpire.value) + a.count.Add(-1) + } else { + // first element out of the buffer should be the oldest so wait until + //it moves out of the window before trying to remove more elements + timer := time.NewTimer(a.windowSize - time.Since(toExpire.timestamp)) + select { + case <-timer.C: + case <-a.forceExpireCh: // if the buffer is full, remove one item so new adds don't get blocked + timer.Stop() + a.sum.Add(-toExpire.value) + a.count.Add(-1) + } + } + } + } +} + +func (a *MovingWindowAvgChanImpl) Record(val int64) { + if len(a.buffer) == cap(a.buffer) { + // blocks until there is room in the buffer to add more data + a.forceExpireCh <- struct{}{} + } + + a.sum.Add(val) + a.count.Add(1) + a.buffer <- timestampedData{value: val, timestamp: time.Now()} +} + +func (a *MovingWindowAvgChanImpl) Average() float64 { + if a.count.Load() == 0 { + return 0 + } + return float64(a.sum.Load() / a.count.Load()) +} diff --git a/common/aggregate/moving_window_avg_ring.go b/common/aggregate/moving_window_avg_ring.go new file mode 100644 index 00000000000..11d0f28873a --- /dev/null +++ b/common/aggregate/moving_window_avg_ring.go @@ -0,0 +1,98 @@ +// The MIT 
License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package aggregate + +import ( + "container/ring" + "sync" + "time" +) + +type ( + MovingWindowAvgRingImpl struct { + sync.RWMutex + windowSize time.Duration + maxBufferSize int + head *ring.Ring + tail *ring.Ring + sum int64 + count int + } +) + +func NewMovingWindowAvgRingImpl( + windowSize time.Duration, + maxBufferSize int, +) *MovingWindowAvgRingImpl { + buffer := ring.New(maxBufferSize) + return &MovingWindowAvgRingImpl{ + windowSize: windowSize, + maxBufferSize: maxBufferSize, + head: buffer, + tail: buffer, + } +} + +func (a *MovingWindowAvgRingImpl) Record(val int64) { + a.Lock() + defer a.Unlock() + + a.expireOldValuesLocked() + if a.count == a.maxBufferSize { + a.expireOneLocked() + } + + a.tail.Value = timestampedData{value: val, timestamp: time.Now()} + a.tail = a.tail.Next() + + a.sum += val + a.count++ +} + +func (a *MovingWindowAvgRingImpl) Average() float64 { + a.RLock() + defer a.RUnlock() + if a.count == 0 { + return 0 + } + return float64(a.sum / int64(a.count)) +} + +func (a *MovingWindowAvgRingImpl) expireOldValuesLocked() { + for ; a.head != a.tail; a.head = a.head.Next() { + if data, ok := a.head.Value.(timestampedData); ok && time.Since(data.timestamp) > a.windowSize { + a.sum -= data.value + a.count-- + } + } +} + +func (a *MovingWindowAvgRingImpl) expireOneLocked() { + if data, ok := a.head.Value.(timestampedData); ok { + a.sum -= data.value + a.count-- + } + a.head = a.head.Next() +} From ee44c22d9a698578ab32259113503d4921f7a9e3 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 18 May 2023 12:10:54 -0700 Subject: [PATCH 02/36] remove channel avg impl --- .../aggregate/bench_moving_window_avg_test.go | 11 +-- common/aggregate/moving_window_average.go | 71 +++++++++++++- common/aggregate/moving_window_avg_chan.go | 95 ------------------ common/aggregate/moving_window_avg_ring.go | 98 ------------------- 4 files changed, 71 insertions(+), 204 deletions(-) delete mode 100644 common/aggregate/moving_window_avg_chan.go delete mode 100644 
common/aggregate/moving_window_avg_ring.go diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 979461ff64d..706460c2c30 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -30,8 +30,6 @@ import ( "time" ) -// BenchmarkChannelMovingWindowAvg -// BenchmarkChannelMovingWindowAvg-10 1846735 657.0 ns/op // BenchmarkRingMovingWindowAvg // BenchmarkRingMovingWindowAvg-10 12283236 94.76 ns/op @@ -40,15 +38,8 @@ const ( testBufferSize = 200 ) -func BenchmarkChannelMovingWindowAvg(b *testing.B) { - benchmarkMovingWindowAvg(b, NewMovingWindowAvgChanImpl(testWindowSize, testBufferSize)) -} - func BenchmarkRingMovingWindowAvg(b *testing.B) { - benchmarkMovingWindowAvg(b, NewMovingWindowAvgRingImpl(testWindowSize, testBufferSize)) -} - -func benchmarkMovingWindowAvg(b *testing.B, avg MovingWindowAverage) { + avg := NewMovingWindowAvgImpl(testWindowSize, testBufferSize) for i := 0; i < b.N; i++ { avg.Record(rand.Int63()) avg.Average() diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 7031f658f6c..49758ec522a 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -24,7 +24,11 @@ package aggregate -import "time" +import ( + "container/ring" + "sync" + "time" +) type ( MovingWindowAverage interface { @@ -36,4 +40,69 @@ type ( value int64 timestamp time.Time } + + MovingWindowAvgImpl struct { + sync.RWMutex + windowSize time.Duration + maxBufferSize int + head *ring.Ring + tail *ring.Ring + sum int64 + count int + } ) + +func NewMovingWindowAvgImpl( + windowSize time.Duration, + maxBufferSize int, +) *MovingWindowAvgImpl { + buffer := ring.New(maxBufferSize) + return &MovingWindowAvgImpl{ + windowSize: windowSize, + maxBufferSize: maxBufferSize, + head: buffer, + tail: buffer, + } +} + +func (a *MovingWindowAvgImpl) Record(val int64) { + a.Lock() 
+ defer a.Unlock() + + a.expireOldValuesLocked() + if a.count == a.maxBufferSize { + a.expireOneLocked() + } + + a.tail.Value = timestampedData{value: val, timestamp: time.Now()} + a.tail = a.tail.Next() + + a.sum += val + a.count++ +} + +func (a *MovingWindowAvgImpl) Average() float64 { + a.RLock() + defer a.RUnlock() + if a.count == 0 { + return 0 + } + return float64(a.sum / int64(a.count)) +} + +func (a *MovingWindowAvgImpl) expireOldValuesLocked() { + for ; a.head != a.tail; a.head = a.head.Next() { + if data, ok := a.head.Value.(timestampedData); ok && time.Since(data.timestamp) > a.windowSize { + a.sum -= data.value + a.count-- + } + } +} + +func (a *MovingWindowAvgImpl) expireOneLocked() { + if data, ok := a.head.Value.(timestampedData); ok { + a.sum -= data.value + a.count-- + } + a.head = a.head.Next() +} diff --git a/common/aggregate/moving_window_avg_chan.go b/common/aggregate/moving_window_avg_chan.go deleted file mode 100644 index 113dc93228a..00000000000 --- a/common/aggregate/moving_window_avg_chan.go +++ /dev/null @@ -1,95 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package aggregate - -import ( - "sync/atomic" - "time" -) - -type ( - MovingWindowAvgChanImpl struct { - windowSize time.Duration - buffer chan timestampedData - forceExpireCh chan interface{} - sum atomic.Int64 - count atomic.Int64 - } -) - -func NewMovingWindowAvgChanImpl( - windowSize time.Duration, - maxBufferSize int, -) *MovingWindowAvgChanImpl { - ret := &MovingWindowAvgChanImpl{ - windowSize: windowSize, - buffer: make(chan timestampedData, maxBufferSize), - forceExpireCh: make(chan interface{}), - } - go ret.expireLoop() // TODO: need a mechanism to cleanup this goroutine - return ret -} - -func (a *MovingWindowAvgChanImpl) expireLoop() { - for { - select { - case toExpire := <-a.buffer: - if time.Since(toExpire.timestamp) > a.windowSize { - // element already outside of window, remove from average - a.sum.Add(-toExpire.value) - a.count.Add(-1) - } else { - // first element out of the buffer should be the oldest so wait until - //it moves out of the window before trying to remove more elements - timer := time.NewTimer(a.windowSize - time.Since(toExpire.timestamp)) - select { - case <-timer.C: - case <-a.forceExpireCh: // if the buffer is full, remove one item so new adds don't get blocked - timer.Stop() - a.sum.Add(-toExpire.value) - a.count.Add(-1) - } - } - } - } -} - -func (a *MovingWindowAvgChanImpl) Record(val int64) { - if len(a.buffer) == cap(a.buffer) { - // blocks until there is room in the buffer to add more data - a.forceExpireCh <- struct{}{} - } - - 
a.sum.Add(val) - a.count.Add(1) - a.buffer <- timestampedData{value: val, timestamp: time.Now()} -} - -func (a *MovingWindowAvgChanImpl) Average() float64 { - if a.count.Load() == 0 { - return 0 - } - return float64(a.sum.Load() / a.count.Load()) -} diff --git a/common/aggregate/moving_window_avg_ring.go b/common/aggregate/moving_window_avg_ring.go deleted file mode 100644 index 11d0f28873a..00000000000 --- a/common/aggregate/moving_window_avg_ring.go +++ /dev/null @@ -1,98 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -package aggregate - -import ( - "container/ring" - "sync" - "time" -) - -type ( - MovingWindowAvgRingImpl struct { - sync.RWMutex - windowSize time.Duration - maxBufferSize int - head *ring.Ring - tail *ring.Ring - sum int64 - count int - } -) - -func NewMovingWindowAvgRingImpl( - windowSize time.Duration, - maxBufferSize int, -) *MovingWindowAvgRingImpl { - buffer := ring.New(maxBufferSize) - return &MovingWindowAvgRingImpl{ - windowSize: windowSize, - maxBufferSize: maxBufferSize, - head: buffer, - tail: buffer, - } -} - -func (a *MovingWindowAvgRingImpl) Record(val int64) { - a.Lock() - defer a.Unlock() - - a.expireOldValuesLocked() - if a.count == a.maxBufferSize { - a.expireOneLocked() - } - - a.tail.Value = timestampedData{value: val, timestamp: time.Now()} - a.tail = a.tail.Next() - - a.sum += val - a.count++ -} - -func (a *MovingWindowAvgRingImpl) Average() float64 { - a.RLock() - defer a.RUnlock() - if a.count == 0 { - return 0 - } - return float64(a.sum / int64(a.count)) -} - -func (a *MovingWindowAvgRingImpl) expireOldValuesLocked() { - for ; a.head != a.tail; a.head = a.head.Next() { - if data, ok := a.head.Value.(timestampedData); ok && time.Since(data.timestamp) > a.windowSize { - a.sum -= data.value - a.count-- - } - } -} - -func (a *MovingWindowAvgRingImpl) expireOneLocked() { - if data, ok := a.head.Value.(timestampedData); ok { - a.sum -= data.value - a.count-- - } - a.head = a.head.Next() -} From 68766b7b60f416b5f5189485f9a7db69498e3de0 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 18 May 2023 13:18:23 -0700 Subject: [PATCH 03/36] add signal aggregator --- .../persistence_health_signal_aggregator.go | 141 ++++++++++++++++++ common/aggregate/signal_aggregator.go | 35 +++++ 2 files changed, 176 insertions(+) create mode 100644 common/aggregate/persistence_health_signal_aggregator.go create mode 100644 common/aggregate/signal_aggregator.go diff --git a/common/aggregate/persistence_health_signal_aggregator.go 
b/common/aggregate/persistence_health_signal_aggregator.go new file mode 100644 index 00000000000..ef9a32f78b5 --- /dev/null +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -0,0 +1,141 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package aggregate + +import ( + "sync" + "time" + + "go.temporal.io/server/common/quotas" +) + +type ( + PersistenceHealthSignalAggregator[K comparable] struct { + SignalAggregator[quotas.Request] + keyMapper SignalKeyMapperFn[quotas.Request, K] + + latencyAverages map[K]MovingWindowAverage + latencyLock sync.RWMutex + + errorRatios map[K]MovingWindowAverage + errorLock sync.RWMutex + + windowSize time.Duration + maxBufferSize int + } + + perShardPerNsHealthSignalKey struct { + namespace string + shardID int32 + } +) + +func NewPersistenceHealthSignalAggregator[K comparable]( + keyMapper SignalKeyMapperFn[quotas.Request, K], + windowSize time.Duration, + maxBufferSize int, +) *PersistenceHealthSignalAggregator[K] { + return &PersistenceHealthSignalAggregator[K]{ + keyMapper: keyMapper, + latencyAverages: make(map[K]MovingWindowAverage), + errorRatios: make(map[K]MovingWindowAverage), + windowSize: windowSize, + maxBufferSize: maxBufferSize, + } +} + +func NewPerShardPerNsHealthSignalAggregator( + windowSize time.Duration, + maxBufferSize int, +) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { + return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( + perShardPerNsKeyMapperFn, + windowSize, + maxBufferSize, + ) +} + +func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { + return perShardPerNsHealthSignalKey{ + namespace: req.Caller, + shardID: req.CallerSegment, + } +} + +func (s *PersistenceHealthSignalAggregator[_]) Record(req quotas.Request, latency int64, errored bool) { + latencyAvg := s.getOrInitLatencyAverage(req) + latencyAvg.Record(latency) + + errorRatio := s.getOrInitErrorRatio(req) + if errored { + errorRatio.Record(1) + } else { + errorRatio.Record(0) + } +} + +func (s *PersistenceHealthSignalAggregator[_]) AverageLatency(req quotas.Request) float64 { + return s.getOrInitLatencyAverage(req).Average() +} + +func (s *PersistenceHealthSignalAggregator[_]) ErrorRatio(req quotas.Request) 
float64 { + return s.getOrInitErrorRatio(req).Average() +} + +func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) MovingWindowAverage { + return s.getOrInitAverage(req, &s.latencyAverages, &s.latencyLock) +} + +func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) MovingWindowAverage { + return s.getOrInitAverage(req, &s.errorRatios, &s.errorLock) +} + +func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( + req quotas.Request, + averages *map[K]MovingWindowAverage, + lock *sync.RWMutex, +) MovingWindowAverage { + key := s.keyMapper(req) + + lock.RLock() + avg, ok := (*averages)[key] + lock.RUnlock() + if ok { + return avg + } + + newAvg := NewMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) + + lock.Lock() + defer lock.Unlock() + + avg, ok = (*averages)[key] + if ok { + return avg + } + + (*averages)[key] = newAvg + return newAvg +} diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go new file mode 100644 index 00000000000..df650006fa3 --- /dev/null +++ b/common/aggregate/signal_aggregator.go @@ -0,0 +1,35 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package aggregate + +type ( + SignalKeyMapperFn[T any, K comparable] func(signalOrigin T) K + + SignalAggregator[T any] interface { + Record(key T, latency int64, errored bool) + AverageLatency(key T) float64 + ErrorRatio(key T) float64 + } +) From bfc700283a23d2c21df5d175d02f1672db189ccb Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 18 May 2023 14:51:37 -0700 Subject: [PATCH 04/36] adjust record fn --- .../persistence_health_signal_aggregator.go | 19 +++++++------- common/aggregate/signal_aggregator.go | 2 +- .../persistenceHealthSignalClients.go | 25 +++++++++++++++++++ 3 files changed, 36 insertions(+), 10 deletions(-) create mode 100644 common/persistence/persistenceHealthSignalClients.go diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index ef9a32f78b5..80b1375c75a 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -84,15 +84,16 @@ func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { } } -func (s *PersistenceHealthSignalAggregator[_]) Record(req quotas.Request, latency int64, errored bool) { - latencyAvg := s.getOrInitLatencyAverage(req) - latencyAvg.Record(latency) - - errorRatio := s.getOrInitErrorRatio(req) - if errored { - errorRatio.Record(1) - } else { - errorRatio.Record(0) +func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) { 
+ start := time.Now() + return func(err error) { + s.getOrInitLatencyAverage(req).Record(time.Since(start).Milliseconds()) + errorRatio := s.getOrInitErrorRatio(req) + if err != nil { + errorRatio.Record(1) + } else { + errorRatio.Record(0) + } } } diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go index df650006fa3..ede2166e5fa 100644 --- a/common/aggregate/signal_aggregator.go +++ b/common/aggregate/signal_aggregator.go @@ -28,7 +28,7 @@ type ( SignalKeyMapperFn[T any, K comparable] func(signalOrigin T) K SignalAggregator[T any] interface { - Record(key T, latency int64, errored bool) + GetRecordFn(key T) func(err error) AverageLatency(key T) float64 ErrorRatio(key T) float64 } diff --git a/common/persistence/persistenceHealthSignalClients.go b/common/persistence/persistenceHealthSignalClients.go new file mode 100644 index 00000000000..885522f0f04 --- /dev/null +++ b/common/persistence/persistenceHealthSignalClients.go @@ -0,0 +1,25 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package persistence From 82abfa4e844684c335c0507947819cb7bcd46992 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Fri, 19 May 2023 09:39:11 -0700 Subject: [PATCH 05/36] add health signal clients --- .../persistenceHealthSignalClients.go | 889 ++++++++++++++++++ 1 file changed, 889 insertions(+) diff --git a/common/persistence/persistenceHealthSignalClients.go b/common/persistence/persistenceHealthSignalClients.go index 885522f0f04..938d6e6e33d 100644 --- a/common/persistence/persistenceHealthSignalClients.go +++ b/common/persistence/persistenceHealthSignalClients.go @@ -23,3 +23,892 @@ // THE SOFTWARE. package persistence + +import ( + "context" + + commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/server/common/aggregate" + "go.temporal.io/server/common/headers" + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/quotas" +) + +type ( + shardHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence ShardManager + logger log.Logger + } + + executionHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence ExecutionManager + logger log.Logger + } + + taskHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence TaskManager + logger log.Logger + } + + metadataHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence MetadataManager + logger log.Logger + } + + clusterMetadataHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence ClusterMetadataManager + logger log.Logger + } + + 
queueHealthSignalPersistenceClient struct { + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence Queue + logger log.Logger + } +) + +var _ ShardManager = (*shardHealthSignalPersistenceClient)(nil) +var _ ExecutionManager = (*executionHealthSignalPersistenceClient)(nil) +var _ TaskManager = (*taskHealthSignalPersistenceClient)(nil) +var _ MetadataManager = (*metadataHealthSignalPersistenceClient)(nil) +var _ ClusterMetadataManager = (*clusterMetadataHealthSignalPersistenceClient)(nil) +var _ Queue = (*queueHealthSignalPersistenceClient)(nil) + +func NewShardPersistenceHealthSignalClient(persistence ShardManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ShardManager { + return &shardHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func NewExecutionPersistenceHealthSignalClient(persistence ExecutionManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ExecutionManager { + return &executionHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func NewTaskPersistenceHealthSignalClient(persistence TaskManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) TaskManager { + return &taskHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func NewMetadataPersistenceHealthSignalClient(persistence MetadataManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) MetadataManager { + return &metadataHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func NewClusterMetadataPersistenceHealthSignalClient(persistence ClusterMetadataManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ClusterMetadataManager { + return 
&clusterMetadataHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func NewQueuePersistenceHealthSignalClient(persistence Queue, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) Queue { + return &queueHealthSignalPersistenceClient{ + persistence: persistence, + healthSignals: healthSignals, + logger: logger, + } +} + +func (p *shardHealthSignalPersistenceClient) GetName() string { + return p.persistence.GetName() +} + +func (p *shardHealthSignalPersistenceClient) GetOrCreateShard( + ctx context.Context, + request *GetOrCreateShardRequest, +) (*GetOrCreateShardResponse, error) { + record := recordFn(ctx, "GetOrCreateShard", request.ShardID, p.healthSignals) + response, err := p.persistence.GetOrCreateShard(ctx, request) + record(err) + return response, err +} + +func (p *shardHealthSignalPersistenceClient) UpdateShard( + ctx context.Context, + request *UpdateShardRequest, +) error { + record := recordFn(ctx, "UpdateShard", request.ShardInfo.GetShardId(), p.healthSignals) + err := p.persistence.UpdateShard(ctx, request) + record(err) + return err +} + +func (p *shardHealthSignalPersistenceClient) AssertShardOwnership( + ctx context.Context, + request *AssertShardOwnershipRequest, +) error { + record := recordFn(ctx, "AssertShardOwnership", request.ShardID, p.healthSignals) + err := p.persistence.AssertShardOwnership(ctx, request) + record(err) + return err +} + +func (p *shardHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func (p *executionHealthSignalPersistenceClient) GetName() string { + return p.persistence.GetName() +} + +func (p *executionHealthSignalPersistenceClient) GetHistoryBranchUtil() HistoryBranchUtil { + return p.persistence.GetHistoryBranchUtil() +} + +func (p *executionHealthSignalPersistenceClient) CreateWorkflowExecution( + ctx context.Context, + request *CreateWorkflowExecutionRequest, +) (*CreateWorkflowExecutionResponse, error) 
{ + record := recordFn(ctx, "CreateWorkflowExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.CreateWorkflowExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) GetWorkflowExecution( + ctx context.Context, + request *GetWorkflowExecutionRequest, +) (*GetWorkflowExecutionResponse, error) { + record := recordFn(ctx, "GetWorkflowExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.GetWorkflowExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) SetWorkflowExecution( + ctx context.Context, + request *SetWorkflowExecutionRequest, +) (*SetWorkflowExecutionResponse, error) { + record := recordFn(ctx, "SetWorkflowExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.SetWorkflowExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) UpdateWorkflowExecution( + ctx context.Context, + request *UpdateWorkflowExecutionRequest, +) (*UpdateWorkflowExecutionResponse, error) { + record := recordFn(ctx, "UpdateWorkflowExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.UpdateWorkflowExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ConflictResolveWorkflowExecution( + ctx context.Context, + request *ConflictResolveWorkflowExecutionRequest, +) (*ConflictResolveWorkflowExecutionResponse, error) { + record := recordFn(ctx, "ConflictResolveWorkflowExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.ConflictResolveWorkflowExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) DeleteWorkflowExecution( + ctx context.Context, + request *DeleteWorkflowExecutionRequest, +) error { + record := recordFn(ctx, "DeleteWorkflowExecution", request.ShardID,
p.healthSignals) + err := p.persistence.DeleteWorkflowExecution(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) DeleteCurrentWorkflowExecution( + ctx context.Context, + request *DeleteCurrentWorkflowExecutionRequest, +) error { + record := recordFn(ctx, "DeleteCurrentWorkflowExecution", request.ShardID, p.healthSignals) + err := p.persistence.DeleteCurrentWorkflowExecution(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) GetCurrentExecution( + ctx context.Context, + request *GetCurrentExecutionRequest, +) (*GetCurrentExecutionResponse, error) { + record := recordFn(ctx, "GetCurrentExecution", request.ShardID, p.healthSignals) + response, err := p.persistence.GetCurrentExecution(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ListConcreteExecutions( + ctx context.Context, + request *ListConcreteExecutionsRequest, +) (*ListConcreteExecutionsResponse, error) { + record := recordFn(ctx, "ListConcreteExecutions", request.ShardID, p.healthSignals) + response, err := p.persistence.ListConcreteExecutions(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) RegisterHistoryTaskReader( + ctx context.Context, + request *RegisterHistoryTaskReaderRequest, +) error { + // hint methods don't actually hit the DB, so no health signal is recorded for them + return p.persistence.RegisterHistoryTaskReader(ctx, request) +} + +func (p *executionHealthSignalPersistenceClient) UnregisterHistoryTaskReader( + ctx context.Context, + request *UnregisterHistoryTaskReaderRequest, +) { + // hint methods don't actually hit the DB, so no health signal is recorded for them + p.persistence.UnregisterHistoryTaskReader(ctx, request) +} + +func (p *executionHealthSignalPersistenceClient) UpdateHistoryTaskReaderProgress( + ctx context.Context, + request *UpdateHistoryTaskReaderProgressRequest, +) { +
// hint methods don't actually hit the DB, so no health signal is recorded for them + p.persistence.UpdateHistoryTaskReaderProgress(ctx, request) +} + +func (p *executionHealthSignalPersistenceClient) AddHistoryTasks( + ctx context.Context, + request *AddHistoryTasksRequest, +) error { + record := recordFn(ctx, "AddHistoryTasks", request.ShardID, p.healthSignals) + err := p.persistence.AddHistoryTasks(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) GetHistoryTasks( + ctx context.Context, + request *GetHistoryTasksRequest, +) (*GetHistoryTasksResponse, error) { + record := recordFn(ctx, "GetHistoryTasks", request.ShardID, p.healthSignals) + response, err := p.persistence.GetHistoryTasks(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) CompleteHistoryTask( + ctx context.Context, + request *CompleteHistoryTaskRequest, +) error { + record := recordFn(ctx, "CompleteHistoryTask", request.ShardID, p.healthSignals) + err := p.persistence.CompleteHistoryTask(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) RangeCompleteHistoryTasks( + ctx context.Context, + request *RangeCompleteHistoryTasksRequest, +) error { + record := recordFn(ctx, "RangeCompleteHistoryTasks", request.ShardID, p.healthSignals) + err := p.persistence.RangeCompleteHistoryTasks(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) PutReplicationTaskToDLQ( + ctx context.Context, + request *PutReplicationTaskToDLQRequest, +) error { + record := recordFn(ctx, "PutReplicationTaskToDLQ", request.ShardID, p.healthSignals) + err := p.persistence.PutReplicationTaskToDLQ(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) GetReplicationTasksFromDLQ( + ctx context.Context, + request *GetReplicationTasksFromDLQRequest, +) (*GetHistoryTasksResponse, error) { + record := recordFn(ctx,
"GetReplicationTasksFromDLQ", request.ShardID, p.healthSignals) + response, err := p.persistence.GetReplicationTasksFromDLQ(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) DeleteReplicationTaskFromDLQ( + ctx context.Context, + request *DeleteReplicationTaskFromDLQRequest, +) error { + record := recordFn(ctx, "DeleteReplicationTaskFromDLQ", request.ShardID, p.healthSignals) + err := p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) RangeDeleteReplicationTaskFromDLQ( + ctx context.Context, + request *RangeDeleteReplicationTaskFromDLQRequest, +) error { + record := recordFn(ctx, "RangeDeleteReplicationTaskFromDLQ", request.ShardID, p.healthSignals) + err := p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) IsReplicationDLQEmpty( + ctx context.Context, + request *GetReplicationTasksFromDLQRequest, +) (bool, error) { + record := recordFn(ctx, "IsReplicationDLQEmpty", request.ShardID, p.healthSignals) + response, err := p.persistence.IsReplicationDLQEmpty(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) AppendHistoryNodes( + ctx context.Context, + request *AppendHistoryNodesRequest, +) (*AppendHistoryNodesResponse, error) { + record := recordFn(ctx, "AppendHistoryNodes", request.ShardID, p.healthSignals) + response, err := p.persistence.AppendHistoryNodes(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) AppendRawHistoryNodes( + ctx context.Context, + request *AppendRawHistoryNodesRequest, +) (*AppendHistoryNodesResponse, error) { + record := recordFn(ctx, "AppendRawHistoryNodes", request.ShardID, p.healthSignals) + response, err := p.persistence.AppendRawHistoryNodes(ctx, request) + record(err) + return response, err +} + 
+func (p *executionHealthSignalPersistenceClient) ReadHistoryBranch( + ctx context.Context, + request *ReadHistoryBranchRequest, +) (*ReadHistoryBranchResponse, error) { + record := recordFn(ctx, "ReadHistoryBranch", request.ShardID, p.healthSignals) + response, err := p.persistence.ReadHistoryBranch(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ReadHistoryBranchReverse( + ctx context.Context, + request *ReadHistoryBranchReverseRequest, +) (*ReadHistoryBranchReverseResponse, error) { + record := recordFn(ctx, "ReadHistoryBranchReverse", request.ShardID, p.healthSignals) + response, err := p.persistence.ReadHistoryBranchReverse(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ReadHistoryBranchByBatch( + ctx context.Context, + request *ReadHistoryBranchRequest, +) (*ReadHistoryBranchByBatchResponse, error) { + record := recordFn(ctx, "ReadHistoryBranchByBatch", request.ShardID, p.healthSignals) + response, err := p.persistence.ReadHistoryBranchByBatch(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ReadRawHistoryBranch( + ctx context.Context, + request *ReadHistoryBranchRequest, +) (*ReadRawHistoryBranchResponse, error) { + record := recordFn(ctx, "ReadRawHistoryBranch", request.ShardID, p.healthSignals) + response, err := p.persistence.ReadRawHistoryBranch(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) ForkHistoryBranch( + ctx context.Context, + request *ForkHistoryBranchRequest, +) (*ForkHistoryBranchResponse, error) { + record := recordFn(ctx, "ForkHistoryBranch", request.ShardID, p.healthSignals) + response, err := p.persistence.ForkHistoryBranch(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) DeleteHistoryBranch( + ctx context.Context, + request *DeleteHistoryBranchRequest, 
+) error { + record := recordFn(ctx, "DeleteHistoryBranch", request.ShardID, p.healthSignals) + err := p.persistence.DeleteHistoryBranch(ctx, request) + record(err) + return err +} + +func (p *executionHealthSignalPersistenceClient) TrimHistoryBranch( + ctx context.Context, + request *TrimHistoryBranchRequest, +) (*TrimHistoryBranchResponse, error) { + record := recordFn(ctx, "TrimHistoryBranch", request.ShardID, p.healthSignals) + resp, err := p.persistence.TrimHistoryBranch(ctx, request) + record(err) + return resp, err +} + +func (p *executionHealthSignalPersistenceClient) GetHistoryTree( + ctx context.Context, + request *GetHistoryTreeRequest, +) (*GetHistoryTreeResponse, error) { + record := recordFn(ctx, "GetHistoryTree", request.ShardID, p.healthSignals) + response, err := p.persistence.GetHistoryTree(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) GetAllHistoryTreeBranches( + ctx context.Context, + request *GetAllHistoryTreeBranchesRequest, +) (*GetAllHistoryTreeBranchesResponse, error) { + record := recordFn(ctx, "GetAllHistoryTreeBranches", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetAllHistoryTreeBranches(ctx, request) + record(err) + return response, err +} + +func (p *executionHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func (p *taskHealthSignalPersistenceClient) GetName() string { + return p.persistence.GetName() +} + +func (p *taskHealthSignalPersistenceClient) CreateTasks( + ctx context.Context, + request *CreateTasksRequest, +) (*CreateTasksResponse, error) { + record := recordFn(ctx, "CreateTasks", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.CreateTasks(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) GetTasks( + ctx context.Context, + request *GetTasksRequest, +) (*GetTasksResponse, error) { + record := recordFn(ctx, "GetTasks", CallerSegmentMissing, 
p.healthSignals) + response, err := p.persistence.GetTasks(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) CompleteTask( + ctx context.Context, + request *CompleteTaskRequest, +) error { + record := recordFn(ctx, "CompleteTask", CallerSegmentMissing, p.healthSignals) + err := p.persistence.CompleteTask(ctx, request) + record(err) + return err +} + +func (p *taskHealthSignalPersistenceClient) CompleteTasksLessThan( + ctx context.Context, + request *CompleteTasksLessThanRequest, +) (int, error) { + record := recordFn(ctx, "CompleteTasksLessThan", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.CompleteTasksLessThan(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) CreateTaskQueue( + ctx context.Context, + request *CreateTaskQueueRequest, +) (*CreateTaskQueueResponse, error) { + record := recordFn(ctx, "CreateTaskQueue", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.CreateTaskQueue(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) UpdateTaskQueue( + ctx context.Context, + request *UpdateTaskQueueRequest, +) (*UpdateTaskQueueResponse, error) { + record := recordFn(ctx, "UpdateTaskQueue", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.UpdateTaskQueue(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) GetTaskQueue( + ctx context.Context, + request *GetTaskQueueRequest, +) (*GetTaskQueueResponse, error) { + record := recordFn(ctx, "GetTaskQueue", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetTaskQueue(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) ListTaskQueue( + ctx context.Context, + request *ListTaskQueueRequest, +) (*ListTaskQueueResponse, error) { + record := recordFn(ctx, "ListTaskQueue", 
CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.ListTaskQueue(ctx, request) + record(err) + return response, err +} + +func (p *taskHealthSignalPersistenceClient) DeleteTaskQueue( + ctx context.Context, + request *DeleteTaskQueueRequest, +) error { + record := recordFn(ctx, "DeleteTaskQueue", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteTaskQueue(ctx, request) + record(err) + return err +} + +func (p *taskHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func (p *metadataHealthSignalPersistenceClient) GetName() string { + return p.persistence.GetName() +} + +func (p *metadataHealthSignalPersistenceClient) CreateNamespace( + ctx context.Context, + request *CreateNamespaceRequest, +) (*CreateNamespaceResponse, error) { + record := recordFn(ctx, "CreateNamespace", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.CreateNamespace(ctx, request) + record(err) + return response, err +} + +func (p *metadataHealthSignalPersistenceClient) GetNamespace( + ctx context.Context, + request *GetNamespaceRequest, +) (*GetNamespaceResponse, error) { + record := recordFn(ctx, "GetNamespace", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetNamespace(ctx, request) + record(err) + return response, err +} + +func (p *metadataHealthSignalPersistenceClient) UpdateNamespace( + ctx context.Context, + request *UpdateNamespaceRequest, +) error { + record := recordFn(ctx, "UpdateNamespace", CallerSegmentMissing, p.healthSignals) + err := p.persistence.UpdateNamespace(ctx, request) + record(err) + return err +} + +func (p *metadataHealthSignalPersistenceClient) RenameNamespace( + ctx context.Context, + request *RenameNamespaceRequest, +) error { + record := recordFn(ctx, "RenameNamespace", CallerSegmentMissing, p.healthSignals) + err := p.persistence.RenameNamespace(ctx, request) + record(err) + return err +} + +func (p *metadataHealthSignalPersistenceClient) DeleteNamespace( + 
ctx context.Context, + request *DeleteNamespaceRequest, +) error { + record := recordFn(ctx, "DeleteNamespace", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteNamespace(ctx, request) + record(err) + return err +} + +func (p *metadataHealthSignalPersistenceClient) DeleteNamespaceByName( + ctx context.Context, + request *DeleteNamespaceByNameRequest, +) error { + record := recordFn(ctx, "DeleteNamespaceByName", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteNamespaceByName(ctx, request) + record(err) + return err +} + +func (p *metadataHealthSignalPersistenceClient) ListNamespaces( + ctx context.Context, + request *ListNamespacesRequest, +) (*ListNamespacesResponse, error) { + record := recordFn(ctx, "ListNamespaces", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.ListNamespaces(ctx, request) + record(err) + return response, err +} + +func (p *metadataHealthSignalPersistenceClient) GetMetadata( + ctx context.Context, +) (*GetMetadataResponse, error) { + record := recordFn(ctx, "GetMetadata", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetMetadata(ctx) + record(err) + return response, err +} + +func (p *metadataHealthSignalPersistenceClient) InitializeSystemNamespaces( + ctx context.Context, + currentClusterName string, +) error { + record := recordFn(ctx, "InitializeSystemNamespaces", CallerSegmentMissing, p.healthSignals) + err := p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) + record(err) + return err +} + +func (p *metadataHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func (p *clusterMetadataHealthSignalPersistenceClient) GetName() string { + return p.persistence.GetName() +} + +func (p *clusterMetadataHealthSignalPersistenceClient) GetClusterMembers( + ctx context.Context, + request *GetClusterMembersRequest, +) (*GetClusterMembersResponse, error) { + record := recordFn(ctx, "GetClusterMembers", CallerSegmentMissing, 
p.healthSignals) + response, err := p.persistence.GetClusterMembers(ctx, request) + record(err) + return response, err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) UpsertClusterMembership( + ctx context.Context, + request *UpsertClusterMembershipRequest, +) error { + record := recordFn(ctx, "UpsertClusterMembership", CallerSegmentMissing, p.healthSignals) + err := p.persistence.UpsertClusterMembership(ctx, request) + record(err) + return err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) PruneClusterMembership( + ctx context.Context, + request *PruneClusterMembershipRequest, +) error { + record := recordFn(ctx, "PruneClusterMembership", CallerSegmentMissing, p.healthSignals) + err := p.persistence.PruneClusterMembership(ctx, request) + record(err) + return err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) ListClusterMetadata( + ctx context.Context, + request *ListClusterMetadataRequest, +) (*ListClusterMetadataResponse, error) { + record := recordFn(ctx, "ListClusterMetadata", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.ListClusterMetadata(ctx, request) + record(err) + return response, err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) GetCurrentClusterMetadata( + ctx context.Context, +) (*GetClusterMetadataResponse, error) { + record := recordFn(ctx, "GetCurrentClusterMetadata", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetCurrentClusterMetadata(ctx) + record(err) + return response, err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) GetClusterMetadata( + ctx context.Context, + request *GetClusterMetadataRequest, +) (*GetClusterMetadataResponse, error) { + record := recordFn(ctx, "GetClusterMetadata", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetClusterMetadata(ctx, request) + record(err) + return response, err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) SaveClusterMetadata( + ctx 
context.Context, + request *SaveClusterMetadataRequest, +) (bool, error) { + record := recordFn(ctx, "SaveClusterMetadata", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.SaveClusterMetadata(ctx, request) + record(err) + return response, err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) DeleteClusterMetadata( + ctx context.Context, + request *DeleteClusterMetadataRequest, +) error { + record := recordFn(ctx, "DeleteClusterMetadata", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteClusterMetadata(ctx, request) + record(err) + return err +} + +func (p *clusterMetadataHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func (p *queueHealthSignalPersistenceClient) Init( + ctx context.Context, + blob *commonpb.DataBlob, +) error { + return p.persistence.Init(ctx, blob) +} + +func (p *queueHealthSignalPersistenceClient) EnqueueMessage( + ctx context.Context, + blob commonpb.DataBlob, +) error { + record := recordFn(ctx, "EnqueueMessage", CallerSegmentMissing, p.healthSignals) + err := p.persistence.EnqueueMessage(ctx, blob) + record(err) + return err +} + +func (p *queueHealthSignalPersistenceClient) ReadMessages( + ctx context.Context, + lastMessageID int64, + maxCount int, +) ([]*QueueMessage, error) { + record := recordFn(ctx, "ReadMessages", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.ReadMessages(ctx, lastMessageID, maxCount) + record(err) + return response, err +} + +func (p *queueHealthSignalPersistenceClient) UpdateAckLevel( + ctx context.Context, + metadata *InternalQueueMetadata, +) error { + record := recordFn(ctx, "UpdateAckLevel", CallerSegmentMissing, p.healthSignals) + err := p.persistence.UpdateAckLevel(ctx, metadata) + record(err) + return err +} + +func (p *queueHealthSignalPersistenceClient) GetAckLevels( + ctx context.Context, +) (*InternalQueueMetadata, error) { + record := recordFn(ctx, "GetAckLevels", CallerSegmentMissing, p.healthSignals) + 
response, err := p.persistence.GetAckLevels(ctx) + record(err) + return response, err +} + +func (p *queueHealthSignalPersistenceClient) DeleteMessagesBefore( + ctx context.Context, + messageID int64, +) error { + record := recordFn(ctx, "DeleteMessagesBefore", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteMessagesBefore(ctx, messageID) + record(err) + return err +} + +func (p *queueHealthSignalPersistenceClient) EnqueueMessageToDLQ( + ctx context.Context, + blob commonpb.DataBlob, +) (int64, error) { + record := recordFn(ctx, "EnqueueMessageToDLQ", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.EnqueueMessageToDLQ(ctx, blob) + record(err) + return response, err +} + +func (p *queueHealthSignalPersistenceClient) ReadMessagesFromDLQ( + ctx context.Context, + firstMessageID int64, + lastMessageID int64, + pageSize int, + pageToken []byte, +) ([]*QueueMessage, []byte, error) { + record := recordFn(ctx, "ReadMessagesFromDLQ", CallerSegmentMissing, p.healthSignals) + response, data, err := p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) + record(err) + return response, data, err +} + +func (p *queueHealthSignalPersistenceClient) RangeDeleteMessagesFromDLQ( + ctx context.Context, + firstMessageID int64, + lastMessageID int64, +) error { + record := recordFn(ctx, "RangeDeleteMessagesFromDLQ", CallerSegmentMissing, p.healthSignals) + err := p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) + record(err) + return err +} +func (p *queueHealthSignalPersistenceClient) UpdateDLQAckLevel( + ctx context.Context, + metadata *InternalQueueMetadata, +) error { + record := recordFn(ctx, "UpdateDLQAckLevel", CallerSegmentMissing, p.healthSignals) + err := p.persistence.UpdateDLQAckLevel(ctx, metadata) + record(err) + return err +} + +func (p *queueHealthSignalPersistenceClient) GetDLQAckLevels( + ctx context.Context, +) (*InternalQueueMetadata, error) { + record := 
recordFn(ctx, "GetDLQAckLevels", CallerSegmentMissing, p.healthSignals) + response, err := p.persistence.GetDLQAckLevels(ctx) + record(err) + return response, err +} + +func (p *queueHealthSignalPersistenceClient) DeleteMessageFromDLQ( + ctx context.Context, + messageID int64, +) error { + record := recordFn(ctx, "DeleteMessageFromDLQ", CallerSegmentMissing, p.healthSignals) + err := p.persistence.DeleteMessageFromDLQ(ctx, messageID) + record(err) + return err +} + +func (p *queueHealthSignalPersistenceClient) Close() { + p.persistence.Close() +} + +func recordFn( + ctx context.Context, + api string, + shardID int32, + healthSignals aggregate.SignalAggregator[quotas.Request], +) func(err error) { + callerInfo := headers.GetCallerInfo(ctx) + return healthSignals.GetRecordFn(quotas.NewRequest( + api, + RateLimitDefaultToken, + callerInfo.CallerName, + callerInfo.CallerType, + shardID, + callerInfo.CallOrigin, + )) +} From f38f1423dd7499343c37576dca16374499297a9c Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Fri, 19 May 2023 10:28:07 -0700 Subject: [PATCH 06/36] inject signal aggregator --- .../persistence_health_signal_aggregator.go | 9 ++++---- common/dynamicconfig/constants.go | 4 ++++ common/persistence/client/factory.go | 22 +++++++++++++++++++ common/persistence/client/fx.go | 3 +++ common/resource/fx.go | 11 ++++++++++ 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index 80b1375c75a..939f58e4b1d 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -28,6 +28,7 @@ import ( "sync" "time" + "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/quotas" ) @@ -67,13 +68,13 @@ func NewPersistenceHealthSignalAggregator[K comparable]( } func NewPerShardPerNsHealthSignalAggregator( - windowSize time.Duration, - maxBufferSize int, + 
windowSize dynamicconfig.DurationPropertyFn, + maxBufferSize dynamicconfig.IntPropertyFn, ) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( perShardPerNsKeyMapperFn, - windowSize, - maxBufferSize, + windowSize(), + maxBufferSize(), ) } diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 99bf76ba46c..9b140ebbdef 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -106,6 +106,10 @@ const ( EnableEagerWorkflowStart = "system.enableEagerWorkflowStart" // NamespaceCacheRefreshInterval is the key for namespace cache refresh interval dynamic config NamespaceCacheRefreshInterval = "system.namespaceCacheRefreshInterval" + // PersistenceHealthSignalWindowSize is the time window size in seconds for aggregating persistence signals + PersistenceHealthSignalWindowSize = "system.persistenceHealthSignalWindowSize" + // PersistenceHealthSignalBufferSize is the maximum number of persistence signals to buffer in memory per signal key + PersistenceHealthSignalBufferSize = "system.persistenceHealthSignalBufferSize" // Whether the deadlock detector should dump goroutines DeadlockDumpGoroutines = "system.deadlock.DumpGoroutines" diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index 47caabbd474..8f17dc5a184 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -26,6 +26,7 @@ package client import ( "go.temporal.io/api/serviceerror" + "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common" "go.temporal.io/server/common/config" @@ -69,6 +70,7 @@ type ( logger log.Logger clusterName string ratelimiter quotas.RequestRateLimiter + healthSignals aggregate.SignalAggregator[quotas.Request] } ) @@ -87,6 +89,7 @@ func NewFactory( clusterName string, metricsHandler metrics.Handler, logger log.Logger, + healthSignals 
aggregate.SignalAggregator[quotas.Request], ) Factory { return &factoryImpl{ dataStoreFactory: dataStoreFactory, @@ -96,6 +99,7 @@ func NewFactory( logger: logger, clusterName: clusterName, ratelimiter: ratelimiter, + healthSignals: healthSignals, } } @@ -113,6 +117,9 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { if f.metricsHandler != nil { result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewTaskPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } return result, nil } @@ -130,6 +137,9 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { if f.metricsHandler != nil { result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewShardPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } result = p.NewShardPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil } @@ -148,6 +158,9 @@ func (f *factoryImpl) NewMetadataManager() (p.MetadataManager, error) { if f.metricsHandler != nil { result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewMetadataPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } result = p.NewMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil } @@ -166,6 +179,9 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err if f.metricsHandler != nil { result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewClusterMetadataPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } result = p.NewClusterMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil } @@ -184,6 +200,9 @@ func (f *factoryImpl) 
NewExecutionManager() (p.ExecutionManager, error) { if f.metricsHandler != nil { result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewExecutionPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } result = p.NewExecutionPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil } @@ -200,6 +219,9 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu if f.metricsHandler != nil { result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.logger) } + if f.healthSignals != nil { + result = p.NewQueuePersistenceHealthSignalClient(result, f.healthSignals, f.logger) + } result = p.NewQueuePersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return p.NewNamespaceReplicationQueue(result, f.serializer, f.clusterName, f.metricsHandler, f.logger) } diff --git a/common/persistence/client/fx.go b/common/persistence/client/fx.go index f2876f647b4..58a77127478 100644 --- a/common/persistence/client/fx.go +++ b/common/persistence/client/fx.go @@ -25,6 +25,7 @@ package client import ( + "go.temporal.io/server/common/aggregate" "go.uber.org/fx" "go.temporal.io/server/common/cluster" @@ -57,6 +58,7 @@ type ( ServiceName primitives.ServiceName MetricsHandler metrics.Handler Logger log.Logger + HealthSignals aggregate.SignalAggregator[quotas.Request] } FactoryProviderFn func(NewFactoryParams) Factory @@ -97,5 +99,6 @@ func FactoryProvider( string(params.ClusterName), params.MetricsHandler, params.Logger, + params.HealthSignals, ) } diff --git a/common/resource/fx.go b/common/resource/fx.go index 3f793afd476..c160c29e175 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -31,6 +31,7 @@ import ( "os" "time" + "go.temporal.io/server/common/aggregate" "go.uber.org/fx" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -105,6 +106,7 @@ var Module = fx.Options( func(p 
namespace.Registry) common.Pingable { return p }, fx.ResultTags(`group:"deadlockDetectorRoots"`), )), + fx.Provide(PersistenceHealthSignalAggregatorProvider), fx.Provide(serialization.NewSerializer), fx.Provide(HistoryBootstrapContainerProvider), fx.Provide(VisibilityBootstrapContainerProvider), @@ -215,6 +217,15 @@ func NamespaceRegistryProvider( ) } +func PersistenceHealthSignalAggregatorProvider( + dynamicCollection *dynamicconfig.Collection, +) aggregate.SignalAggregator[quotas.Request] { + return aggregate.NewPerShardPerNsHealthSignalAggregator( + dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), + dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), + ) +} + func ClientFactoryProvider( factoryProvider client.FactoryProvider, rpcFactory common.RPCFactory, From c037475358db300c4dcf240f7d8c058c4b011ef3 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Fri, 19 May 2023 11:24:57 -0700 Subject: [PATCH 07/36] fix tests --- common/aggregate/noop_signal_aggregator.go | 47 +++++++++++++++++++ .../persistence-tests/persistenceTestBase.go | 5 +- 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 common/aggregate/noop_signal_aggregator.go diff --git a/common/aggregate/noop_signal_aggregator.go b/common/aggregate/noop_signal_aggregator.go new file mode 100644 index 00000000000..3ff947b3ae0 --- /dev/null +++ b/common/aggregate/noop_signal_aggregator.go @@ -0,0 +1,47 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package aggregate + +import "go.temporal.io/server/common/quotas" + +var NoopPersistenceHealthSignalAggregator SignalAggregator[quotas.Request] = newNoopSignalAggregator[quotas.Request]() + +type ( + noopSignalAggregator[T any] struct{} +) + +func newNoopSignalAggregator[T any]() *noopSignalAggregator[T] { return &noopSignalAggregator[T]{} } + +func (a *noopSignalAggregator[T]) GetRecordFn(T) func(error) { + return func(error) {} +} + +func (a *noopSignalAggregator[T]) AverageLatency(T) float64 { + return 0 +} + +func (*noopSignalAggregator[T]) ErrorRatio(T) float64 { + return 0 +} diff --git a/common/persistence/persistence-tests/persistenceTestBase.go b/common/persistence/persistence-tests/persistenceTestBase.go index a1b3bc4b922..7a6f9d0cabd 100644 --- a/common/persistence/persistence-tests/persistenceTestBase.go +++ b/common/persistence/persistence-tests/persistenceTestBase.go @@ -33,6 +33,8 @@ import ( "time" "github.com/stretchr/testify/suite" + "go.temporal.io/server/common/aggregate" + "go.temporal.io/server/common/quotas" persistencespb "go.temporal.io/server/api/persistence/v1" replicationspb "go.temporal.io/server/api/replication/v1" @@ -98,6 +100,7 @@ type ( TaskIDGenerator TransferTaskIDGenerator ClusterMetadata cluster.Metadata SearchAttributesManager searchattribute.Manager + PersistenceHealthSignals aggregate.SignalAggregator[quotas.Request] ReadLevel int64 ReplicationReadLevel int64 DefaultTestCluster PersistenceTestCluster @@ -202,7 +205,7 @@ func (s *TestBase) Setup(clusterMetadataConfig *cluster.Config) { s.Logger, metrics.NoopMetricsHandler, ) - factory := client.NewFactory(dataStoreFactory, &cfg, nil, serialization.NewSerializer(), clusterName, metrics.NoopMetricsHandler, s.Logger) + factory := client.NewFactory(dataStoreFactory, &cfg, nil, serialization.NewSerializer(), clusterName, metrics.NoopMetricsHandler, s.Logger, aggregate.NoopPersistenceHealthSignalAggregator) s.TaskMgr, err = factory.NewTaskManager() 
s.fatalOnError("NewTaskManager", err) From 859950ee2965b5ac2144f46930e1308d95890e69 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Mon, 22 May 2023 12:26:00 -0700 Subject: [PATCH 08/36] add metric emission --- .../persistence_health_signal_aggregator.go | 48 +++++++++++++++---- common/aggregate/signal_aggregator.go | 15 ++++-- common/metrics/metric_defs.go | 2 + common/metrics/tags.go | 5 ++ common/resource/fx.go | 2 + 5 files changed, 60 insertions(+), 12 deletions(-) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index 939f58e4b1d..b21ff056cb4 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -29,12 +29,14 @@ import ( "time" "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/quotas" ) type ( - PersistenceHealthSignalAggregator[K comparable] struct { + PersistenceHealthSignalAggregator[K SignalKey] struct { SignalAggregator[quotas.Request] + keyMapper SignalKeyMapperFn[quotas.Request, K] latencyAverages map[K]MovingWindowAverage @@ -45,6 +47,9 @@ type ( windowSize time.Duration maxBufferSize int + + metricsHandler metrics.Handler + emitMetricsTimer *time.Ticker } perShardPerNsHealthSignalKey struct { @@ -53,28 +58,35 @@ type ( } ) -func NewPersistenceHealthSignalAggregator[K comparable]( +func NewPersistenceHealthSignalAggregator[K SignalKey]( keyMapper SignalKeyMapperFn[quotas.Request, K], windowSize time.Duration, maxBufferSize int, + metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[K] { - return &PersistenceHealthSignalAggregator[K]{ - keyMapper: keyMapper, - latencyAverages: make(map[K]MovingWindowAverage), - errorRatios: make(map[K]MovingWindowAverage), - windowSize: windowSize, - maxBufferSize: maxBufferSize, + ret := &PersistenceHealthSignalAggregator[K]{ + keyMapper: keyMapper, + latencyAverages: 
make(map[K]MovingWindowAverage), + errorRatios: make(map[K]MovingWindowAverage), + windowSize: windowSize, + maxBufferSize: maxBufferSize, + metricsHandler: metricsHandler, + emitMetricsTimer: time.NewTicker(windowSize), } + go ret.emitMetricsLoop() + return ret } func NewPerShardPerNsHealthSignalAggregator( windowSize dynamicconfig.DurationPropertyFn, maxBufferSize dynamicconfig.IntPropertyFn, + metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( perShardPerNsKeyMapperFn, windowSize(), maxBufferSize(), + metricsHandler, ) } @@ -85,6 +97,12 @@ func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { } } +func (k perShardPerNsHealthSignalKey) GetMetricTags() []metrics.Tag { + nsTag := metrics.NamespaceTag(k.namespace) + shardTag := metrics.ShardTag(k.shardID) + return []metrics.Tag{nsTag, shardTag} +} + func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) { start := time.Now() return func(err error) { @@ -141,3 +159,17 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( (*averages)[key] = newAvg return newAvg } + +func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { + for { + select { + case <-s.emitMetricsTimer.C: + for key, avg := range s.latencyAverages { + s.metricsHandler.Gauge(metrics.PersistenceAvgLatencyPerShardPerNamespace.GetMetricName()).Record(avg.Average(), key.GetMetricTags()...) + } + for key, ratio := range s.errorRatios { + s.metricsHandler.Gauge(metrics.PersistenceErrPerShardPerNamespace.GetMetricName()).Record(ratio.Average(), key.GetMetricTags()...) 
+ } + } + } +} diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go index ede2166e5fa..83951cb75f5 100644 --- a/common/aggregate/signal_aggregator.go +++ b/common/aggregate/signal_aggregator.go @@ -24,12 +24,19 @@ package aggregate +import "go.temporal.io/server/common/metrics" + type ( - SignalKeyMapperFn[T any, K comparable] func(signalOrigin T) K + SignalKey interface { + comparable + GetMetricTags() []metrics.Tag + } + + SignalKeyMapperFn[T any, K SignalKey] func(origin T) K SignalAggregator[T any] interface { - GetRecordFn(key T) func(err error) - AverageLatency(key T) float64 - ErrorRatio(key T) float64 + GetRecordFn(origin T) func(err error) + AverageLatency(origin T) float64 + ErrorRatio(origin T) float64 } ) diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index 7b1715eab43..9f6025008b9 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -1663,4 +1663,6 @@ var ( VisibilityPersistenceFailures = NewCounterDef("visibility_persistence_errors") VisibilityPersistenceResourceExhausted = NewCounterDef("visibility_persistence_resource_exhausted") VisibilityPersistenceLatency = NewTimerDef("visibility_persistence_latency") + PersistenceErrPerShardPerNamespace = NewDimensionlessHistogramDef("persistence_error_ratio_per_shard_per_ns") + PersistenceAvgLatencyPerShardPerNamespace = NewDimensionlessHistogramDef("persistence_average_latency_per_shard_per_namespace") ) diff --git a/common/metrics/tags.go b/common/metrics/tags.go index 24f4d5ba64e..0b96544a6d3 100644 --- a/common/metrics/tags.go +++ b/common/metrics/tags.go @@ -44,6 +44,7 @@ const ( instance = "instance" namespace = "namespace" namespaceState = "namespace_state" + shard = "shard" targetCluster = "target_cluster" fromCluster = "from_cluster" toCluster = "to_cluster" @@ -131,6 +132,10 @@ func InstanceTag(value string) Tag { return &tagImpl{key: instance, value: value} } +func ShardTag(value int32) Tag { + return 
&tagImpl{key: shard, value: string(value)} +} + // TargetClusterTag returns a new target cluster tag. func TargetClusterTag(value string) Tag { if len(value) == 0 { diff --git a/common/resource/fx.go b/common/resource/fx.go index c160c29e175..f443864f4db 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -219,10 +219,12 @@ func NamespaceRegistryProvider( func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, + metricsHandler metrics.Handler, ) aggregate.SignalAggregator[quotas.Request] { return aggregate.NewPerShardPerNsHealthSignalAggregator( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), + metricsHandler, ) } From e283bfb3dca7228d715cd28235e436caafb34fa6 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Mon, 22 May 2023 17:42:19 -0700 Subject: [PATCH 09/36] race condition --- common/aggregate/persistence_health_signal_aggregator.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index b21ff056cb4..bad028614ae 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -164,12 +164,17 @@ func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { for { select { case <-s.emitMetricsTimer.C: + s.latencyLock.RLock() for key, avg := range s.latencyAverages { s.metricsHandler.Gauge(metrics.PersistenceAvgLatencyPerShardPerNamespace.GetMetricName()).Record(avg.Average(), key.GetMetricTags()...) } + s.latencyLock.RUnlock() + + s.errorLock.RLock() for key, ratio := range s.errorRatios { s.metricsHandler.Gauge(metrics.PersistenceErrPerShardPerNamespace.GetMetricName()).Record(ratio.Average(), key.GetMetricTags()...) 
} + s.errorLock.RUnlock() } } } From c43f1a1821024b394a0dc266a969abfd789f20e4 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 17:54:31 -0700 Subject: [PATCH 10/36] Revert "race condition" This reverts commit e283bfb3dca7228d715cd28235e436caafb34fa6. --- common/aggregate/persistence_health_signal_aggregator.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index bad028614ae..b21ff056cb4 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -164,17 +164,12 @@ func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { for { select { case <-s.emitMetricsTimer.C: - s.latencyLock.RLock() for key, avg := range s.latencyAverages { s.metricsHandler.Gauge(metrics.PersistenceAvgLatencyPerShardPerNamespace.GetMetricName()).Record(avg.Average(), key.GetMetricTags()...) } - s.latencyLock.RUnlock() - - s.errorLock.RLock() for key, ratio := range s.errorRatios { s.metricsHandler.Gauge(metrics.PersistenceErrPerShardPerNamespace.GetMetricName()).Record(ratio.Average(), key.GetMetricTags()...) } - s.errorLock.RUnlock() } } } From dc5eb2a75c4ff988f3b623dcff80d07099161818 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 17:54:49 -0700 Subject: [PATCH 11/36] Revert "add metric emission" This reverts commit 859950ee2965b5ac2144f46930e1308d95890e69. 
--- .../persistence_health_signal_aggregator.go | 48 ++++--------------- common/aggregate/signal_aggregator.go | 15 ++---- common/metrics/metric_defs.go | 2 - common/metrics/tags.go | 5 -- common/resource/fx.go | 2 - 5 files changed, 12 insertions(+), 60 deletions(-) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index b21ff056cb4..939f58e4b1d 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -29,14 +29,12 @@ import ( "time" "go.temporal.io/server/common/dynamicconfig" - "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/quotas" ) type ( - PersistenceHealthSignalAggregator[K SignalKey] struct { + PersistenceHealthSignalAggregator[K comparable] struct { SignalAggregator[quotas.Request] - keyMapper SignalKeyMapperFn[quotas.Request, K] latencyAverages map[K]MovingWindowAverage @@ -47,9 +45,6 @@ type ( windowSize time.Duration maxBufferSize int - - metricsHandler metrics.Handler - emitMetricsTimer *time.Ticker } perShardPerNsHealthSignalKey struct { @@ -58,35 +53,28 @@ type ( } ) -func NewPersistenceHealthSignalAggregator[K SignalKey]( +func NewPersistenceHealthSignalAggregator[K comparable]( keyMapper SignalKeyMapperFn[quotas.Request, K], windowSize time.Duration, maxBufferSize int, - metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[K] { - ret := &PersistenceHealthSignalAggregator[K]{ - keyMapper: keyMapper, - latencyAverages: make(map[K]MovingWindowAverage), - errorRatios: make(map[K]MovingWindowAverage), - windowSize: windowSize, - maxBufferSize: maxBufferSize, - metricsHandler: metricsHandler, - emitMetricsTimer: time.NewTicker(windowSize), + return &PersistenceHealthSignalAggregator[K]{ + keyMapper: keyMapper, + latencyAverages: make(map[K]MovingWindowAverage), + errorRatios: make(map[K]MovingWindowAverage), + windowSize: windowSize, + maxBufferSize: 
maxBufferSize, } - go ret.emitMetricsLoop() - return ret } func NewPerShardPerNsHealthSignalAggregator( windowSize dynamicconfig.DurationPropertyFn, maxBufferSize dynamicconfig.IntPropertyFn, - metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( perShardPerNsKeyMapperFn, windowSize(), maxBufferSize(), - metricsHandler, ) } @@ -97,12 +85,6 @@ func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { } } -func (k perShardPerNsHealthSignalKey) GetMetricTags() []metrics.Tag { - nsTag := metrics.NamespaceTag(k.namespace) - shardTag := metrics.ShardTag(k.shardID) - return []metrics.Tag{nsTag, shardTag} -} - func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) { start := time.Now() return func(err error) { @@ -159,17 +141,3 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( (*averages)[key] = newAvg return newAvg } - -func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { - for { - select { - case <-s.emitMetricsTimer.C: - for key, avg := range s.latencyAverages { - s.metricsHandler.Gauge(metrics.PersistenceAvgLatencyPerShardPerNamespace.GetMetricName()).Record(avg.Average(), key.GetMetricTags()...) - } - for key, ratio := range s.errorRatios { - s.metricsHandler.Gauge(metrics.PersistenceErrPerShardPerNamespace.GetMetricName()).Record(ratio.Average(), key.GetMetricTags()...) 
- } - } - } -} diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go index 83951cb75f5..ede2166e5fa 100644 --- a/common/aggregate/signal_aggregator.go +++ b/common/aggregate/signal_aggregator.go @@ -24,19 +24,12 @@ package aggregate -import "go.temporal.io/server/common/metrics" - type ( - SignalKey interface { - comparable - GetMetricTags() []metrics.Tag - } - - SignalKeyMapperFn[T any, K SignalKey] func(origin T) K + SignalKeyMapperFn[T any, K comparable] func(signalOrigin T) K SignalAggregator[T any] interface { - GetRecordFn(origin T) func(err error) - AverageLatency(origin T) float64 - ErrorRatio(origin T) float64 + GetRecordFn(key T) func(err error) + AverageLatency(key T) float64 + ErrorRatio(key T) float64 } ) diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index 9f6025008b9..7b1715eab43 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -1663,6 +1663,4 @@ var ( VisibilityPersistenceFailures = NewCounterDef("visibility_persistence_errors") VisibilityPersistenceResourceExhausted = NewCounterDef("visibility_persistence_resource_exhausted") VisibilityPersistenceLatency = NewTimerDef("visibility_persistence_latency") - PersistenceErrPerShardPerNamespace = NewDimensionlessHistogramDef("persistence_error_ratio_per_shard_per_ns") - PersistenceAvgLatencyPerShardPerNamespace = NewDimensionlessHistogramDef("persistence_average_latency_per_shard_per_namespace") ) diff --git a/common/metrics/tags.go b/common/metrics/tags.go index 0b96544a6d3..24f4d5ba64e 100644 --- a/common/metrics/tags.go +++ b/common/metrics/tags.go @@ -44,7 +44,6 @@ const ( instance = "instance" namespace = "namespace" namespaceState = "namespace_state" - shard = "shard" targetCluster = "target_cluster" fromCluster = "from_cluster" toCluster = "to_cluster" @@ -132,10 +131,6 @@ func InstanceTag(value string) Tag { return &tagImpl{key: instance, value: value} } -func ShardTag(value int32) Tag { - return 
&tagImpl{key: shard, value: string(value)} -} - // TargetClusterTag returns a new target cluster tag. func TargetClusterTag(value string) Tag { if len(value) == 0 { diff --git a/common/resource/fx.go b/common/resource/fx.go index f443864f4db..c160c29e175 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -219,12 +219,10 @@ func NamespaceRegistryProvider( func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, - metricsHandler metrics.Handler, ) aggregate.SignalAggregator[quotas.Request] { return aggregate.NewPerShardPerNsHealthSignalAggregator( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), - metricsHandler, ) } From 217cb9637ee8e82e341cec30dc36aec871e6206f Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 19:34:15 -0700 Subject: [PATCH 12/36] emit per shard RPS metric --- .../persistence_health_signal_aggregator.go | 73 +++++++++++++++++-- common/aggregate/signal_aggregator.go | 13 +++- common/metrics/metric_defs.go | 1 + common/resource/fx.go | 2 + 4 files changed, 78 insertions(+), 11 deletions(-) diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index 939f58e4b1d..1d3f56f3eb7 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -26,17 +26,22 @@ package aggregate import ( "sync" + "sync/atomic" "time" "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/quotas" ) type ( - PersistenceHealthSignalAggregator[K comparable] struct { + PersistenceHealthSignalAggregator[K SignalKey] struct { SignalAggregator[quotas.Request] keyMapper SignalKeyMapperFn[quotas.Request, K] + totalRequests map[K]*atomic.Int64 + totalRequestsLock sync.RWMutex + 
latencyAverages map[K]MovingWindowAverage latencyLock sync.RWMutex @@ -45,6 +50,9 @@ type ( windowSize time.Duration maxBufferSize int + + metricsHandler metrics.Handler + emitMetricsTimer *time.Ticker } perShardPerNsHealthSignalKey struct { @@ -53,28 +61,34 @@ type ( } ) -func NewPersistenceHealthSignalAggregator[K comparable]( +func NewPersistenceHealthSignalAggregator[K SignalKey]( keyMapper SignalKeyMapperFn[quotas.Request, K], windowSize time.Duration, maxBufferSize int, + metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[K] { return &PersistenceHealthSignalAggregator[K]{ - keyMapper: keyMapper, - latencyAverages: make(map[K]MovingWindowAverage), - errorRatios: make(map[K]MovingWindowAverage), - windowSize: windowSize, - maxBufferSize: maxBufferSize, + keyMapper: keyMapper, + totalRequests: make(map[K]*atomic.Int64), + latencyAverages: make(map[K]MovingWindowAverage), + errorRatios: make(map[K]MovingWindowAverage), + windowSize: windowSize, + maxBufferSize: maxBufferSize, + metricsHandler: metricsHandler, + emitMetricsTimer: time.NewTicker(windowSize), } } func NewPerShardPerNsHealthSignalAggregator( windowSize dynamicconfig.DurationPropertyFn, maxBufferSize dynamicconfig.IntPropertyFn, + metricsHandler metrics.Handler, ) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( perShardPerNsKeyMapperFn, windowSize(), maxBufferSize(), + metricsHandler, ) } @@ -85,9 +99,14 @@ func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { } } +func (k perShardPerNsHealthSignalKey) GetNamespace() string { + return k.namespace +} + func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) { start := time.Now() return func(err error) { + s.getOrInitRequestCount(req).Add(1) s.getOrInitLatencyAverage(req).Record(time.Since(start).Milliseconds()) errorRatio := s.getOrInitErrorRatio(req) if err != nil { @@ -141,3 
+160,43 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( (*averages)[key] = newAvg return newAvg } + +func (s *PersistenceHealthSignalAggregator[_]) getOrInitRequestCount( + req quotas.Request, +) *atomic.Int64 { + key := s.keyMapper(req) + + s.totalRequestsLock.RLock() + count, ok := s.totalRequests[key] + s.totalRequestsLock.RUnlock() + if ok { + return count + } + + newCount := &atomic.Int64{} + + s.totalRequestsLock.Lock() + defer s.totalRequestsLock.Unlock() + + count, ok = s.totalRequests[key] + if ok { + return count + } + + s.totalRequests[key] = newCount + return newCount +} + +func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { + for { + select { + case <-s.emitMetricsTimer.C: + s.totalRequestsLock.RLock() + for key, count := range s.totalRequests { + shardRPS := int64(float64(count.Swap(0)) / s.windowSize.Seconds()) + s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS, metrics.NamespaceTag(key.GetNamespace())) + } + s.totalRequestsLock.RUnlock() + } + } +} diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go index ede2166e5fa..abcfb4dfb72 100644 --- a/common/aggregate/signal_aggregator.go +++ b/common/aggregate/signal_aggregator.go @@ -25,11 +25,16 @@ package aggregate type ( - SignalKeyMapperFn[T any, K comparable] func(signalOrigin T) K + SignalKey interface { + comparable + GetNamespace() string + } + + SignalKeyMapperFn[T any, K SignalKey] func(origin T) K SignalAggregator[T any] interface { - GetRecordFn(key T) func(err error) - AverageLatency(key T) float64 - ErrorRatio(key T) float64 + GetRecordFn(origin T) func(err error) + AverageLatency(origin T) float64 + ErrorRatio(origin T) float64 } ) diff --git a/common/metrics/metric_defs.go b/common/metrics/metric_defs.go index 7b1715eab43..be32ee11c12 100644 --- a/common/metrics/metric_defs.go +++ b/common/metrics/metric_defs.go @@ -1647,6 +1647,7 @@ 
var ( PersistenceFailures = NewCounterDef("persistence_errors") PersistenceErrorWithType = NewCounterDef("persistence_error_with_type") PersistenceLatency = NewTimerDef("persistence_latency") + PersistenceShardRPS = NewDimensionlessHistogramDef("persistence_shard_rps") PersistenceErrShardExistsCounter = NewCounterDef("persistence_errors_shard_exists") PersistenceErrShardOwnershipLostCounter = NewCounterDef("persistence_errors_shard_ownership_lost") PersistenceErrConditionFailedCounter = NewCounterDef("persistence_errors_condition_failed") diff --git a/common/resource/fx.go b/common/resource/fx.go index c160c29e175..f443864f4db 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -219,10 +219,12 @@ func NamespaceRegistryProvider( func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, + metricsHandler metrics.Handler, ) aggregate.SignalAggregator[quotas.Request] { return aggregate.NewPerShardPerNsHealthSignalAggregator( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), + metricsHandler, ) } From bf205a40af33a66250c37a88757a5df7f9be36c4 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 19:41:28 -0700 Subject: [PATCH 13/36] cleanup --- .../aggregate/bench_moving_window_avg_test.go | 2 +- common/aggregate/moving_window_average.go | 25 ++++++++++++------- .../persistence_health_signal_aggregator.go | 3 +-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 706460c2c30..50ad9d04359 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -39,7 +39,7 @@ const ( ) func BenchmarkRingMovingWindowAvg(b *testing.B) { - avg := NewMovingWindowAvgImpl(testWindowSize, testBufferSize) + avg := 
newMovingWindowAvgImpl(testWindowSize, testBufferSize) for i := 0; i < b.N; i++ { avg.Record(rand.Int63()) avg.Average() diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 49758ec522a..178e1a94e01 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -41,7 +41,7 @@ type ( timestamp time.Time } - MovingWindowAvgImpl struct { + movingWindowAvgImpl struct { sync.RWMutex windowSize time.Duration maxBufferSize int @@ -52,12 +52,12 @@ type ( } ) -func NewMovingWindowAvgImpl( +func newMovingWindowAvgImpl( windowSize time.Duration, maxBufferSize int, -) *MovingWindowAvgImpl { +) *movingWindowAvgImpl { buffer := ring.New(maxBufferSize) - return &MovingWindowAvgImpl{ + return &movingWindowAvgImpl{ windowSize: windowSize, maxBufferSize: maxBufferSize, head: buffer, @@ -65,11 +65,10 @@ func NewMovingWindowAvgImpl( } } -func (a *MovingWindowAvgImpl) Record(val int64) { +func (a *movingWindowAvgImpl) Record(val int64) { a.Lock() defer a.Unlock() - a.expireOldValuesLocked() if a.count == a.maxBufferSize { a.expireOneLocked() } @@ -81,25 +80,33 @@ func (a *MovingWindowAvgImpl) Record(val int64) { a.count++ } -func (a *MovingWindowAvgImpl) Average() float64 { +func (a *movingWindowAvgImpl) Average() float64 { + a.expireOldValues() + a.RLock() defer a.RUnlock() + if a.count == 0 { return 0 } return float64(a.sum / int64(a.count)) } -func (a *MovingWindowAvgImpl) expireOldValuesLocked() { +func (a *movingWindowAvgImpl) expireOldValues() { + a.Lock() + defer a.Unlock() + for ; a.head != a.tail; a.head = a.head.Next() { if data, ok := a.head.Value.(timestampedData); ok && time.Since(data.timestamp) > a.windowSize { a.sum -= data.value a.count-- + } else { + break } } } -func (a *MovingWindowAvgImpl) expireOneLocked() { +func (a *movingWindowAvgImpl) expireOneLocked() { if data, ok := a.head.Value.(timestampedData); ok { a.sum -= data.value a.count-- diff --git 
a/common/aggregate/persistence_health_signal_aggregator.go b/common/aggregate/persistence_health_signal_aggregator.go index 1d3f56f3eb7..ed1bf3a2ac4 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/aggregate/persistence_health_signal_aggregator.go @@ -36,7 +36,6 @@ import ( type ( PersistenceHealthSignalAggregator[K SignalKey] struct { - SignalAggregator[quotas.Request] keyMapper SignalKeyMapperFn[quotas.Request, K] totalRequests map[K]*atomic.Int64 @@ -147,7 +146,7 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( return avg } - newAvg := NewMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) + newAvg := newMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) lock.Lock() defer lock.Unlock() From b8293dcd316083422251ea21d90183ca16003769 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 20:59:20 -0700 Subject: [PATCH 14/36] merge metric and signal clients --- common/persistence/client/factory.go | 72 +- .../persistenceHealthSignalClients.go | 914 ------------------ .../persistence/persistenceMetricClients.go | 489 +++------- 3 files changed, 168 insertions(+), 1307 deletions(-) delete mode 100644 common/persistence/persistenceHealthSignalClients.go diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index 8f17dc5a184..662c507452f 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -114,11 +114,13 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { if f.ratelimiter != nil { result = p.NewTaskPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewTaskPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = 
metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } return result, nil } @@ -134,11 +136,13 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { if f.ratelimiter != nil { result = p.NewShardPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewShardPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewShardPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil @@ -155,11 +159,13 @@ func (f *factoryImpl) NewMetadataManager() (p.MetadataManager, error) { if f.ratelimiter != nil { result = p.NewMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewMetadataPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil @@ -176,11 
+182,13 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err if f.ratelimiter != nil { result = p.NewClusterMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewClusterMetadataPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewClusterMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil @@ -197,11 +205,13 @@ func (f *factoryImpl) NewExecutionManager() (p.ExecutionManager, error) { if f.ratelimiter != nil { result = p.NewExecutionPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewExecutionPersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewExecutionPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return result, nil @@ -216,11 +226,13 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu if f.ratelimiter != nil { result = p.NewQueuePersistenceRateLimitedClient(result, 
f.ratelimiter, f.logger) } - if f.metricsHandler != nil { - result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.logger) - } - if f.healthSignals != nil { - result = p.NewQueuePersistenceHealthSignalClient(result, f.healthSignals, f.logger) + if f.metricsHandler != nil || f.healthSignals != nil { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } else { + f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + } + result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewQueuePersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) return p.NewNamespaceReplicationQueue(result, f.serializer, f.clusterName, f.metricsHandler, f.logger) diff --git a/common/persistence/persistenceHealthSignalClients.go b/common/persistence/persistenceHealthSignalClients.go deleted file mode 100644 index 938d6e6e33d..00000000000 --- a/common/persistence/persistenceHealthSignalClients.go +++ /dev/null @@ -1,914 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package persistence - -import ( - "context" - - commonpb "go.temporal.io/api/common/v1" - "go.temporal.io/server/common/aggregate" - "go.temporal.io/server/common/headers" - "go.temporal.io/server/common/log" - "go.temporal.io/server/common/quotas" -) - -type ( - shardHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence ShardManager - logger log.Logger - } - - executionHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence ExecutionManager - logger log.Logger - } - - taskHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence TaskManager - logger log.Logger - } - - metadataHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence MetadataManager - logger log.Logger - } - - clusterMetadataHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence ClusterMetadataManager - logger log.Logger - } - - queueHealthSignalPersistenceClient struct { - healthSignals aggregate.SignalAggregator[quotas.Request] - persistence Queue - logger log.Logger - } -) - -var _ ShardManager = (*shardHealthSignalPersistenceClient)(nil) -var _ ExecutionManager = (*executionHealthSignalPersistenceClient)(nil) -var _ TaskManager = (*taskHealthSignalPersistenceClient)(nil) -var _ MetadataManager = 
(*metadataHealthSignalPersistenceClient)(nil) -var _ ClusterMetadataManager = (*clusterMetadataHealthSignalPersistenceClient)(nil) -var _ Queue = (*queueHealthSignalPersistenceClient)(nil) - -func NewShardPersistenceHealthSignalClient(persistence ShardManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ShardManager { - return &shardHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: logger, - } -} - -func NewExecutionPersistenceHealthSignalClient(persistence ExecutionManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ExecutionManager { - return &executionHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: logger, - } -} - -func NewTaskPersistenceHealthSignalClient(persistence TaskManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) TaskManager { - return &taskHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: logger, - } -} - -func NewMetadataPersistenceHealthSignalClient(persistence MetadataManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) MetadataManager { - return &metadataHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: logger, - } -} - -func NewClusterMetadataPersistenceHealthSignalClient(persistence ClusterMetadataManager, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ClusterMetadataManager { - return &clusterMetadataHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: logger, - } -} - -func NewQueuePersistenceHealthSignalClient(persistence Queue, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) Queue { - return &queueHealthSignalPersistenceClient{ - persistence: persistence, - healthSignals: healthSignals, - logger: 
logger, - } -} - -func (p *shardHealthSignalPersistenceClient) GetName() string { - return p.persistence.GetName() -} - -func (p *shardHealthSignalPersistenceClient) GetOrCreateShard( - ctx context.Context, - request *GetOrCreateShardRequest, -) (*GetOrCreateShardResponse, error) { - record := recordFn(ctx, "GetOrCreateShard", request.ShardID, p.healthSignals) - response, err := p.persistence.GetOrCreateShard(ctx, request) - record(err) - return response, err -} - -func (p *shardHealthSignalPersistenceClient) UpdateShard( - ctx context.Context, - request *UpdateShardRequest, -) error { - record := recordFn(ctx, "UpdateShard", request.ShardInfo.GetShardId(), p.healthSignals) - err := p.persistence.UpdateShard(ctx, request) - record(err) - return err -} - -func (p *shardHealthSignalPersistenceClient) AssertShardOwnership( - ctx context.Context, - request *AssertShardOwnershipRequest, -) error { - record := recordFn(ctx, "AssertShardOwnership", request.ShardID, p.healthSignals) - err := p.persistence.AssertShardOwnership(ctx, request) - record(err) - return err -} - -func (p *shardHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func (p *executionHealthSignalPersistenceClient) GetName() string { - return p.persistence.GetName() -} - -func (p *executionHealthSignalPersistenceClient) GetHistoryBranchUtil() HistoryBranchUtil { - return p.persistence.GetHistoryBranchUtil() -} - -func (p *executionHealthSignalPersistenceClient) CreateWorkflowExecution( - ctx context.Context, - request *CreateWorkflowExecutionRequest, -) (*CreateWorkflowExecutionResponse, error) { - record := recordFn(ctx, "CreateWorkflowExecution", request.ShardID, p.healthSignals) - response, err := p.persistence.CreateWorkflowExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) GetWorkflowExecution( - ctx context.Context, - request *GetWorkflowExecutionRequest, -) (*GetWorkflowExecutionResponse, error) { - record := 
recordFn(ctx, "GetWorkflowExecution", request.ShardID, p.healthSignals) - response, err := p.persistence.GetWorkflowExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) SetWorkflowExecution( - ctx context.Context, - request *SetWorkflowExecutionRequest, -) (*SetWorkflowExecutionResponse, error) { - record := recordFn(ctx, "SetWorkflowExecution", request.ShardID, p.healthSignals) - response, err := p.persistence.SetWorkflowExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) UpdateWorkflowExecution( - ctx context.Context, - request *UpdateWorkflowExecutionRequest, -) (*UpdateWorkflowExecutionResponse, error) { - record := recordFn(ctx, "UpdateWorkflowExecuton", request.ShardID, p.healthSignals) - response, err := p.persistence.UpdateWorkflowExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ConflictResolveWorkflowExecution( - ctx context.Context, - request *ConflictResolveWorkflowExecutionRequest, -) (*ConflictResolveWorkflowExecutionResponse, error) { - record := recordFn(ctx, "ConflictResolveWorkflowExecution", request.ShardID, p.healthSignals) - response, err := p.persistence.ConflictResolveWorkflowExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) DeleteWorkflowExecution( - ctx context.Context, - request *DeleteWorkflowExecutionRequest, -) error { - record := recordFn(ctx, "DeleteWorkflowExecution", request.ShardID, p.healthSignals) - err := p.persistence.DeleteWorkflowExecution(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) DeleteCurrentWorkflowExecution( - ctx context.Context, - request *DeleteCurrentWorkflowExecutionRequest, -) error { - record := recordFn(ctx, "DeleteCurrentWorkflowExecution", request.ShardID, p.healthSignals) - err := 
p.persistence.DeleteCurrentWorkflowExecution(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) GetCurrentExecution( - ctx context.Context, - request *GetCurrentExecutionRequest, -) (*GetCurrentExecutionResponse, error) { - record := recordFn(ctx, "GetCurrentExecution", request.ShardID, p.healthSignals) - response, err := p.persistence.GetCurrentExecution(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ListConcreteExecutions( - ctx context.Context, - request *ListConcreteExecutionsRequest, -) (*ListConcreteExecutionsResponse, error) { - record := recordFn(ctx, "ListConcreteExecutions", request.ShardID, p.healthSignals) - response, err := p.persistence.ListConcreteExecutions(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) RegisterHistoryTaskReader( - ctx context.Context, - request *RegisterHistoryTaskReaderRequest, -) error { - // hint methods don't actually hint DB, so don't go through persistence rate limiter - return p.persistence.RegisterHistoryTaskReader(ctx, request) -} - -func (p *executionHealthSignalPersistenceClient) UnregisterHistoryTaskReader( - ctx context.Context, - request *UnregisterHistoryTaskReaderRequest, -) { - // hint methods don't actually hint DB, so don't go through persistence rate limiter - p.persistence.UnregisterHistoryTaskReader(ctx, request) -} - -func (p *executionHealthSignalPersistenceClient) UpdateHistoryTaskReaderProgress( - ctx context.Context, - request *UpdateHistoryTaskReaderProgressRequest, -) { - // hint methods don't actually hint DB, so don't go through persistence rate limiter - p.persistence.UpdateHistoryTaskReaderProgress(ctx, request) -} - -func (p *executionHealthSignalPersistenceClient) AddHistoryTasks( - ctx context.Context, - request *AddHistoryTasksRequest, -) error { - record := recordFn(ctx, "AddHistoryTasks", request.ShardID, p.healthSignals) - err := 
p.persistence.AddHistoryTasks(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) GetHistoryTasks( - ctx context.Context, - request *GetHistoryTasksRequest, -) (*GetHistoryTasksResponse, error) { - record := recordFn(ctx, "GetHistoryTasks", request.ShardID, p.healthSignals) - response, err := p.persistence.GetHistoryTasks(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) CompleteHistoryTask( - ctx context.Context, - request *CompleteHistoryTaskRequest, -) error { - record := recordFn(ctx, "CompleteHistoryTask", request.ShardID, p.healthSignals) - err := p.persistence.CompleteHistoryTask(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) RangeCompleteHistoryTasks( - ctx context.Context, - request *RangeCompleteHistoryTasksRequest, -) error { - record := recordFn(ctx, "RangeCompleteHistoryTasks", request.ShardID, p.healthSignals) - err := p.persistence.RangeCompleteHistoryTasks(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) PutReplicationTaskToDLQ( - ctx context.Context, - request *PutReplicationTaskToDLQRequest, -) error { - record := recordFn(ctx, "PutReplicationTaskToDLQ", request.ShardID, p.healthSignals) - err := p.persistence.PutReplicationTaskToDLQ(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) GetReplicationTasksFromDLQ( - ctx context.Context, - request *GetReplicationTasksFromDLQRequest, -) (*GetHistoryTasksResponse, error) { - record := recordFn(ctx, "GetReplicationTasksFromDLQ", request.ShardID, p.healthSignals) - response, err := p.persistence.GetReplicationTasksFromDLQ(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) DeleteReplicationTaskFromDLQ( - ctx context.Context, - request *DeleteReplicationTaskFromDLQRequest, -) error { - record := recordFn(ctx, 
"DeleteReplicationTaskFromDLQ", request.ShardID, p.healthSignals) - err := p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) RangeDeleteReplicationTaskFromDLQ( - ctx context.Context, - request *RangeDeleteReplicationTaskFromDLQRequest, -) error { - record := recordFn(ctx, "RangeDeleteReplicationTaskFromDLQ", request.ShardID, p.healthSignals) - err := p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) IsReplicationDLQEmpty( - ctx context.Context, - request *GetReplicationTasksFromDLQRequest, -) (bool, error) { - record := recordFn(ctx, "IsReplicationDLQEmpty", request.ShardID, p.healthSignals) - response, err := p.persistence.IsReplicationDLQEmpty(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) AppendHistoryNodes( - ctx context.Context, - request *AppendHistoryNodesRequest, -) (*AppendHistoryNodesResponse, error) { - record := recordFn(ctx, "AppendHistoryNodes", request.ShardID, p.healthSignals) - response, err := p.persistence.AppendHistoryNodes(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) AppendRawHistoryNodes( - ctx context.Context, - request *AppendRawHistoryNodesRequest, -) (*AppendHistoryNodesResponse, error) { - record := recordFn(ctx, "AppendRawHistoryNodes", request.ShardID, p.healthSignals) - response, err := p.persistence.AppendRawHistoryNodes(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ReadHistoryBranch( - ctx context.Context, - request *ReadHistoryBranchRequest, -) (*ReadHistoryBranchResponse, error) { - record := recordFn(ctx, "ReadHistoryBranch", request.ShardID, p.healthSignals) - response, err := p.persistence.ReadHistoryBranch(ctx, request) - record(err) - return response, err -} - -func (p 
*executionHealthSignalPersistenceClient) ReadHistoryBranchReverse( - ctx context.Context, - request *ReadHistoryBranchReverseRequest, -) (*ReadHistoryBranchReverseResponse, error) { - record := recordFn(ctx, "ReadHistoryBranchReverse", request.ShardID, p.healthSignals) - response, err := p.persistence.ReadHistoryBranchReverse(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ReadHistoryBranchByBatch( - ctx context.Context, - request *ReadHistoryBranchRequest, -) (*ReadHistoryBranchByBatchResponse, error) { - record := recordFn(ctx, "ReadHistoryBranchByBatch", request.ShardID, p.healthSignals) - response, err := p.persistence.ReadHistoryBranchByBatch(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ReadRawHistoryBranch( - ctx context.Context, - request *ReadHistoryBranchRequest, -) (*ReadRawHistoryBranchResponse, error) { - record := recordFn(ctx, "ReadRawHistoryBranch", request.ShardID, p.healthSignals) - response, err := p.persistence.ReadRawHistoryBranch(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) ForkHistoryBranch( - ctx context.Context, - request *ForkHistoryBranchRequest, -) (*ForkHistoryBranchResponse, error) { - record := recordFn(ctx, "ForkHistoryBranch", request.ShardID, p.healthSignals) - response, err := p.persistence.ForkHistoryBranch(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) DeleteHistoryBranch( - ctx context.Context, - request *DeleteHistoryBranchRequest, -) error { - record := recordFn(ctx, "DeleteHistoryBranch", request.ShardID, p.healthSignals) - err := p.persistence.DeleteHistoryBranch(ctx, request) - record(err) - return err -} - -func (p *executionHealthSignalPersistenceClient) TrimHistoryBranch( - ctx context.Context, - request *TrimHistoryBranchRequest, -) (*TrimHistoryBranchResponse, error) { - record := 
recordFn(ctx, "TrimHistoryBranch", request.ShardID, p.healthSignals) - resp, err := p.persistence.TrimHistoryBranch(ctx, request) - record(err) - return resp, err -} - -func (p *executionHealthSignalPersistenceClient) GetHistoryTree( - ctx context.Context, - request *GetHistoryTreeRequest, -) (*GetHistoryTreeResponse, error) { - record := recordFn(ctx, "GetHistoryTree", request.ShardID, p.healthSignals) - response, err := p.persistence.GetHistoryTree(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) GetAllHistoryTreeBranches( - ctx context.Context, - request *GetAllHistoryTreeBranchesRequest, -) (*GetAllHistoryTreeBranchesResponse, error) { - record := recordFn(ctx, "GetAllHistoryTreeBranches", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetAllHistoryTreeBranches(ctx, request) - record(err) - return response, err -} - -func (p *executionHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func (p *taskHealthSignalPersistenceClient) GetName() string { - return p.persistence.GetName() -} - -func (p *taskHealthSignalPersistenceClient) CreateTasks( - ctx context.Context, - request *CreateTasksRequest, -) (*CreateTasksResponse, error) { - record := recordFn(ctx, "CreateTasks", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.CreateTasks(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) GetTasks( - ctx context.Context, - request *GetTasksRequest, -) (*GetTasksResponse, error) { - record := recordFn(ctx, "GetTasks", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetTasks(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) CompleteTask( - ctx context.Context, - request *CompleteTaskRequest, -) error { - record := recordFn(ctx, "CompleteTask", CallerSegmentMissing, p.healthSignals) - err := p.persistence.CompleteTask(ctx, 
request) - record(err) - return err -} - -func (p *taskHealthSignalPersistenceClient) CompleteTasksLessThan( - ctx context.Context, - request *CompleteTasksLessThanRequest, -) (int, error) { - record := recordFn(ctx, "CompleteTasksLessThan", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.CompleteTasksLessThan(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) CreateTaskQueue( - ctx context.Context, - request *CreateTaskQueueRequest, -) (*CreateTaskQueueResponse, error) { - record := recordFn(ctx, "CreateTaskQueue", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.CreateTaskQueue(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) UpdateTaskQueue( - ctx context.Context, - request *UpdateTaskQueueRequest, -) (*UpdateTaskQueueResponse, error) { - record := recordFn(ctx, "UpdateTaskQueue", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.UpdateTaskQueue(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) GetTaskQueue( - ctx context.Context, - request *GetTaskQueueRequest, -) (*GetTaskQueueResponse, error) { - record := recordFn(ctx, "GetTaskQueue", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetTaskQueue(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) ListTaskQueue( - ctx context.Context, - request *ListTaskQueueRequest, -) (*ListTaskQueueResponse, error) { - record := recordFn(ctx, "ListTaskQueue", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.ListTaskQueue(ctx, request) - record(err) - return response, err -} - -func (p *taskHealthSignalPersistenceClient) DeleteTaskQueue( - ctx context.Context, - request *DeleteTaskQueueRequest, -) error { - record := recordFn(ctx, "DeleteTaskQueue", CallerSegmentMissing, p.healthSignals) - err := 
p.persistence.DeleteTaskQueue(ctx, request) - record(err) - return err -} - -func (p *taskHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func (p *metadataHealthSignalPersistenceClient) GetName() string { - return p.persistence.GetName() -} - -func (p *metadataHealthSignalPersistenceClient) CreateNamespace( - ctx context.Context, - request *CreateNamespaceRequest, -) (*CreateNamespaceResponse, error) { - record := recordFn(ctx, "CreateNamespace", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.CreateNamespace(ctx, request) - record(err) - return response, err -} - -func (p *metadataHealthSignalPersistenceClient) GetNamespace( - ctx context.Context, - request *GetNamespaceRequest, -) (*GetNamespaceResponse, error) { - record := recordFn(ctx, "GetNamespace", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetNamespace(ctx, request) - record(err) - return response, err -} - -func (p *metadataHealthSignalPersistenceClient) UpdateNamespace( - ctx context.Context, - request *UpdateNamespaceRequest, -) error { - record := recordFn(ctx, "UpdateNamespace", CallerSegmentMissing, p.healthSignals) - err := p.persistence.UpdateNamespace(ctx, request) - record(err) - return err -} - -func (p *metadataHealthSignalPersistenceClient) RenameNamespace( - ctx context.Context, - request *RenameNamespaceRequest, -) error { - record := recordFn(ctx, "RenameNamespace", CallerSegmentMissing, p.healthSignals) - err := p.persistence.RenameNamespace(ctx, request) - record(err) - return err -} - -func (p *metadataHealthSignalPersistenceClient) DeleteNamespace( - ctx context.Context, - request *DeleteNamespaceRequest, -) error { - record := recordFn(ctx, "DeleteNamespace", CallerSegmentMissing, p.healthSignals) - err := p.persistence.DeleteNamespace(ctx, request) - record(err) - return err -} - -func (p *metadataHealthSignalPersistenceClient) DeleteNamespaceByName( - ctx context.Context, - request 
*DeleteNamespaceByNameRequest, -) error { - record := recordFn(ctx, "DeleteNamespaceByName", CallerSegmentMissing, p.healthSignals) - err := p.persistence.DeleteNamespaceByName(ctx, request) - record(err) - return err -} - -func (p *metadataHealthSignalPersistenceClient) ListNamespaces( - ctx context.Context, - request *ListNamespacesRequest, -) (*ListNamespacesResponse, error) { - record := recordFn(ctx, "ListNamespaces", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.ListNamespaces(ctx, request) - record(err) - return response, err -} - -func (p *metadataHealthSignalPersistenceClient) GetMetadata( - ctx context.Context, -) (*GetMetadataResponse, error) { - record := recordFn(ctx, "GetMetadata", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetMetadata(ctx) - record(err) - return response, err -} - -func (p *metadataHealthSignalPersistenceClient) InitializeSystemNamespaces( - ctx context.Context, - currentClusterName string, -) error { - record := recordFn(ctx, "InitializeSystemNamespaces", CallerSegmentMissing, p.healthSignals) - err := p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) - record(err) - return err -} - -func (p *metadataHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func (p *clusterMetadataHealthSignalPersistenceClient) GetName() string { - return p.persistence.GetName() -} - -func (p *clusterMetadataHealthSignalPersistenceClient) GetClusterMembers( - ctx context.Context, - request *GetClusterMembersRequest, -) (*GetClusterMembersResponse, error) { - record := recordFn(ctx, "GetClusterMembers", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetClusterMembers(ctx, request) - record(err) - return response, err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) UpsertClusterMembership( - ctx context.Context, - request *UpsertClusterMembershipRequest, -) error { - record := recordFn(ctx, "UpsertClusterMembership", 
CallerSegmentMissing, p.healthSignals) - err := p.persistence.UpsertClusterMembership(ctx, request) - record(err) - return err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) PruneClusterMembership( - ctx context.Context, - request *PruneClusterMembershipRequest, -) error { - record := recordFn(ctx, "PruneClusterMembership", CallerSegmentMissing, p.healthSignals) - err := p.persistence.PruneClusterMembership(ctx, request) - record(err) - return err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) ListClusterMetadata( - ctx context.Context, - request *ListClusterMetadataRequest, -) (*ListClusterMetadataResponse, error) { - record := recordFn(ctx, "ListClusterMetadata", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.ListClusterMetadata(ctx, request) - record(err) - return response, err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) GetCurrentClusterMetadata( - ctx context.Context, -) (*GetClusterMetadataResponse, error) { - record := recordFn(ctx, "GetCurrentClusterMetadata", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetCurrentClusterMetadata(ctx) - record(err) - return response, err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) GetClusterMetadata( - ctx context.Context, - request *GetClusterMetadataRequest, -) (*GetClusterMetadataResponse, error) { - record := recordFn(ctx, "GetClusterMetadata", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetClusterMetadata(ctx, request) - record(err) - return response, err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) SaveClusterMetadata( - ctx context.Context, - request *SaveClusterMetadataRequest, -) (bool, error) { - record := recordFn(ctx, "SaveClusterMetadata", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.SaveClusterMetadata(ctx, request) - record(err) - return response, err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) 
DeleteClusterMetadata( - ctx context.Context, - request *DeleteClusterMetadataRequest, -) error { - record := recordFn(ctx, "DeleteClusterMetadata", CallerSegmentMissing, p.healthSignals) - err := p.persistence.DeleteClusterMetadata(ctx, request) - record(err) - return err -} - -func (p *clusterMetadataHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func (p *queueHealthSignalPersistenceClient) Init( - ctx context.Context, - blob *commonpb.DataBlob, -) error { - return p.persistence.Init(ctx, blob) -} - -func (p *queueHealthSignalPersistenceClient) EnqueueMessage( - ctx context.Context, - blob commonpb.DataBlob, -) error { - record := recordFn(ctx, "EnqueueMessage", CallerSegmentMissing, p.healthSignals) - err := p.persistence.EnqueueMessage(ctx, blob) - record(err) - return err -} - -func (p *queueHealthSignalPersistenceClient) ReadMessages( - ctx context.Context, - lastMessageID int64, - maxCount int, -) ([]*QueueMessage, error) { - record := recordFn(ctx, "ReadMessages", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.ReadMessages(ctx, lastMessageID, maxCount) - record(err) - return response, err -} - -func (p *queueHealthSignalPersistenceClient) UpdateAckLevel( - ctx context.Context, - metadata *InternalQueueMetadata, -) error { - record := recordFn(ctx, "UpdateAckLevel", CallerSegmentMissing, p.healthSignals) - err := p.persistence.UpdateAckLevel(ctx, metadata) - record(err) - return err -} - -func (p *queueHealthSignalPersistenceClient) GetAckLevels( - ctx context.Context, -) (*InternalQueueMetadata, error) { - record := recordFn(ctx, "GetAckLevels", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetAckLevels(ctx) - record(err) - return response, err -} - -func (p *queueHealthSignalPersistenceClient) DeleteMessagesBefore( - ctx context.Context, - messageID int64, -) error { - record := recordFn(ctx, "DeleteMessagesBefore", CallerSegmentMissing, p.healthSignals) - err := 
p.persistence.DeleteMessagesBefore(ctx, messageID) - record(err) - return err -} - -func (p *queueHealthSignalPersistenceClient) EnqueueMessageToDLQ( - ctx context.Context, - blob commonpb.DataBlob, -) (int64, error) { - record := recordFn(ctx, "EnqueueMessageToDLQ", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.EnqueueMessageToDLQ(ctx, blob) - record(err) - return response, err -} - -func (p *queueHealthSignalPersistenceClient) ReadMessagesFromDLQ( - ctx context.Context, - firstMessageID int64, - lastMessageID int64, - pageSize int, - pageToken []byte, -) ([]*QueueMessage, []byte, error) { - record := recordFn(ctx, "ReadMessagesFromDLQ", CallerSegmentMissing, p.healthSignals) - response, data, err := p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) - record(err) - return response, data, err -} - -func (p *queueHealthSignalPersistenceClient) RangeDeleteMessagesFromDLQ( - ctx context.Context, - firstMessageID int64, - lastMessageID int64, -) error { - record := recordFn(ctx, "RangeDeleteMessagesFromDLQ", CallerSegmentMissing, p.healthSignals) - err := p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) - record(err) - return err -} -func (p *queueHealthSignalPersistenceClient) UpdateDLQAckLevel( - ctx context.Context, - metadata *InternalQueueMetadata, -) error { - record := recordFn(ctx, "UpdateDLQAckLevel", CallerSegmentMissing, p.healthSignals) - err := p.persistence.UpdateDLQAckLevel(ctx, metadata) - record(err) - return err -} - -func (p *queueHealthSignalPersistenceClient) GetDLQAckLevels( - ctx context.Context, -) (*InternalQueueMetadata, error) { - record := recordFn(ctx, "GetDLQAckLevels", CallerSegmentMissing, p.healthSignals) - response, err := p.persistence.GetDLQAckLevels(ctx) - record(err) - return response, err -} - -func (p *queueHealthSignalPersistenceClient) DeleteMessageFromDLQ( - ctx context.Context, - messageID int64, -) error { - record := recordFn(ctx, 
"DeleteMessageFromDLQ", CallerSegmentMissing, p.healthSignals) - err := p.persistence.DeleteMessageFromDLQ(ctx, messageID) - record(err) - return err -} - -func (p *queueHealthSignalPersistenceClient) Close() { - p.persistence.Close() -} - -func recordFn( - ctx context.Context, - api string, - shardID int32, - healthSignals aggregate.SignalAggregator[quotas.Request], -) func(err error) { - callerInfo := headers.GetCallerInfo(ctx) - return healthSignals.GetRecordFn(quotas.NewRequest( - api, - RateLimitDefaultToken, - callerInfo.CallerName, - callerInfo.CallerType, - shardID, - callerInfo.CallOrigin, - )) -} diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index 703831d7713..fc30f76871e 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -31,6 +31,8 @@ import ( commonpb "go.temporal.io/api/common/v1" "go.temporal.io/api/serviceerror" + "go.temporal.io/server/common/aggregate" + "go.temporal.io/server/common/quotas" "go.temporal.io/server/common/headers" "go.temporal.io/server/common/log" @@ -47,32 +49,38 @@ type ( shardPersistenceClient struct { metricEmitter - persistence ShardManager + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence ShardManager } executionPersistenceClient struct { metricEmitter - persistence ExecutionManager + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence ExecutionManager } taskPersistenceClient struct { metricEmitter - persistence TaskManager + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence TaskManager } metadataPersistenceClient struct { metricEmitter - persistence MetadataManager + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence MetadataManager } clusterMetadataPersistenceClient struct { metricEmitter - persistence ClusterMetadataManager + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence 
ClusterMetadataManager } queuePersistenceClient struct { metricEmitter - persistence Queue + healthSignals aggregate.SignalAggregator[quotas.Request] + persistence Queue } ) @@ -84,68 +92,74 @@ var _ ClusterMetadataManager = (*clusterMetadataPersistenceClient)(nil) var _ Queue = (*queuePersistenceClient)(nil) // NewShardPersistenceMetricsClient creates a client to manage shards -func NewShardPersistenceMetricsClient(persistence ShardManager, metricsHandler metrics.Handler, logger log.Logger) ShardManager { +func NewShardPersistenceMetricsClient(persistence ShardManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ShardManager { return &shardPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: healthSignals, + persistence: persistence, } } // NewExecutionPersistenceMetricsClient creates a client to manage executions -func NewExecutionPersistenceMetricsClient(persistence ExecutionManager, metricsHandler metrics.Handler, logger log.Logger) ExecutionManager { +func NewExecutionPersistenceMetricsClient(persistence ExecutionManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ExecutionManager { return &executionPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: healthSignals, + persistence: persistence, } } // NewTaskPersistenceMetricsClient creates a client to manage tasks -func NewTaskPersistenceMetricsClient(persistence TaskManager, metricsHandler metrics.Handler, logger log.Logger) TaskManager { +func NewTaskPersistenceMetricsClient(persistence TaskManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) TaskManager { return &taskPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: 
metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: healthSignals, + persistence: persistence, } } // NewMetadataPersistenceMetricsClient creates a MetadataManager client to manage metadata -func NewMetadataPersistenceMetricsClient(persistence MetadataManager, metricsHandler metrics.Handler, logger log.Logger) MetadataManager { +func NewMetadataPersistenceMetricsClient(persistence MetadataManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) MetadataManager { return &metadataPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: healthSignals, + persistence: persistence, } } // NewClusterMetadataPersistenceMetricsClient creates a ClusterMetadataManager client to manage cluster metadata -func NewClusterMetadataPersistenceMetricsClient(persistence ClusterMetadataManager, metricsHandler metrics.Handler, logger log.Logger) ClusterMetadataManager { +func NewClusterMetadataPersistenceMetricsClient(persistence ClusterMetadataManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ClusterMetadataManager { return &clusterMetadataPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: healthSignals, + persistence: persistence, } } // NewQueuePersistenceMetricsClient creates a client to manage queue -func NewQueuePersistenceMetricsClient(persistence Queue, metricsHandler metrics.Handler, logger log.Logger) Queue { +func NewQueuePersistenceMetricsClient(persistence Queue, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) Queue { return &queuePersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, logger: logger, }, - persistence: persistence, + healthSignals: 
healthSignals, + persistence: persistence, } } @@ -157,11 +171,7 @@ func (p *shardPersistenceClient) GetOrCreateShard( ctx context.Context, request *GetOrCreateShardRequest, ) (_ *GetOrCreateShardResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetOrCreateShard(ctx, request) } @@ -169,11 +179,7 @@ func (p *shardPersistenceClient) UpdateShard( ctx context.Context, request *UpdateShardRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateShard(ctx, request) } @@ -181,11 +187,7 @@ func (p *shardPersistenceClient) AssertShardOwnership( ctx context.Context, request *AssertShardOwnershipRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.AssertShardOwnership(ctx, request) } @@ -205,11 +207,7 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( ctx context.Context, request *CreateWorkflowExecutionRequest, ) (_ *CreateWorkflowExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() 
{ - p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -217,11 +215,7 @@ func (p *executionPersistenceClient) GetWorkflowExecution( ctx context.Context, request *GetWorkflowExecutionRequest, ) (_ *GetWorkflowExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetWorkflowExecution(ctx, request) } @@ -229,11 +223,7 @@ func (p *executionPersistenceClient) SetWorkflowExecution( ctx context.Context, request *SetWorkflowExecutionRequest, ) (_ *SetWorkflowExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.SetWorkflowExecution(ctx, request) } @@ -241,11 +231,7 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( ctx context.Context, request *UpdateWorkflowExecutionRequest, ) (_ *UpdateWorkflowExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, 
metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -253,11 +239,7 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( ctx context.Context, request *ConflictResolveWorkflowExecutionRequest, ) (_ *ConflictResolveWorkflowExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -265,11 +247,7 @@ func (p *executionPersistenceClient) DeleteWorkflowExecution( ctx context.Context, request *DeleteWorkflowExecutionRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -277,11 +255,7 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( ctx context.Context, request *DeleteCurrentWorkflowExecutionRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteCurrentWorkflowExecution(ctx, 
request) } @@ -289,11 +263,7 @@ func (p *executionPersistenceClient) GetCurrentExecution( ctx context.Context, request *GetCurrentExecutionRequest, ) (_ *GetCurrentExecutionResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetCurrentExecution(ctx, request) } @@ -301,11 +271,7 @@ func (p *executionPersistenceClient) ListConcreteExecutions( ctx context.Context, request *ListConcreteExecutionsRequest, ) (_ *ListConcreteExecutionsResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ListConcreteExecutions(ctx, request) } @@ -340,11 +306,7 @@ func (p *executionPersistenceClient) AddHistoryTasks( ctx context.Context, request *AddHistoryTasksRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceAddTasksScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.AddHistoryTasks(ctx, request) } @@ -368,11 +330,7 @@ func (p *executionPersistenceClient) GetHistoryTasks( return nil, serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - 
defer func() { - p.recordRequestMetrics(operation, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetHistoryTasks(ctx, request) } @@ -396,11 +354,7 @@ func (p *executionPersistenceClient) CompleteHistoryTask( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(operation, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CompleteHistoryTask(ctx, request) } @@ -424,11 +378,7 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(operation, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -436,11 +386,7 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( ctx context.Context, request *PutReplicationTaskToDLQRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -448,11 +394,7 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( ctx context.Context, request 
*GetReplicationTasksFromDLQRequest, ) (_ *GetHistoryTasksResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -460,11 +402,7 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( ctx context.Context, request *DeleteReplicationTaskFromDLQRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -472,11 +410,7 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( ctx context.Context, request *RangeDeleteReplicationTaskFromDLQRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -484,11 +418,7 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ bool, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer 
func() { - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -504,11 +434,7 @@ func (p *taskPersistenceClient) CreateTasks( ctx context.Context, request *CreateTasksRequest, ) (_ *CreateTasksResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CreateTasks(ctx, request) } @@ -516,11 +442,7 @@ func (p *taskPersistenceClient) GetTasks( ctx context.Context, request *GetTasksRequest, ) (_ *GetTasksResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetTasksScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetTasks(ctx, request) } @@ -528,11 +450,7 @@ func (p *taskPersistenceClient) CompleteTask( ctx context.Context, request *CompleteTaskRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CompleteTask(ctx, request) } @@ -540,11 +458,7 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( ctx 
context.Context, request *CompleteTasksLessThanRequest, ) (_ int, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CompleteTasksLessThan(ctx, request) } @@ -552,11 +466,7 @@ func (p *taskPersistenceClient) CreateTaskQueue( ctx context.Context, request *CreateTaskQueueRequest, ) (_ *CreateTaskQueueResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CreateTaskQueue(ctx, request) } @@ -564,11 +474,7 @@ func (p *taskPersistenceClient) UpdateTaskQueue( ctx context.Context, request *UpdateTaskQueueRequest, ) (_ *UpdateTaskQueueResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateTaskQueue(ctx, request) } @@ -576,11 +482,7 @@ func (p *taskPersistenceClient) GetTaskQueue( ctx context.Context, request *GetTaskQueueRequest, ) (_ *GetTaskQueueResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, caller, startTime, retErr) - }() + defer 
recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetTaskQueue(ctx, request) } @@ -588,11 +490,7 @@ func (p *taskPersistenceClient) ListTaskQueue( ctx context.Context, request *ListTaskQueueRequest, ) (_ *ListTaskQueueResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ListTaskQueue(ctx, request) } @@ -600,11 +498,7 @@ func (p *taskPersistenceClient) DeleteTaskQueue( ctx context.Context, request *DeleteTaskQueueRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteTaskQueue(ctx, request) } @@ -620,11 +514,7 @@ func (p *metadataPersistenceClient) CreateNamespace( ctx context.Context, request *CreateNamespaceRequest, ) (_ *CreateNamespaceResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.CreateNamespace(ctx, request) } @@ -632,11 +522,7 @@ func (p *metadataPersistenceClient) GetNamespace( ctx context.Context, request *GetNamespaceRequest, ) (_ *GetNamespaceResponse, 
retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetNamespace(ctx, request) } @@ -644,11 +530,7 @@ func (p *metadataPersistenceClient) UpdateNamespace( ctx context.Context, request *UpdateNamespaceRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateNamespace(ctx, request) } @@ -656,11 +538,7 @@ func (p *metadataPersistenceClient) RenameNamespace( ctx context.Context, request *RenameNamespaceRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.RenameNamespace(ctx, request) } @@ -668,11 +546,7 @@ func (p *metadataPersistenceClient) DeleteNamespace( ctx context.Context, request *DeleteNamespaceRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return 
p.persistence.DeleteNamespace(ctx, request) } @@ -680,11 +554,7 @@ func (p *metadataPersistenceClient) DeleteNamespaceByName( ctx context.Context, request *DeleteNamespaceByNameRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -692,22 +562,14 @@ func (p *metadataPersistenceClient) ListNamespaces( ctx context.Context, request *ListNamespacesRequest, ) (_ *ListNamespacesResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ListNamespaces(ctx, request) } func (p *metadataPersistenceClient) GetMetadata( ctx context.Context, ) (_ *GetMetadataResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetMetadata(ctx) } @@ -720,11 +582,7 @@ func (p *executionPersistenceClient) AppendHistoryNodes( ctx context.Context, request *AppendHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - 
p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.AppendHistoryNodes(ctx, request) } @@ -733,11 +591,7 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( ctx context.Context, request *AppendRawHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -746,11 +600,7 @@ func (p *executionPersistenceClient) ReadHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadHistoryBranch(ctx, request) } @@ -758,11 +608,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchReverse( ctx context.Context, request *ReadHistoryBranchReverseRequest, ) (_ *ReadHistoryBranchReverseResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, 
p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -771,11 +617,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchByBatchResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -784,11 +626,7 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadRawHistoryBranchResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -797,11 +635,7 @@ func (p *executionPersistenceClient) ForkHistoryBranch( ctx context.Context, request *ForkHistoryBranchRequest, ) (_ *ForkHistoryBranchResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ForkHistoryBranch(ctx, request) } @@ -810,11 +644,7 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( ctx context.Context, request 
*DeleteHistoryBranchRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteHistoryBranch(ctx, request) } @@ -823,11 +653,7 @@ func (p *executionPersistenceClient) TrimHistoryBranch( ctx context.Context, request *TrimHistoryBranchRequest, ) (_ *TrimHistoryBranchResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.TrimHistoryBranch(ctx, request) } @@ -835,11 +661,7 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( ctx context.Context, request *GetAllHistoryTreeBranchesRequest, ) (_ *GetAllHistoryTreeBranchesResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -848,11 +670,7 @@ func (p *executionPersistenceClient) GetHistoryTree( ctx context.Context, request *GetHistoryTreeRequest, ) (_ *GetHistoryTreeResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, 
caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetHistoryTree(ctx, request) } @@ -867,11 +685,7 @@ func (p *queuePersistenceClient) EnqueueMessage( ctx context.Context, blob commonpb.DataBlob, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.EnqueueMessage(ctx, blob) } @@ -880,11 +694,7 @@ func (p *queuePersistenceClient) ReadMessages( lastMessageID int64, maxCount int, ) (_ []*QueueMessage, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -892,22 +702,14 @@ func (p *queuePersistenceClient) UpdateAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName 
- startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetAckLevels(ctx) } @@ -915,11 +717,7 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( ctx context.Context, messageID int64, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -927,11 +725,7 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( ctx context.Context, blob commonpb.DataBlob, ) (_ int64, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -942,11 +736,7 @@ func (p *queuePersistenceClient) ReadMessagesFromDLQ( pageSize int, pageToken []byte, ) (_ []*QueueMessage, _ []byte, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, 
lastMessageID, pageSize, pageToken) } @@ -954,11 +744,7 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( ctx context.Context, messageID int64, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -967,11 +753,7 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( firstMessageID int64, lastMessageID int64, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -979,22 +761,14 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpdateDLQAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetDLQAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - 
p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetDLQAckLevels(ctx) } @@ -1010,22 +784,14 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( ctx context.Context, request *ListClusterMetadataRequest, ) (_ *ListClusterMetadataResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.ListClusterMetadata(ctx, request) } func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( ctx context.Context, ) (_ *GetClusterMetadataResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -1033,11 +799,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( ctx context.Context, request *GetClusterMetadataRequest, ) (_ *GetClusterMetadataResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) return 
p.persistence.GetClusterMetadata(ctx, request) } @@ -1045,11 +807,7 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( ctx context.Context, request *SaveClusterMetadataRequest, ) (_ bool, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.SaveClusterMetadata(ctx, request) } @@ -1057,11 +815,7 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( ctx context.Context, request *DeleteClusterMetadataRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -1073,11 +827,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( ctx context.Context, request *GetClusterMembersRequest, ) (_ *GetClusterMembersResponse, retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.GetClusterMembers(ctx, request) } @@ -1085,11 +835,7 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( ctx context.Context, request *UpsertClusterMembershipRequest, ) (retErr error) { - caller := 
headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.UpsertClusterMembership(ctx, request) } @@ -1097,11 +843,7 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( ctx context.Context, request *PruneClusterMembershipRequest, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.PruneClusterMembership(ctx, request) } @@ -1109,14 +851,35 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( ctx context.Context, currentClusterName string, ) (retErr error) { - caller := headers.GetCallerInfo(ctx).CallerName - startTime := time.Now().UTC() - defer func() { - p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, caller, startTime, retErr) - }() + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals, retErr) return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } +func recordMetricsAndSignalsFn( + ctx context.Context, + shardID int32, + scope string, + emitter metricEmitter, + healthSignals aggregate.SignalAggregator[quotas.Request], + err error, +) func(error) { + startTime := time.Now().UTC() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := healthSignals.GetRecordFn(quotas.NewRequest( + scope, + RateLimitDefaultToken, + callerInfo.CallerName, + 
callerInfo.CallerType, + shardID, + callerInfo.CallOrigin, + )) + + return func(error) { + signalFn(err) + emitter.recordRequestMetrics(scope, callerInfo.CallerName, startTime, err) + } +} + func (p *metricEmitter) recordRequestMetrics(operation string, caller string, startTime time.Time, err error) { handler := p.metricsHandler.WithTags(metrics.OperationTag(operation), metrics.NamespaceTag(caller)) handler.Counter(metrics.PersistenceRequests.GetMetricName()).Record(1) From 27ec4c5bff272b538507d46dc42da4d5979aaaa6 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 21:01:55 -0700 Subject: [PATCH 15/36] cleanup --- .../aggregate/bench_moving_window_avg_test.go | 2 +- common/aggregate/moving_window_average.go | 16 +++++----- .../persistence_health_signal_aggregator.go | 29 ++++++++++--------- common/resource/fx.go | 2 +- 4 files changed, 25 insertions(+), 24 deletions(-) rename common/{aggregate => persistence/client}/persistence_health_signal_aggregator.go (86%) diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 50ad9d04359..706460c2c30 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -39,7 +39,7 @@ const ( ) func BenchmarkRingMovingWindowAvg(b *testing.B) { - avg := newMovingWindowAvgImpl(testWindowSize, testBufferSize) + avg := NewMovingWindowAvgImpl(testWindowSize, testBufferSize) for i := 0; i < b.N; i++ { avg.Record(rand.Int63()) avg.Average() diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 178e1a94e01..01077de65b9 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -41,7 +41,7 @@ type ( timestamp time.Time } - movingWindowAvgImpl struct { + MovingWindowAvgImpl struct { sync.RWMutex windowSize time.Duration maxBufferSize int @@ -52,12 +52,12 @@ type ( } ) -func newMovingWindowAvgImpl( +func 
NewMovingWindowAvgImpl( windowSize time.Duration, maxBufferSize int, -) *movingWindowAvgImpl { +) *MovingWindowAvgImpl { buffer := ring.New(maxBufferSize) - return &movingWindowAvgImpl{ + return &MovingWindowAvgImpl{ windowSize: windowSize, maxBufferSize: maxBufferSize, head: buffer, @@ -65,7 +65,7 @@ func newMovingWindowAvgImpl( } } -func (a *movingWindowAvgImpl) Record(val int64) { +func (a *MovingWindowAvgImpl) Record(val int64) { a.Lock() defer a.Unlock() @@ -80,7 +80,7 @@ func (a *movingWindowAvgImpl) Record(val int64) { a.count++ } -func (a *movingWindowAvgImpl) Average() float64 { +func (a *MovingWindowAvgImpl) Average() float64 { a.expireOldValues() a.RLock() @@ -92,7 +92,7 @@ func (a *movingWindowAvgImpl) Average() float64 { return float64(a.sum / int64(a.count)) } -func (a *movingWindowAvgImpl) expireOldValues() { +func (a *MovingWindowAvgImpl) expireOldValues() { a.Lock() defer a.Unlock() @@ -106,7 +106,7 @@ func (a *movingWindowAvgImpl) expireOldValues() { } } -func (a *movingWindowAvgImpl) expireOneLocked() { +func (a *MovingWindowAvgImpl) expireOneLocked() { if data, ok := a.head.Value.(timestampedData); ok { a.sum -= data.value a.count-- diff --git a/common/aggregate/persistence_health_signal_aggregator.go b/common/persistence/client/persistence_health_signal_aggregator.go similarity index 86% rename from common/aggregate/persistence_health_signal_aggregator.go rename to common/persistence/client/persistence_health_signal_aggregator.go index ed1bf3a2ac4..55f98588b91 100644 --- a/common/aggregate/persistence_health_signal_aggregator.go +++ b/common/persistence/client/persistence_health_signal_aggregator.go @@ -22,29 +22,30 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-package aggregate +package client import ( "sync" "sync/atomic" "time" + "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/quotas" ) type ( - PersistenceHealthSignalAggregator[K SignalKey] struct { - keyMapper SignalKeyMapperFn[quotas.Request, K] + PersistenceHealthSignalAggregator[K aggregate.SignalKey] struct { + keyMapper aggregate.SignalKeyMapperFn[quotas.Request, K] totalRequests map[K]*atomic.Int64 totalRequestsLock sync.RWMutex - latencyAverages map[K]MovingWindowAverage + latencyAverages map[K]aggregate.MovingWindowAverage latencyLock sync.RWMutex - errorRatios map[K]MovingWindowAverage + errorRatios map[K]aggregate.MovingWindowAverage errorLock sync.RWMutex windowSize time.Duration @@ -60,8 +61,8 @@ type ( } ) -func NewPersistenceHealthSignalAggregator[K SignalKey]( - keyMapper SignalKeyMapperFn[quotas.Request, K], +func NewPersistenceHealthSignalAggregator[K aggregate.SignalKey]( + keyMapper aggregate.SignalKeyMapperFn[quotas.Request, K], windowSize time.Duration, maxBufferSize int, metricsHandler metrics.Handler, @@ -69,8 +70,8 @@ func NewPersistenceHealthSignalAggregator[K SignalKey]( return &PersistenceHealthSignalAggregator[K]{ keyMapper: keyMapper, totalRequests: make(map[K]*atomic.Int64), - latencyAverages: make(map[K]MovingWindowAverage), - errorRatios: make(map[K]MovingWindowAverage), + latencyAverages: make(map[K]aggregate.MovingWindowAverage), + errorRatios: make(map[K]aggregate.MovingWindowAverage), windowSize: windowSize, maxBufferSize: maxBufferSize, metricsHandler: metricsHandler, @@ -124,19 +125,19 @@ func (s *PersistenceHealthSignalAggregator[_]) ErrorRatio(req quotas.Request) fl return s.getOrInitErrorRatio(req).Average() } -func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) MovingWindowAverage { +func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) 
aggregate.MovingWindowAverage { return s.getOrInitAverage(req, &s.latencyAverages, &s.latencyLock) } -func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) MovingWindowAverage { +func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) aggregate.MovingWindowAverage { return s.getOrInitAverage(req, &s.errorRatios, &s.errorLock) } func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( req quotas.Request, - averages *map[K]MovingWindowAverage, + averages *map[K]aggregate.MovingWindowAverage, lock *sync.RWMutex, -) MovingWindowAverage { +) aggregate.MovingWindowAverage { key := s.keyMapper(req) lock.RLock() @@ -146,7 +147,7 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( return avg } - newAvg := newMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) + newAvg := aggregate.NewMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) lock.Lock() defer lock.Unlock() diff --git a/common/resource/fx.go b/common/resource/fx.go index f443864f4db..3c0f956b6ef 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -221,7 +221,7 @@ func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, metricsHandler metrics.Handler, ) aggregate.SignalAggregator[quotas.Request] { - return aggregate.NewPerShardPerNsHealthSignalAggregator( + return persistenceClient.NewPerShardPerNsHealthSignalAggregator( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), metricsHandler, From df989eb429e08068315479dbec3f540b741677ea Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 21:03:49 -0700 Subject: [PATCH 16/36] cleanup --- .../client/persistence_health_signal_aggregator.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/persistence/client/persistence_health_signal_aggregator.go 
b/common/persistence/client/persistence_health_signal_aggregator.go index 55f98588b91..bebe0bf9529 100644 --- a/common/persistence/client/persistence_health_signal_aggregator.go +++ b/common/persistence/client/persistence_health_signal_aggregator.go @@ -126,22 +126,22 @@ func (s *PersistenceHealthSignalAggregator[_]) ErrorRatio(req quotas.Request) fl } func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) aggregate.MovingWindowAverage { - return s.getOrInitAverage(req, &s.latencyAverages, &s.latencyLock) + return s.getOrInitAverage(req, s.latencyAverages, &s.latencyLock) } func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) aggregate.MovingWindowAverage { - return s.getOrInitAverage(req, &s.errorRatios, &s.errorLock) + return s.getOrInitAverage(req, s.errorRatios, &s.errorLock) } func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( req quotas.Request, - averages *map[K]aggregate.MovingWindowAverage, + averages map[K]aggregate.MovingWindowAverage, lock *sync.RWMutex, ) aggregate.MovingWindowAverage { key := s.keyMapper(req) lock.RLock() - avg, ok := (*averages)[key] + avg, ok := averages[key] lock.RUnlock() if ok { return avg @@ -152,12 +152,12 @@ func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( lock.Lock() defer lock.Unlock() - avg, ok = (*averages)[key] + avg, ok = averages[key] if ok { return avg } - (*averages)[key] = newAvg + averages[key] = newAvg return newAvg } From f960396f4c9a37017caf278530196409b2d6f7af Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Tue, 23 May 2023 21:16:29 -0700 Subject: [PATCH 17/36] linting --- common/aggregate/moving_window_average.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 01077de65b9..6d3960e834e 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -97,12 
+97,12 @@ func (a *MovingWindowAvgImpl) expireOldValues() { defer a.Unlock() for ; a.head != a.tail; a.head = a.head.Next() { - if data, ok := a.head.Value.(timestampedData); ok && time.Since(data.timestamp) > a.windowSize { - a.sum -= data.value - a.count-- - } else { + data, ok := a.head.Value.(timestampedData) + if !ok || time.Since(data.timestamp) < a.windowSize { break } + a.sum -= data.value + a.count-- } } From 3c409ce78310a86cee852d89735defd74cfb2eda Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 04:51:20 -0700 Subject: [PATCH 18/36] remove generics --- common/aggregate/signal_aggregator.go | 40 ---- common/persistence/client/factory.go | 18 +- common/persistence/client/fx.go | 4 +- .../persistence_health_signal_aggregator.go | 202 ------------------ .../persistence/health_signal_aggregator.go | 157 ++++++++++++++ .../noop_health_signal_aggregator.go} | 18 +- .../persistence-tests/persistenceTestBase.go | 7 +- .../persistence/persistenceMetricClients.go | 27 ++- common/resource/fx.go | 5 +- 9 files changed, 194 insertions(+), 284 deletions(-) delete mode 100644 common/aggregate/signal_aggregator.go delete mode 100644 common/persistence/client/persistence_health_signal_aggregator.go create mode 100644 common/persistence/health_signal_aggregator.go rename common/{aggregate/noop_signal_aggregator.go => persistence/noop_health_signal_aggregator.go} (71%) diff --git a/common/aggregate/signal_aggregator.go b/common/aggregate/signal_aggregator.go deleted file mode 100644 index abcfb4dfb72..00000000000 --- a/common/aggregate/signal_aggregator.go +++ /dev/null @@ -1,40 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -package aggregate - -type ( - SignalKey interface { - comparable - GetNamespace() string - } - - SignalKeyMapperFn[T any, K SignalKey] func(origin T) K - - SignalAggregator[T any] interface { - GetRecordFn(origin T) func(err error) - AverageLatency(origin T) float64 - ErrorRatio(origin T) float64 - } -) diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index 662c507452f..b8b60018a3b 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -26,8 +26,6 @@ package client import ( "go.temporal.io/api/serviceerror" - "go.temporal.io/server/common/aggregate" - "go.temporal.io/server/common" "go.temporal.io/server/common/config" "go.temporal.io/server/common/log" @@ -70,7 +68,7 @@ type ( logger log.Logger clusterName string ratelimiter quotas.RequestRateLimiter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals p.HealthSignalAggregator } ) @@ -89,7 +87,7 @@ func NewFactory( clusterName string, metricsHandler metrics.Handler, logger log.Logger, - healthSignals aggregate.SignalAggregator[quotas.Request], + healthSignals p.HealthSignalAggregator, ) Factory { return &factoryImpl{ dataStoreFactory: dataStoreFactory, @@ -118,7 +116,7 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } @@ -140,7 +138,7 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } 
@@ -163,7 +161,7 @@ func (f *factoryImpl) NewMetadataManager() (p.MetadataManager, error) { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } @@ -186,7 +184,7 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } @@ -209,7 +207,7 @@ func (f *factoryImpl) NewExecutionManager() (p.ExecutionManager, error) { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } @@ -230,7 +228,7 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } else { - f.healthSignals = aggregate.NoopPersistenceHealthSignalAggregator + f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } diff --git a/common/persistence/client/fx.go b/common/persistence/client/fx.go index 58a77127478..0bd5d4ac239 100644 --- a/common/persistence/client/fx.go +++ b/common/persistence/client/fx.go @@ -25,7 +25,7 @@ package client import ( - "go.temporal.io/server/common/aggregate" + "go.temporal.io/server/common/persistence" "go.uber.org/fx" "go.temporal.io/server/common/cluster" @@ 
-58,7 +58,7 @@ type ( ServiceName primitives.ServiceName MetricsHandler metrics.Handler Logger log.Logger - HealthSignals aggregate.SignalAggregator[quotas.Request] + HealthSignals persistence.HealthSignalAggregator } FactoryProviderFn func(NewFactoryParams) Factory diff --git a/common/persistence/client/persistence_health_signal_aggregator.go b/common/persistence/client/persistence_health_signal_aggregator.go deleted file mode 100644 index bebe0bf9529..00000000000 --- a/common/persistence/client/persistence_health_signal_aggregator.go +++ /dev/null @@ -1,202 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -package client - -import ( - "sync" - "sync/atomic" - "time" - - "go.temporal.io/server/common/aggregate" - "go.temporal.io/server/common/dynamicconfig" - "go.temporal.io/server/common/metrics" - "go.temporal.io/server/common/quotas" -) - -type ( - PersistenceHealthSignalAggregator[K aggregate.SignalKey] struct { - keyMapper aggregate.SignalKeyMapperFn[quotas.Request, K] - - totalRequests map[K]*atomic.Int64 - totalRequestsLock sync.RWMutex - - latencyAverages map[K]aggregate.MovingWindowAverage - latencyLock sync.RWMutex - - errorRatios map[K]aggregate.MovingWindowAverage - errorLock sync.RWMutex - - windowSize time.Duration - maxBufferSize int - - metricsHandler metrics.Handler - emitMetricsTimer *time.Ticker - } - - perShardPerNsHealthSignalKey struct { - namespace string - shardID int32 - } -) - -func NewPersistenceHealthSignalAggregator[K aggregate.SignalKey]( - keyMapper aggregate.SignalKeyMapperFn[quotas.Request, K], - windowSize time.Duration, - maxBufferSize int, - metricsHandler metrics.Handler, -) *PersistenceHealthSignalAggregator[K] { - return &PersistenceHealthSignalAggregator[K]{ - keyMapper: keyMapper, - totalRequests: make(map[K]*atomic.Int64), - latencyAverages: make(map[K]aggregate.MovingWindowAverage), - errorRatios: make(map[K]aggregate.MovingWindowAverage), - windowSize: windowSize, - maxBufferSize: maxBufferSize, - metricsHandler: metricsHandler, - emitMetricsTimer: time.NewTicker(windowSize), - } -} - -func NewPerShardPerNsHealthSignalAggregator( - windowSize dynamicconfig.DurationPropertyFn, - maxBufferSize dynamicconfig.IntPropertyFn, - metricsHandler metrics.Handler, -) *PersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey] { - return NewPersistenceHealthSignalAggregator[perShardPerNsHealthSignalKey]( - perShardPerNsKeyMapperFn, - windowSize(), - maxBufferSize(), - metricsHandler, - ) -} - -func perShardPerNsKeyMapperFn(req quotas.Request) perShardPerNsHealthSignalKey { - return perShardPerNsHealthSignalKey{ - namespace: 
req.Caller, - shardID: req.CallerSegment, - } -} - -func (k perShardPerNsHealthSignalKey) GetNamespace() string { - return k.namespace -} - -func (s *PersistenceHealthSignalAggregator[_]) GetRecordFn(req quotas.Request) func(err error) { - start := time.Now() - return func(err error) { - s.getOrInitRequestCount(req).Add(1) - s.getOrInitLatencyAverage(req).Record(time.Since(start).Milliseconds()) - errorRatio := s.getOrInitErrorRatio(req) - if err != nil { - errorRatio.Record(1) - } else { - errorRatio.Record(0) - } - } -} - -func (s *PersistenceHealthSignalAggregator[_]) AverageLatency(req quotas.Request) float64 { - return s.getOrInitLatencyAverage(req).Average() -} - -func (s *PersistenceHealthSignalAggregator[_]) ErrorRatio(req quotas.Request) float64 { - return s.getOrInitErrorRatio(req).Average() -} - -func (s *PersistenceHealthSignalAggregator[_]) getOrInitLatencyAverage(req quotas.Request) aggregate.MovingWindowAverage { - return s.getOrInitAverage(req, s.latencyAverages, &s.latencyLock) -} - -func (s *PersistenceHealthSignalAggregator[_]) getOrInitErrorRatio(req quotas.Request) aggregate.MovingWindowAverage { - return s.getOrInitAverage(req, s.errorRatios, &s.errorLock) -} - -func (s *PersistenceHealthSignalAggregator[K]) getOrInitAverage( - req quotas.Request, - averages map[K]aggregate.MovingWindowAverage, - lock *sync.RWMutex, -) aggregate.MovingWindowAverage { - key := s.keyMapper(req) - - lock.RLock() - avg, ok := averages[key] - lock.RUnlock() - if ok { - return avg - } - - newAvg := aggregate.NewMovingWindowAvgImpl(s.windowSize, s.maxBufferSize) - - lock.Lock() - defer lock.Unlock() - - avg, ok = averages[key] - if ok { - return avg - } - - averages[key] = newAvg - return newAvg -} - -func (s *PersistenceHealthSignalAggregator[_]) getOrInitRequestCount( - req quotas.Request, -) *atomic.Int64 { - key := s.keyMapper(req) - - s.totalRequestsLock.RLock() - count, ok := s.totalRequests[key] - s.totalRequestsLock.RUnlock() - if ok { - return count - } - - 
newCount := &atomic.Int64{} - - s.totalRequestsLock.Lock() - defer s.totalRequestsLock.Unlock() - - count, ok = s.totalRequests[key] - if ok { - return count - } - - s.totalRequests[key] = newCount - return newCount -} - -func (s *PersistenceHealthSignalAggregator[_]) emitMetricsLoop() { - for { - select { - case <-s.emitMetricsTimer.C: - s.totalRequestsLock.RLock() - for key, count := range s.totalRequests { - shardRPS := int64(float64(count.Swap(0)) / s.windowSize.Seconds()) - s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS, metrics.NamespaceTag(key.GetNamespace())) - } - s.totalRequestsLock.RUnlock() - } - } -} diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go new file mode 100644 index 00000000000..09b47194163 --- /dev/null +++ b/common/persistence/health_signal_aggregator.go @@ -0,0 +1,157 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package persistence + +import ( + "sync" + "sync/atomic" + "time" + + "go.temporal.io/server/common/aggregate" + "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/metrics" + "go.temporal.io/server/common/quotas" +) + +type ( + HealthSignalAggregator interface { + GetRecordFn(req quotas.Request) func(err error) + AverageLatency() float64 + ErrorRatio() float64 + } + + HealthSignalAggregatorImpl struct { + requestsPerShardAndNs map[perShardPerNamespaceKey]*atomic.Int64 + requestsLock sync.RWMutex + + latencyAverage aggregate.MovingWindowAverage + errorRatio aggregate.MovingWindowAverage + + metricsHandler metrics.Handler + emitMetricsInterval time.Duration + emitMetricsTimer *time.Ticker + } + + perShardPerNamespaceKey struct { + namespace string + shard int32 + } +) + +func NewHealthSignalAggregatorImpl( + windowSize dynamicconfig.DurationPropertyFn, + maxBufferSize dynamicconfig.IntPropertyFn, + metricsHandler metrics.Handler, +) *HealthSignalAggregatorImpl { + return &HealthSignalAggregatorImpl{ + requestsPerShardAndNs: make(map[perShardPerNamespaceKey]*atomic.Int64), + latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), + errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), + metricsHandler: metricsHandler, + emitMetricsInterval: windowSize(), + emitMetricsTimer: time.NewTicker(windowSize()), + } +} + +func (s *HealthSignalAggregatorImpl) GetRecordFn(req quotas.Request) func(err error) { + start := time.Now() + return func(err error) { + s.getOrInitRequestCount(req).Add(1) + s.latencyAverage.Record(time.Since(start).Milliseconds()) + if isUnhealthyError(err) { + s.errorRatio.Record(1) + } else { + 
s.errorRatio.Record(0) + } + } +} + +func (s *HealthSignalAggregatorImpl) AverageLatency() float64 { + return s.latencyAverage.Average() +} + +func (s *HealthSignalAggregatorImpl) ErrorRatio() float64 { + return s.errorRatio.Average() +} + +func (s *HealthSignalAggregatorImpl) getOrInitRequestCount(req quotas.Request) *atomic.Int64 { + key := getPerShardPerNsKey(req) + + s.requestsLock.RLock() + count, ok := s.requestsPerShardAndNs[key] + s.requestsLock.RUnlock() + if ok { + return count + } + + newCount := &atomic.Int64{} + + s.requestsLock.Lock() + defer s.requestsLock.Unlock() + + count, ok = s.requestsPerShardAndNs[key] + if ok { + return count + } + + s.requestsPerShardAndNs[key] = newCount + return newCount +} + +func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { + for { + select { + case <-s.emitMetricsTimer.C: + s.requestsLock.RLock() + for key, count := range s.requestsPerShardAndNs { + shardRPS := int64(float64(count.Swap(0)) / s.emitMetricsInterval.Seconds()) + s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS, metrics.NamespaceTag(key.namespace)) + } + s.requestsLock.RUnlock() + } + } +} + +func getPerShardPerNsKey(req quotas.Request) perShardPerNamespaceKey { + return perShardPerNamespaceKey{ + namespace: req.Caller, + shard: req.CallerSegment, + } +} + +func isUnhealthyError(err error) bool { + if err == nil { + return false + } + switch err.(type) { + case *ShardOwnershipLostError, + *AppendHistoryTimeoutError, + *TimeoutError: + return true + + default: + return false + } +} diff --git a/common/aggregate/noop_signal_aggregator.go b/common/persistence/noop_health_signal_aggregator.go similarity index 71% rename from common/aggregate/noop_signal_aggregator.go rename to common/persistence/noop_health_signal_aggregator.go index 3ff947b3ae0..915b35599bf 100644 --- a/common/aggregate/noop_signal_aggregator.go +++ b/common/persistence/noop_health_signal_aggregator.go 
@@ -22,26 +22,28 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -package aggregate +package persistence -import "go.temporal.io/server/common/quotas" +import ( + "go.temporal.io/server/common/quotas" +) -var NoopPersistenceHealthSignalAggregator SignalAggregator[quotas.Request] = newNoopSignalAggregator[quotas.Request]() +var NoopHealthSignalAggregator HealthSignalAggregator = newNoopSignalAggregator() type ( - noopSignalAggregator[T any] struct{} + noopSignalAggregator struct{} ) -func newNoopSignalAggregator[T any]() *noopSignalAggregator[T] { return &noopSignalAggregator[T]{} } +func newNoopSignalAggregator() *noopSignalAggregator { return &noopSignalAggregator{} } -func (a *noopSignalAggregator[T]) GetRecordFn(T) func(error) { +func (a *noopSignalAggregator) GetRecordFn(_ quotas.Request) func(error) { return func(error) {} } -func (a *noopSignalAggregator[T]) AverageLatency(T) float64 { +func (a *noopSignalAggregator) AverageLatency() float64 { return 0 } -func (*noopSignalAggregator[T]) ErrorRatio(T) float64 { +func (*noopSignalAggregator) ErrorRatio() float64 { return 0 } diff --git a/common/persistence/persistence-tests/persistenceTestBase.go b/common/persistence/persistence-tests/persistenceTestBase.go index 7a6f9d0cabd..02a88204a09 100644 --- a/common/persistence/persistence-tests/persistenceTestBase.go +++ b/common/persistence/persistence-tests/persistenceTestBase.go @@ -33,9 +33,6 @@ import ( "time" "github.com/stretchr/testify/suite" - "go.temporal.io/server/common/aggregate" - "go.temporal.io/server/common/quotas" - persistencespb "go.temporal.io/server/api/persistence/v1" replicationspb "go.temporal.io/server/api/replication/v1" "go.temporal.io/server/common" @@ -100,7 +97,7 @@ type ( TaskIDGenerator TransferTaskIDGenerator ClusterMetadata cluster.Metadata SearchAttributesManager searchattribute.Manager - PersistenceHealthSignals aggregate.SignalAggregator[quotas.Request] + PersistenceHealthSignals 
persistence.HealthSignalAggregator ReadLevel int64 ReplicationReadLevel int64 DefaultTestCluster PersistenceTestCluster @@ -205,7 +202,7 @@ func (s *TestBase) Setup(clusterMetadataConfig *cluster.Config) { s.Logger, metrics.NoopMetricsHandler, ) - factory := client.NewFactory(dataStoreFactory, &cfg, nil, serialization.NewSerializer(), clusterName, metrics.NoopMetricsHandler, s.Logger, aggregate.NoopPersistenceHealthSignalAggregator) + factory := client.NewFactory(dataStoreFactory, &cfg, nil, serialization.NewSerializer(), clusterName, metrics.NoopMetricsHandler, s.Logger, persistence.NoopHealthSignalAggregator) s.TaskMgr, err = factory.NewTaskManager() s.fatalOnError("NewTaskManager", err) diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index fc30f76871e..bff163ea231 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -31,7 +31,6 @@ import ( commonpb "go.temporal.io/api/common/v1" "go.temporal.io/api/serviceerror" - "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common/quotas" "go.temporal.io/server/common/headers" @@ -49,37 +48,37 @@ type ( shardPersistenceClient struct { metricEmitter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence ShardManager } executionPersistenceClient struct { metricEmitter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence ExecutionManager } taskPersistenceClient struct { metricEmitter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence TaskManager } metadataPersistenceClient struct { metricEmitter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence MetadataManager } clusterMetadataPersistenceClient struct { metricEmitter - healthSignals 
aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence ClusterMetadataManager } queuePersistenceClient struct { metricEmitter - healthSignals aggregate.SignalAggregator[quotas.Request] + healthSignals HealthSignalAggregator persistence Queue } ) @@ -92,7 +91,7 @@ var _ ClusterMetadataManager = (*clusterMetadataPersistenceClient)(nil) var _ Queue = (*queuePersistenceClient)(nil) // NewShardPersistenceMetricsClient creates a client to manage shards -func NewShardPersistenceMetricsClient(persistence ShardManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ShardManager { +func NewShardPersistenceMetricsClient(persistence ShardManager, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) ShardManager { return &shardPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -104,7 +103,7 @@ func NewShardPersistenceMetricsClient(persistence ShardManager, metricsHandler m } // NewExecutionPersistenceMetricsClient creates a client to manage executions -func NewExecutionPersistenceMetricsClient(persistence ExecutionManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ExecutionManager { +func NewExecutionPersistenceMetricsClient(persistence ExecutionManager, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) ExecutionManager { return &executionPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -116,7 +115,7 @@ func NewExecutionPersistenceMetricsClient(persistence ExecutionManager, metricsH } // NewTaskPersistenceMetricsClient creates a client to manage tasks -func NewTaskPersistenceMetricsClient(persistence TaskManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) TaskManager { +func 
NewTaskPersistenceMetricsClient(persistence TaskManager, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) TaskManager { return &taskPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -128,7 +127,7 @@ func NewTaskPersistenceMetricsClient(persistence TaskManager, metricsHandler met } // NewMetadataPersistenceMetricsClient creates a MetadataManager client to manage metadata -func NewMetadataPersistenceMetricsClient(persistence MetadataManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) MetadataManager { +func NewMetadataPersistenceMetricsClient(persistence MetadataManager, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) MetadataManager { return &metadataPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -140,7 +139,7 @@ func NewMetadataPersistenceMetricsClient(persistence MetadataManager, metricsHan } // NewClusterMetadataPersistenceMetricsClient creates a ClusterMetadataManager client to manage cluster metadata -func NewClusterMetadataPersistenceMetricsClient(persistence ClusterMetadataManager, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) ClusterMetadataManager { +func NewClusterMetadataPersistenceMetricsClient(persistence ClusterMetadataManager, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) ClusterMetadataManager { return &clusterMetadataPersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -152,7 +151,7 @@ func NewClusterMetadataPersistenceMetricsClient(persistence ClusterMetadataManag } // NewQueuePersistenceMetricsClient creates a client to manage queue -func NewQueuePersistenceMetricsClient(persistence Queue, metricsHandler metrics.Handler, healthSignals aggregate.SignalAggregator[quotas.Request], logger log.Logger) 
Queue { +func NewQueuePersistenceMetricsClient(persistence Queue, metricsHandler metrics.Handler, healthSignals HealthSignalAggregator, logger log.Logger) Queue { return &queuePersistenceClient{ metricEmitter: metricEmitter{ metricsHandler: metricsHandler, @@ -860,7 +859,7 @@ func recordMetricsAndSignalsFn( shardID int32, scope string, emitter metricEmitter, - healthSignals aggregate.SignalAggregator[quotas.Request], + healthSignals HealthSignalAggregator, err error, ) func(error) { startTime := time.Now().UTC() diff --git a/common/resource/fx.go b/common/resource/fx.go index 3c0f956b6ef..a10cf60adcf 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -31,7 +31,6 @@ import ( "os" "time" - "go.temporal.io/server/common/aggregate" "go.uber.org/fx" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -220,8 +219,8 @@ func NamespaceRegistryProvider( func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, metricsHandler metrics.Handler, -) aggregate.SignalAggregator[quotas.Request] { - return persistenceClient.NewPerShardPerNsHealthSignalAggregator( +) persistence.HealthSignalAggregator { + return persistence.NewHealthSignalAggregatorImpl( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), metricsHandler, From fe2955dc28b8a0ea5ad10826f232c10d4f379b98 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 06:09:00 -0700 Subject: [PATCH 19/36] cleanup --- common/persistence/client/factory.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index b8b60018a3b..188a598a796 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -115,7 +115,7 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { if f.metricsHandler 
!= nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) @@ -137,7 +137,7 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { if f.metricsHandler != nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) @@ -160,7 +160,7 @@ func (f *factoryImpl) NewMetadataManager() (p.MetadataManager, error) { if f.metricsHandler != nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) @@ -183,7 +183,7 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err if f.metricsHandler != nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) @@ -206,7 +206,7 @@ func (f *factoryImpl) NewExecutionManager() (p.ExecutionManager, error) { if f.metricsHandler != nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) @@ -227,7 
+227,7 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu if f.metricsHandler != nil || f.healthSignals != nil { if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler - } else { + } else if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) From f412f60a75655c1206f0f90dae329896b7506200 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 06:19:12 -0700 Subject: [PATCH 20/36] fix deferred metric fn --- .../persistence/persistenceMetricClients.go | 141 +++++++++--------- 1 file changed, 70 insertions(+), 71 deletions(-) diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index bff163ea231..1c7bf405ff0 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -170,7 +170,7 @@ func (p *shardPersistenceClient) GetOrCreateShard( ctx context.Context, request *GetOrCreateShardRequest, ) (_ *GetOrCreateShardResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetOrCreateShard(ctx, request) } @@ -178,7 +178,7 @@ func (p *shardPersistenceClient) UpdateShard( ctx context.Context, request *UpdateShardRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateShard(ctx, request) } @@ -186,7 +186,7 @@ func (p 
*shardPersistenceClient) AssertShardOwnership( ctx context.Context, request *AssertShardOwnershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.AssertShardOwnership(ctx, request) } @@ -206,7 +206,7 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( ctx context.Context, request *CreateWorkflowExecutionRequest, ) (_ *CreateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -214,7 +214,7 @@ func (p *executionPersistenceClient) GetWorkflowExecution( ctx context.Context, request *GetWorkflowExecutionRequest, ) (_ *GetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetWorkflowExecution(ctx, request) } @@ -222,7 +222,7 @@ func (p *executionPersistenceClient) SetWorkflowExecution( ctx context.Context, request *SetWorkflowExecutionRequest, ) (_ *SetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, 
p.metricEmitter, p.healthSignals)(retErr) return p.persistence.SetWorkflowExecution(ctx, request) } @@ -230,7 +230,7 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( ctx context.Context, request *UpdateWorkflowExecutionRequest, ) (_ *UpdateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -238,7 +238,7 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( ctx context.Context, request *ConflictResolveWorkflowExecutionRequest, ) (_ *ConflictResolveWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -246,7 +246,7 @@ func (p *executionPersistenceClient) DeleteWorkflowExecution( ctx context.Context, request *DeleteWorkflowExecutionRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -254,7 +254,7 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( ctx context.Context, request *DeleteCurrentWorkflowExecutionRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, 
request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteCurrentWorkflowExecution(ctx, request) } @@ -262,7 +262,7 @@ func (p *executionPersistenceClient) GetCurrentExecution( ctx context.Context, request *GetCurrentExecutionRequest, ) (_ *GetCurrentExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetCurrentExecution(ctx, request) } @@ -270,7 +270,7 @@ func (p *executionPersistenceClient) ListConcreteExecutions( ctx context.Context, request *ListConcreteExecutionsRequest, ) (_ *ListConcreteExecutionsResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ListConcreteExecutions(ctx, request) } @@ -305,7 +305,7 @@ func (p *executionPersistenceClient) AddHistoryTasks( ctx context.Context, request *AddHistoryTasksRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.AddHistoryTasks(ctx, request) } @@ -329,7 +329,7 @@ func (p *executionPersistenceClient) GetHistoryTasks( return nil, 
serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetHistoryTasks(ctx, request) } @@ -353,7 +353,7 @@ func (p *executionPersistenceClient) CompleteHistoryTask( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CompleteHistoryTask(ctx, request) } @@ -377,7 +377,7 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -385,7 +385,7 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( ctx context.Context, request *PutReplicationTaskToDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -393,7 +393,7 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ *GetHistoryTasksResponse, retErr 
error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -401,7 +401,7 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( ctx context.Context, request *DeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -409,7 +409,7 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( ctx context.Context, request *RangeDeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -417,7 +417,7 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return 
p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -433,7 +433,7 @@ func (p *taskPersistenceClient) CreateTasks( ctx context.Context, request *CreateTasksRequest, ) (_ *CreateTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CreateTasks(ctx, request) } @@ -441,7 +441,7 @@ func (p *taskPersistenceClient) GetTasks( ctx context.Context, request *GetTasksRequest, ) (_ *GetTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetTasks(ctx, request) } @@ -449,7 +449,7 @@ func (p *taskPersistenceClient) CompleteTask( ctx context.Context, request *CompleteTaskRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CompleteTask(ctx, request) } @@ -457,7 +457,7 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( ctx context.Context, request *CompleteTasksLessThanRequest, ) (_ int, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CompleteTasksLessThan(ctx, request) } @@ 
-465,7 +465,7 @@ func (p *taskPersistenceClient) CreateTaskQueue( ctx context.Context, request *CreateTaskQueueRequest, ) (_ *CreateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CreateTaskQueue(ctx, request) } @@ -473,7 +473,7 @@ func (p *taskPersistenceClient) UpdateTaskQueue( ctx context.Context, request *UpdateTaskQueueRequest, ) (_ *UpdateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateTaskQueue(ctx, request) } @@ -481,7 +481,7 @@ func (p *taskPersistenceClient) GetTaskQueue( ctx context.Context, request *GetTaskQueueRequest, ) (_ *GetTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetTaskQueue(ctx, request) } @@ -489,7 +489,7 @@ func (p *taskPersistenceClient) ListTaskQueue( ctx context.Context, request *ListTaskQueueRequest, ) (_ *ListTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ListTaskQueue(ctx, 
request) } @@ -497,7 +497,7 @@ func (p *taskPersistenceClient) DeleteTaskQueue( ctx context.Context, request *DeleteTaskQueueRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteTaskQueue(ctx, request) } @@ -513,7 +513,7 @@ func (p *metadataPersistenceClient) CreateNamespace( ctx context.Context, request *CreateNamespaceRequest, ) (_ *CreateNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.CreateNamespace(ctx, request) } @@ -521,7 +521,7 @@ func (p *metadataPersistenceClient) GetNamespace( ctx context.Context, request *GetNamespaceRequest, ) (_ *GetNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetNamespace(ctx, request) } @@ -529,7 +529,7 @@ func (p *metadataPersistenceClient) UpdateNamespace( ctx context.Context, request *UpdateNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateNamespace(ctx, request) } @@ 
-537,7 +537,7 @@ func (p *metadataPersistenceClient) RenameNamespace( ctx context.Context, request *RenameNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.RenameNamespace(ctx, request) } @@ -545,7 +545,7 @@ func (p *metadataPersistenceClient) DeleteNamespace( ctx context.Context, request *DeleteNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteNamespace(ctx, request) } @@ -553,7 +553,7 @@ func (p *metadataPersistenceClient) DeleteNamespaceByName( ctx context.Context, request *DeleteNamespaceByNameRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -561,14 +561,14 @@ func (p *metadataPersistenceClient) ListNamespaces( ctx context.Context, request *ListNamespacesRequest, ) (_ *ListNamespacesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ListNamespaces(ctx, request) } 
func (p *metadataPersistenceClient) GetMetadata( ctx context.Context, ) (_ *GetMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetMetadata(ctx) } @@ -581,7 +581,7 @@ func (p *executionPersistenceClient) AppendHistoryNodes( ctx context.Context, request *AppendHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.AppendHistoryNodes(ctx, request) } @@ -590,7 +590,7 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( ctx context.Context, request *AppendRawHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -599,7 +599,7 @@ func (p *executionPersistenceClient) ReadHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return 
p.persistence.ReadHistoryBranch(ctx, request) } @@ -607,7 +607,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchReverse( ctx context.Context, request *ReadHistoryBranchReverseRequest, ) (_ *ReadHistoryBranchReverseResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -616,7 +616,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchByBatchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -625,7 +625,7 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadRawHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -634,7 +634,7 @@ func (p *executionPersistenceClient) ForkHistoryBranch( ctx context.Context, request *ForkHistoryBranchRequest, ) (_ *ForkHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals, 
retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ForkHistoryBranch(ctx, request) } @@ -643,7 +643,7 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( ctx context.Context, request *DeleteHistoryBranchRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteHistoryBranch(ctx, request) } @@ -652,7 +652,7 @@ func (p *executionPersistenceClient) TrimHistoryBranch( ctx context.Context, request *TrimHistoryBranchRequest, ) (_ *TrimHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.TrimHistoryBranch(ctx, request) } @@ -660,7 +660,7 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( ctx context.Context, request *GetAllHistoryTreeBranchesRequest, ) (_ *GetAllHistoryTreeBranchesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -669,7 +669,7 @@ func (p *executionPersistenceClient) GetHistoryTree( ctx context.Context, request *GetHistoryTreeRequest, ) (_ *GetHistoryTreeResponse, retErr error) { - defer 
recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetHistoryTree(ctx, request) } @@ -684,7 +684,7 @@ func (p *queuePersistenceClient) EnqueueMessage( ctx context.Context, blob commonpb.DataBlob, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.EnqueueMessage(ctx, blob) } @@ -693,7 +693,7 @@ func (p *queuePersistenceClient) ReadMessages( lastMessageID int64, maxCount int, ) (_ []*QueueMessage, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -701,14 +701,14 @@ func (p *queuePersistenceClient) UpdateAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, 
p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetAckLevels(ctx) } @@ -716,7 +716,7 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -724,7 +724,7 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( ctx context.Context, blob commonpb.DataBlob, ) (_ int64, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -735,7 +735,7 @@ func (p *queuePersistenceClient) ReadMessagesFromDLQ( pageSize int, pageToken []byte, ) (_ []*QueueMessage, _ []byte, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) } @@ -743,7 +743,7 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, 
p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -752,7 +752,7 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( firstMessageID int64, lastMessageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -760,14 +760,14 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpdateDLQAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetDLQAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetDLQAckLevels(ctx) } @@ -783,14 +783,14 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( ctx context.Context, request *ListClusterMetadataRequest, ) (_ *ListClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.ListClusterMetadata(ctx, request) } func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( ctx context.Context, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -798,7 +798,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( ctx context.Context, request *GetClusterMetadataRequest, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetClusterMetadata(ctx, request) } @@ -806,7 +806,7 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( ctx context.Context, request *SaveClusterMetadataRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.SaveClusterMetadata(ctx, request) } @@ -814,7 +814,7 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( ctx context.Context, request 
*DeleteClusterMetadataRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -826,7 +826,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( ctx context.Context, request *GetClusterMembersRequest, ) (_ *GetClusterMembersResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.GetClusterMembers(ctx, request) } @@ -834,7 +834,7 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( ctx context.Context, request *UpsertClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.UpsertClusterMembership(ctx, request) } @@ -842,7 +842,7 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( ctx context.Context, request *PruneClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.PruneClusterMembership(ctx, 
request) } @@ -850,7 +850,7 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( ctx context.Context, currentClusterName string, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals, retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } @@ -860,7 +860,6 @@ func recordMetricsAndSignalsFn( scope string, emitter metricEmitter, healthSignals HealthSignalAggregator, - err error, ) func(error) { startTime := time.Now().UTC() callerInfo := headers.GetCallerInfo(ctx) @@ -873,7 +872,7 @@ func recordMetricsAndSignalsFn( callerInfo.CallOrigin, )) - return func(error) { + return func(err error) { signalFn(err) emitter.recordRequestMetrics(scope, callerInfo.CallerName, startTime, err) } From 3caed40d38800e4c6526c4f58f04761afaa05c17 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 06:36:32 -0700 Subject: [PATCH 21/36] fix defer metric fn --- .../persistence/persistenceMetricClients.go | 143 +++++++++--------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index 1c7bf405ff0..74eb8849054 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -170,7 +170,7 @@ func (p *shardPersistenceClient) GetOrCreateShard( ctx context.Context, request *GetOrCreateShardRequest, ) (_ *GetOrCreateShardResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals, retErr)() return 
p.persistence.GetOrCreateShard(ctx, request) } @@ -178,7 +178,7 @@ func (p *shardPersistenceClient) UpdateShard( ctx context.Context, request *UpdateShardRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateShard(ctx, request) } @@ -186,7 +186,7 @@ func (p *shardPersistenceClient) AssertShardOwnership( ctx context.Context, request *AssertShardOwnershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.AssertShardOwnership(ctx, request) } @@ -206,7 +206,7 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( ctx context.Context, request *CreateWorkflowExecutionRequest, ) (_ *CreateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -214,7 +214,7 @@ func (p *executionPersistenceClient) GetWorkflowExecution( ctx context.Context, request *GetWorkflowExecutionRequest, ) (_ *GetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, 
metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetWorkflowExecution(ctx, request) } @@ -222,7 +222,7 @@ func (p *executionPersistenceClient) SetWorkflowExecution( ctx context.Context, request *SetWorkflowExecutionRequest, ) (_ *SetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.SetWorkflowExecution(ctx, request) } @@ -230,7 +230,7 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( ctx context.Context, request *UpdateWorkflowExecutionRequest, ) (_ *UpdateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -238,7 +238,7 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( ctx context.Context, request *ConflictResolveWorkflowExecutionRequest, ) (_ *ConflictResolveWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -246,7 +246,7 @@ func (p *executionPersistenceClient) DeleteWorkflowExecution( ctx context.Context, request *DeleteWorkflowExecutionRequest, ) 
(retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -254,7 +254,7 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( ctx context.Context, request *DeleteCurrentWorkflowExecutionRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteCurrentWorkflowExecution(ctx, request) } @@ -262,7 +262,7 @@ func (p *executionPersistenceClient) GetCurrentExecution( ctx context.Context, request *GetCurrentExecutionRequest, ) (_ *GetCurrentExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetCurrentExecution(ctx, request) } @@ -270,7 +270,7 @@ func (p *executionPersistenceClient) ListConcreteExecutions( ctx context.Context, request *ListConcreteExecutionsRequest, ) (_ *ListConcreteExecutionsResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ListConcreteExecutions(ctx, 
request) } @@ -305,7 +305,7 @@ func (p *executionPersistenceClient) AddHistoryTasks( ctx context.Context, request *AddHistoryTasksRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.AddHistoryTasks(ctx, request) } @@ -329,7 +329,7 @@ func (p *executionPersistenceClient) GetHistoryTasks( return nil, serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetHistoryTasks(ctx, request) } @@ -353,7 +353,7 @@ func (p *executionPersistenceClient) CompleteHistoryTask( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CompleteHistoryTask(ctx, request) } @@ -377,7 +377,7 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -385,7 +385,7 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( ctx context.Context, request 
*PutReplicationTaskToDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -393,7 +393,7 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ *GetHistoryTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -401,7 +401,7 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( ctx context.Context, request *DeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -409,7 +409,7 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( ctx context.Context, request *RangeDeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, 
p.healthSignals, retErr)() return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -417,7 +417,7 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -433,7 +433,7 @@ func (p *taskPersistenceClient) CreateTasks( ctx context.Context, request *CreateTasksRequest, ) (_ *CreateTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CreateTasks(ctx, request) } @@ -441,7 +441,7 @@ func (p *taskPersistenceClient) GetTasks( ctx context.Context, request *GetTasksRequest, ) (_ *GetTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetTasks(ctx, request) } @@ -449,7 +449,7 @@ func (p *taskPersistenceClient) CompleteTask( ctx context.Context, request *CompleteTaskRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, 
p.healthSignals, retErr)() return p.persistence.CompleteTask(ctx, request) } @@ -457,7 +457,7 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( ctx context.Context, request *CompleteTasksLessThanRequest, ) (_ int, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CompleteTasksLessThan(ctx, request) } @@ -465,7 +465,7 @@ func (p *taskPersistenceClient) CreateTaskQueue( ctx context.Context, request *CreateTaskQueueRequest, ) (_ *CreateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CreateTaskQueue(ctx, request) } @@ -473,7 +473,7 @@ func (p *taskPersistenceClient) UpdateTaskQueue( ctx context.Context, request *UpdateTaskQueueRequest, ) (_ *UpdateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateTaskQueue(ctx, request) } @@ -481,7 +481,7 @@ func (p *taskPersistenceClient) GetTaskQueue( ctx context.Context, request *GetTaskQueueRequest, ) (_ *GetTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetTaskQueue(ctx, request) } @@ -489,7 +489,7 @@ func (p *taskPersistenceClient) ListTaskQueue( ctx context.Context, request *ListTaskQueueRequest, ) (_ *ListTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ListTaskQueue(ctx, request) } @@ -497,7 +497,7 @@ func (p *taskPersistenceClient) DeleteTaskQueue( ctx context.Context, request *DeleteTaskQueueRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteTaskQueue(ctx, request) } @@ -513,7 +513,7 @@ func (p *metadataPersistenceClient) CreateNamespace( ctx context.Context, request *CreateNamespaceRequest, ) (_ *CreateNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.CreateNamespace(ctx, request) } @@ -521,7 +521,7 @@ func (p *metadataPersistenceClient) GetNamespace( ctx context.Context, request *GetNamespaceRequest, ) (_ *GetNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, 
CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetNamespace(ctx, request) } @@ -529,7 +529,7 @@ func (p *metadataPersistenceClient) UpdateNamespace( ctx context.Context, request *UpdateNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateNamespace(ctx, request) } @@ -537,7 +537,7 @@ func (p *metadataPersistenceClient) RenameNamespace( ctx context.Context, request *RenameNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.RenameNamespace(ctx, request) } @@ -545,7 +545,7 @@ func (p *metadataPersistenceClient) DeleteNamespace( ctx context.Context, request *DeleteNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteNamespace(ctx, request) } @@ -553,7 +553,7 @@ func (p *metadataPersistenceClient) DeleteNamespaceByName( ctx context.Context, request *DeleteNamespaceByNameRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -561,14 +561,14 @@ func (p *metadataPersistenceClient) ListNamespaces( ctx context.Context, request *ListNamespacesRequest, ) (_ *ListNamespacesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ListNamespaces(ctx, request) } func (p *metadataPersistenceClient) GetMetadata( ctx context.Context, ) (_ *GetMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetMetadata(ctx) } @@ -581,7 +581,7 @@ func (p *executionPersistenceClient) AppendHistoryNodes( ctx context.Context, request *AppendHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.AppendHistoryNodes(ctx, request) } @@ -590,7 +590,7 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( ctx context.Context, request *AppendRawHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, 
request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -599,7 +599,7 @@ func (p *executionPersistenceClient) ReadHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadHistoryBranch(ctx, request) } @@ -607,7 +607,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchReverse( ctx context.Context, request *ReadHistoryBranchReverseRequest, ) (_ *ReadHistoryBranchReverseResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -616,7 +616,7 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchByBatchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -625,7 +625,7 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadRawHistoryBranchResponse, retErr error) { - defer 
recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -634,7 +634,7 @@ func (p *executionPersistenceClient) ForkHistoryBranch( ctx context.Context, request *ForkHistoryBranchRequest, ) (_ *ForkHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ForkHistoryBranch(ctx, request) } @@ -643,7 +643,7 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( ctx context.Context, request *DeleteHistoryBranchRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteHistoryBranch(ctx, request) } @@ -652,7 +652,7 @@ func (p *executionPersistenceClient) TrimHistoryBranch( ctx context.Context, request *TrimHistoryBranchRequest, ) (_ *TrimHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.TrimHistoryBranch(ctx, request) } @@ -660,7 +660,7 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( ctx context.Context, request 
*GetAllHistoryTreeBranchesRequest, ) (_ *GetAllHistoryTreeBranchesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -669,7 +669,7 @@ func (p *executionPersistenceClient) GetHistoryTree( ctx context.Context, request *GetHistoryTreeRequest, ) (_ *GetHistoryTreeResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetHistoryTree(ctx, request) } @@ -684,7 +684,7 @@ func (p *queuePersistenceClient) EnqueueMessage( ctx context.Context, blob commonpb.DataBlob, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.EnqueueMessage(ctx, blob) } @@ -693,7 +693,7 @@ func (p *queuePersistenceClient) ReadMessages( lastMessageID int64, maxCount int, ) (_ []*QueueMessage, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -701,14 +701,14 @@ func (p *queuePersistenceClient) 
UpdateAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetAckLevels(ctx) } @@ -716,7 +716,7 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -724,7 +724,7 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( ctx context.Context, blob commonpb.DataBlob, ) (_ int64, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -735,7 +735,7 @@ func (p *queuePersistenceClient) ReadMessagesFromDLQ( pageSize int, pageToken []byte, ) (_ []*QueueMessage, _ []byte, 
retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) } @@ -743,7 +743,7 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -752,7 +752,7 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( firstMessageID int64, lastMessageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -760,14 +760,14 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpdateDLQAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetDLQAckLevels( ctx 
context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetDLQAckLevels(ctx) } @@ -783,14 +783,14 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( ctx context.Context, request *ListClusterMetadataRequest, ) (_ *ListClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.ListClusterMetadata(ctx, request) } func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( ctx context.Context, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -798,7 +798,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( ctx context.Context, request *GetClusterMetadataRequest, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetClusterMetadata(ctx, request) } @@ 
-806,7 +806,7 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( ctx context.Context, request *SaveClusterMetadataRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.SaveClusterMetadata(ctx, request) } @@ -814,7 +814,7 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( ctx context.Context, request *DeleteClusterMetadataRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -826,7 +826,7 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( ctx context.Context, request *GetClusterMembersRequest, ) (_ *GetClusterMembersResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.GetClusterMembers(ctx, request) } @@ -834,7 +834,7 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( ctx context.Context, request *UpsertClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.UpsertClusterMembership(ctx, request) } @@ -842,7 +842,7 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( ctx context.Context, request *PruneClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.PruneClusterMembership(ctx, request) } @@ -850,7 +850,7 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( ctx context.Context, currentClusterName string, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals)(retErr) + defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } @@ -860,7 +860,8 @@ func recordMetricsAndSignalsFn( scope string, emitter metricEmitter, healthSignals HealthSignalAggregator, -) func(error) { + err error, +) func() { startTime := time.Now().UTC() callerInfo := headers.GetCallerInfo(ctx) signalFn := healthSignals.GetRecordFn(quotas.NewRequest( @@ -872,7 +873,7 @@ func recordMetricsAndSignalsFn( callerInfo.CallOrigin, )) - return func(err error) { + return func() { signalFn(err) emitter.recordRequestMetrics(scope, callerInfo.CallerName, startTime, err) } From d64ff92bddf4979c63948885ff310f33732f2602 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 07:27:45 -0700 Subject: [PATCH 22/36] fix clients --- .../persistence/persistenceMetricClients.go | 596 +++++++++++++++--- 1 file changed, 500 
insertions(+), 96 deletions(-) diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index 74eb8849054..5e50dfd4a44 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -31,12 +31,11 @@ import ( commonpb "go.temporal.io/api/common/v1" "go.temporal.io/api/serviceerror" - "go.temporal.io/server/common/quotas" - "go.temporal.io/server/common/headers" "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" + "go.temporal.io/server/common/quotas" "go.temporal.io/server/service/history/tasks" ) @@ -170,7 +169,13 @@ func (p *shardPersistenceClient) GetOrCreateShard( ctx context.Context, request *GetOrCreateShardRequest, ) (_ *GetOrCreateShardResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetOrCreateShardScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetOrCreateShardScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetOrCreateShard(ctx, request) } @@ -178,7 +183,13 @@ func (p *shardPersistenceClient) UpdateShard( ctx context.Context, request *UpdateShardRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardInfo.GetShardId(), metrics.PersistenceUpdateShardScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateShardScope, request.ShardInfo.GetShardId(), p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, callerInfo.CallerName, 
startTime, retErr) + }() return p.persistence.UpdateShard(ctx, request) } @@ -186,7 +197,13 @@ func (p *shardPersistenceClient) AssertShardOwnership( ctx context.Context, request *AssertShardOwnershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAssertShardOwnershipScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceAssertShardOwnershipScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.AssertShardOwnership(ctx, request) } @@ -206,7 +223,13 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( ctx context.Context, request *CreateWorkflowExecutionRequest, ) (_ *CreateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceCreateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -214,7 +237,13 @@ func (p *executionPersistenceClient) GetWorkflowExecution( ctx context.Context, request *GetWorkflowExecutionRequest, ) (_ *GetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceGetWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetWorkflowExecution(ctx, request) } @@ -222,7 +251,13 @@ func (p *executionPersistenceClient) SetWorkflowExecution( ctx context.Context, request *SetWorkflowExecutionRequest, ) (_ *SetWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceSetWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceSetWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.SetWorkflowExecution(ctx, request) } @@ -230,7 +265,13 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( ctx context.Context, request *UpdateWorkflowExecutionRequest, ) (_ *UpdateWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceUpdateWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -238,7 +279,13 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( ctx context.Context, request 
*ConflictResolveWorkflowExecutionRequest, ) (_ *ConflictResolveWorkflowExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceConflictResolveWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceConflictResolveWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -246,7 +293,13 @@ func (p *executionPersistenceClient) DeleteWorkflowExecution( ctx context.Context, request *DeleteWorkflowExecutionRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -254,7 +307,13 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( ctx context.Context, request *DeleteCurrentWorkflowExecutionRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + 
signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteCurrentWorkflowExecution(ctx, request) } @@ -262,7 +321,13 @@ func (p *executionPersistenceClient) GetCurrentExecution( ctx context.Context, request *GetCurrentExecutionRequest, ) (_ *GetCurrentExecutionResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetCurrentExecutionScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetCurrentExecutionScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetCurrentExecution(ctx, request) } @@ -270,7 +335,13 @@ func (p *executionPersistenceClient) ListConcreteExecutions( ctx context.Context, request *ListConcreteExecutionsRequest, ) (_ *ListConcreteExecutionsResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceListConcreteExecutionsScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceListConcreteExecutionsScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ListConcreteExecutions(ctx, request) } @@ -305,7 +376,13 @@ func (p *executionPersistenceClient) AddHistoryTasks( ctx context.Context, request *AddHistoryTasksRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAddTasksScope, p.metricEmitter, p.healthSignals, retErr)() + 
callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceAddTasksScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceAddTasksScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.AddHistoryTasks(ctx, request) } @@ -329,7 +406,13 @@ func (p *executionPersistenceClient) GetHistoryTasks( return nil, serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetHistoryTasks(ctx, request) } @@ -353,7 +436,13 @@ func (p *executionPersistenceClient) CompleteHistoryTask( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CompleteHistoryTask(ctx, request) } @@ -377,7 +466,13 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - defer recordMetricsAndSignalsFn(ctx, request.ShardID, operation, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := 
signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -385,7 +480,13 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( ctx context.Context, request *PutReplicationTaskToDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistencePutReplicationTaskToDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistencePutReplicationTaskToDLQScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -393,7 +494,13 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ *GetHistoryTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetReplicationTasksFromDLQScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -401,7 +508,13 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( ctx context.Context, request *DeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer 
recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteReplicationTaskFromDLQScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -409,7 +522,13 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( ctx context.Context, request *RangeDeleteReplicationTaskFromDLQRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -417,7 +536,13 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetReplicationTasksFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetReplicationTasksFromDLQScope, request.ShardID, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + 
p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -433,7 +558,13 @@ func (p *taskPersistenceClient) CreateTasks( ctx context.Context, request *CreateTasksRequest, ) (_ *CreateTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTasksScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateTasksScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CreateTasks(ctx, request) } @@ -441,7 +572,13 @@ func (p *taskPersistenceClient) GetTasks( ctx context.Context, request *GetTasksRequest, ) (_ *GetTasksResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTasksScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetTasksScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetTasksScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetTasks(ctx, request) } @@ -449,7 +586,13 @@ func (p *taskPersistenceClient) CompleteTask( ctx context.Context, request *CompleteTaskRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTaskScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCompleteTaskScope, CallerSegmentMissing, p.healthSignals) + startTime := 
time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CompleteTask(ctx, request) } @@ -457,7 +600,13 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( ctx context.Context, request *CompleteTasksLessThanRequest, ) (_ int, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCompleteTasksLessThanScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCompleteTasksLessThanScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CompleteTasksLessThan(ctx, request) } @@ -465,7 +614,13 @@ func (p *taskPersistenceClient) CreateTaskQueue( ctx context.Context, request *CreateTaskQueueRequest, ) (_ *CreateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateTaskQueueScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CreateTaskQueue(ctx, request) } @@ -473,7 +628,13 @@ func (p *taskPersistenceClient) UpdateTaskQueue( ctx context.Context, request *UpdateTaskQueueRequest, ) (_ *UpdateTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := 
headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateTaskQueueScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpdateTaskQueue(ctx, request) } @@ -481,7 +642,13 @@ func (p *taskPersistenceClient) GetTaskQueue( ctx context.Context, request *GetTaskQueueRequest, ) (_ *GetTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetTaskQueueScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetTaskQueue(ctx, request) } @@ -489,7 +656,13 @@ func (p *taskPersistenceClient) ListTaskQueue( ctx context.Context, request *ListTaskQueueRequest, ) (_ *ListTaskQueueResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceListTaskQueueScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ListTaskQueue(ctx, request) } @@ -497,7 +670,13 @@ func (p *taskPersistenceClient) DeleteTaskQueue( ctx context.Context, request *DeleteTaskQueueRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceDeleteTaskQueueScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteTaskQueueScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteTaskQueue(ctx, request) } @@ -513,7 +692,13 @@ func (p *metadataPersistenceClient) CreateNamespace( ctx context.Context, request *CreateNamespaceRequest, ) (_ *CreateNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceCreateNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.CreateNamespace(ctx, request) } @@ -521,7 +706,13 @@ func (p *metadataPersistenceClient) GetNamespace( ctx context.Context, request *GetNamespaceRequest, ) (_ *GetNamespaceResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetNamespace(ctx, request) } @@ -529,7 +720,13 @@ func (p *metadataPersistenceClient) UpdateNamespace( ctx context.Context, 
request *UpdateNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpdateNamespace(ctx, request) } @@ -537,7 +734,13 @@ func (p *metadataPersistenceClient) RenameNamespace( ctx context.Context, request *RenameNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRenameNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceRenameNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.RenameNamespace(ctx, request) } @@ -545,7 +748,13 @@ func (p *metadataPersistenceClient) DeleteNamespace( ctx context.Context, request *DeleteNamespaceRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteNamespace(ctx, request) } @@ -553,7 +762,13 
@@ func (p *metadataPersistenceClient) DeleteNamespaceByName( ctx context.Context, request *DeleteNamespaceByNameRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteNamespaceByNameScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteNamespaceByNameScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -561,14 +776,26 @@ func (p *metadataPersistenceClient) ListNamespaces( ctx context.Context, request *ListNamespacesRequest, ) (_ *ListNamespacesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListNamespacesScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceListNamespacesScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ListNamespaces(ctx, request) } func (p *metadataPersistenceClient) GetMetadata( ctx context.Context, ) (_ *GetMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetMetadataScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, callerInfo.CallerName, 
startTime, retErr) + }() return p.persistence.GetMetadata(ctx) } @@ -581,7 +808,13 @@ func (p *executionPersistenceClient) AppendHistoryNodes( ctx context.Context, request *AppendHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceAppendHistoryNodesScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.AppendHistoryNodes(ctx, request) } @@ -590,7 +823,13 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( ctx context.Context, request *AppendRawHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceAppendRawHistoryNodesScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceAppendRawHistoryNodesScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -599,7 +838,13 @@ func (p *executionPersistenceClient) ReadHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceReadHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadHistoryBranch(ctx, request) } @@ -607,7 +852,13 @@ func (p *executionPersistenceClient) ReadHistoryBranchReverse( ctx context.Context, request *ReadHistoryBranchReverseRequest, ) (_ *ReadHistoryBranchReverseResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchReverseScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadHistoryBranchReverseScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -616,7 +867,13 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchByBatchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -625,7 +882,13 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ 
*ReadRawHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceReadRawHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadRawHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -634,7 +897,13 @@ func (p *executionPersistenceClient) ForkHistoryBranch( ctx context.Context, request *ForkHistoryBranchRequest, ) (_ *ForkHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceForkHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceForkHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ForkHistoryBranch(ctx, request) } @@ -643,7 +912,13 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( ctx context.Context, request *DeleteHistoryBranchRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceDeleteHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return 
p.persistence.DeleteHistoryBranch(ctx, request) } @@ -652,7 +927,13 @@ func (p *executionPersistenceClient) TrimHistoryBranch( ctx context.Context, request *TrimHistoryBranchRequest, ) (_ *TrimHistoryBranchResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceTrimHistoryBranchScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceTrimHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.TrimHistoryBranch(ctx, request) } @@ -660,7 +941,13 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( ctx context.Context, request *GetAllHistoryTreeBranchesRequest, ) (_ *GetAllHistoryTreeBranchesResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAllHistoryTreeBranchesScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetAllHistoryTreeBranchesScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -669,7 +956,13 @@ func (p *executionPersistenceClient) GetHistoryTree( ctx context.Context, request *GetHistoryTreeRequest, ) (_ *GetHistoryTreeResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, request.ShardID, metrics.PersistenceGetHistoryTreeScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceGetHistoryTreeScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetHistoryTree(ctx, request) } @@ -684,7 +977,13 @@ func (p *queuePersistenceClient) EnqueueMessage( ctx context.Context, blob commonpb.DataBlob, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceEnqueueMessageScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.EnqueueMessage(ctx, blob) } @@ -693,7 +992,13 @@ func (p *queuePersistenceClient) ReadMessages( lastMessageID int64, maxCount int, ) (_ []*QueueMessage, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadQueueMessagesScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadQueueMessagesScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -701,14 +1006,26 @@ func (p *queuePersistenceClient) UpdateAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := 
headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateAckLevelScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpdateAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetAckLevelScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetAckLevels(ctx) } @@ -716,7 +1033,13 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessagesBeforeScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteMessagesBeforeScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -724,7 +1047,13 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( ctx context.Context, blob commonpb.DataBlob, ) (_ int64, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceEnqueueMessageToDLQScope, p.metricEmitter, 
p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceEnqueueMessageToDLQScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -735,7 +1064,13 @@ func (p *queuePersistenceClient) ReadMessagesFromDLQ( pageSize int, pageToken []byte, ) (_ []*QueueMessage, _ []byte, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceReadMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadMessagesFromDLQScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) } @@ -743,7 +1078,13 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( ctx context.Context, messageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteMessageFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteMessageFromDLQScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -752,7 +1093,13 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( firstMessageID 
int64, lastMessageID int64, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceRangeDeleteMessagesFromDLQScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceRangeDeleteMessagesFromDLQScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -760,14 +1107,26 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpdateDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateDLQAckLevelScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpdateDLQAckLevel(ctx, metadata) } func (p *queuePersistenceClient) GetDLQAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetDLQAckLevelScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetDLQAckLevelScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, callerInfo.CallerName, startTime, retErr) + }() return 
p.persistence.GetDLQAckLevels(ctx) } @@ -783,14 +1142,26 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( ctx context.Context, request *ListClusterMetadataRequest, ) (_ *ListClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceListClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceListClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.ListClusterMetadata(ctx, request) } func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( ctx context.Context, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetCurrentClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetCurrentClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -798,7 +1169,13 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( ctx context.Context, request *GetClusterMetadataRequest, ) (_ *GetClusterMetadataResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceGetClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetClusterMetadataScope, 
CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetClusterMetadata(ctx, request) } @@ -806,7 +1183,13 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( ctx context.Context, request *SaveClusterMetadataRequest, ) (_ bool, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceSaveClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceSaveClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.SaveClusterMetadata(ctx, request) } @@ -814,7 +1197,13 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( ctx context.Context, request *DeleteClusterMetadataRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceDeleteClusterMetadataScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -826,7 +1215,13 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( ctx context.Context, request *GetClusterMembersRequest, ) (_ *GetClusterMembersResponse, retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, 
metrics.PersistenceGetClusterMembersScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetClusterMembersScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.GetClusterMembers(ctx, request) } @@ -834,7 +1229,13 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( ctx context.Context, request *UpsertClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceUpsertClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpsertClusterMembershipScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.UpsertClusterMembership(ctx, request) } @@ -842,7 +1243,13 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( ctx context.Context, request *PruneClusterMembershipRequest, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistencePruneClusterMembershipScope, p.metricEmitter, p.healthSignals, retErr)() + callerInfo := headers.GetCallerInfo(ctx) + signalFn := signalRecordFn(callerInfo, metrics.PersistencePruneClusterMembershipScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, callerInfo.CallerName, startTime, retErr) + }() return p.persistence.PruneClusterMembership(ctx, request) } @@ -850,33 
+1257,14 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( ctx context.Context, currentClusterName string, ) (retErr error) { - defer recordMetricsAndSignalsFn(ctx, CallerSegmentMissing, metrics.PersistenceInitializeSystemNamespaceScope, p.metricEmitter, p.healthSignals, retErr)() - return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) -} - -func recordMetricsAndSignalsFn( - ctx context.Context, - shardID int32, - scope string, - emitter metricEmitter, - healthSignals HealthSignalAggregator, - err error, -) func() { - startTime := time.Now().UTC() callerInfo := headers.GetCallerInfo(ctx) - signalFn := healthSignals.GetRecordFn(quotas.NewRequest( - scope, - RateLimitDefaultToken, - callerInfo.CallerName, - callerInfo.CallerType, - shardID, - callerInfo.CallOrigin, - )) - - return func() { - signalFn(err) - emitter.recordRequestMetrics(scope, callerInfo.CallerName, startTime, err) - } + signalFn := signalRecordFn(callerInfo, metrics.PersistenceInitializeSystemNamespaceScope, CallerSegmentMissing, p.healthSignals) + startTime := time.Now().UTC() + defer func() { + signalFn(retErr) + p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, callerInfo.CallerName, startTime, retErr) + }() + return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } func (p *metricEmitter) recordRequestMetrics(operation string, caller string, startTime time.Time, err error) { @@ -911,3 +1299,19 @@ func updateErrorMetric(handler metrics.Handler, logger log.Logger, operation str } } } + +func signalRecordFn( + callerInfo headers.CallerInfo, + api string, + shardID int32, + healthSignals HealthSignalAggregator, +) func(err error) { + return healthSignals.GetRecordFn(quotas.NewRequest( + api, + RateLimitDefaultToken, + callerInfo.CallerName, + callerInfo.CallerType, + shardID, + callerInfo.CallOrigin, + )) +} From e91a0ee5a2d891787b94bec0d54b780785588ab3 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 
07:40:04 -0700 Subject: [PATCH 23/36] types --- common/aggregate/moving_window_average.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 6d3960e834e..a52637dfc00 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -48,7 +48,7 @@ type ( head *ring.Ring tail *ring.Ring sum int64 - count int + count int64 } ) @@ -69,7 +69,7 @@ func (a *MovingWindowAvgImpl) Record(val int64) { a.Lock() defer a.Unlock() - if a.count == a.maxBufferSize { + if a.count == int64(a.maxBufferSize) { a.expireOneLocked() } @@ -89,7 +89,7 @@ func (a *MovingWindowAvgImpl) Average() float64 { if a.count == 0 { return 0 } - return float64(a.sum / int64(a.count)) + return float64(a.sum) / float64(a.count) } func (a *MovingWindowAvgImpl) expireOldValues() { From db4db375f8656f9a632e1eb81fca1b386018002a Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 14:40:03 -0700 Subject: [PATCH 24/36] acquire lock once --- common/aggregate/moving_window_average.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index a52637dfc00..23c2e738a3f 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -42,7 +42,7 @@ type ( } MovingWindowAvgImpl struct { - sync.RWMutex + sync.Mutex windowSize time.Duration maxBufferSize int head *ring.Ring @@ -81,18 +81,17 @@ func (a *MovingWindowAvgImpl) Record(val int64) { } func (a *MovingWindowAvgImpl) Average() float64 { - a.expireOldValues() - - a.RLock() - defer a.RUnlock() + a.Lock() + defer a.Unlock() + a.expireOldValuesLocked() if a.count == 0 { return 0 } return float64(a.sum) / float64(a.count) } -func (a *MovingWindowAvgImpl) expireOldValues() { +func (a *MovingWindowAvgImpl) expireOldValuesLocked() { a.Lock() defer a.Unlock() 
From c256fb19ad747dea6daeede518b00f63b09188f1 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 14:59:32 -0700 Subject: [PATCH 25/36] locks --- common/aggregate/moving_window_average.go | 3 --- common/aggregate/moving_window_average_array.go | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) create mode 100644 common/aggregate/moving_window_average_array.go diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 23c2e738a3f..5440c50f460 100644 --- a/common/aggregate/moving_window_average.go +++ b/common/aggregate/moving_window_average.go @@ -92,9 +92,6 @@ func (a *MovingWindowAvgImpl) Average() float64 { } func (a *MovingWindowAvgImpl) expireOldValuesLocked() { - a.Lock() - defer a.Unlock() - for ; a.head != a.tail; a.head = a.head.Next() { data, ok := a.head.Value.(timestampedData) if !ok || time.Since(data.timestamp) < a.windowSize { diff --git a/common/aggregate/moving_window_average_array.go b/common/aggregate/moving_window_average_array.go new file mode 100644 index 00000000000..17fdfeda482 --- /dev/null +++ b/common/aggregate/moving_window_average_array.go @@ -0,0 +1 @@ +package aggregate From 98f2b66b9b7a711c427d5f55de82bc55d135ddfc Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 15:10:41 -0700 Subject: [PATCH 26/36] array moving average --- .../aggregate/bench_moving_window_avg_test.go | 14 ++- .../aggregate/moving_window_average_array.go | 92 +++++++++++++++++++ 2 files changed, 104 insertions(+), 2 deletions(-) diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 706460c2c30..4ce34c9fac7 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -31,10 +31,12 @@ import ( ) // BenchmarkRingMovingWindowAvg -// BenchmarkRingMovingWindowAvg-10 12283236 94.76 ns/op +// BenchmarkRingMovingWindowAvg-10 12622564 92.76 ns/op +// 
BenchmarkArrayMovingWindowAvg +// BenchmarkArrayMovingWindowAvg-10 12022722 99.94 ns/op const ( - testWindowSize = 3 * time.Second + testWindowSize = 10 * time.Millisecond testBufferSize = 200 ) @@ -45,3 +47,11 @@ func BenchmarkRingMovingWindowAvg(b *testing.B) { avg.Average() } } + +func BenchmarkArrayMovingWindowAvg(b *testing.B) { + avg := NewMovingWindowAvgArrayImpl(testWindowSize, testBufferSize) + for i := 0; i < b.N; i++ { + avg.Record(rand.Int63()) + avg.Average() + } +} diff --git a/common/aggregate/moving_window_average_array.go b/common/aggregate/moving_window_average_array.go index 17fdfeda482..a6bfcd62962 100644 --- a/common/aggregate/moving_window_average_array.go +++ b/common/aggregate/moving_window_average_array.go @@ -1 +1,93 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ package aggregate + +import ( + "sync" + "time" +) + +type ( + MovingWindowAvgArrayImpl struct { + sync.Mutex + windowSize time.Duration + maxBufferSize int + buffer []timestampedData + headIdx int + tailIdx int + sum int64 + count int64 + } +) + +func NewMovingWindowAvgArrayImpl( + windowSize time.Duration, + maxBufferSize int, +) *MovingWindowAvgArrayImpl { + return &MovingWindowAvgArrayImpl{ + windowSize: windowSize, + maxBufferSize: maxBufferSize, + buffer: make([]timestampedData, maxBufferSize), + } +} + +func (a *MovingWindowAvgArrayImpl) Record(val int64) { + a.Lock() + defer a.Unlock() + + a.buffer[a.tailIdx] = timestampedData{timestamp: time.Now(), value: val} + a.tailIdx = (a.tailIdx + 1) % a.maxBufferSize + + a.sum += val + a.count++ + + if a.tailIdx == a.headIdx { + // buffer full, expire oldest element + a.sum -= a.buffer[a.headIdx].value + a.count-- + a.headIdx = (a.headIdx + 1) % a.maxBufferSize + } +} + +func (a *MovingWindowAvgArrayImpl) Average() float64 { + a.Lock() + defer a.Unlock() + + a.expireOldValuesLocked() + if a.count == 0 { + return 0 + } + return float64(a.sum) / float64(a.count) +} + +func (a *MovingWindowAvgArrayImpl) expireOldValuesLocked() { + for ; a.headIdx != a.tailIdx; a.headIdx = (a.headIdx + 1) % a.maxBufferSize { + if time.Since(a.buffer[a.headIdx].timestamp) < a.windowSize { + break + } + a.sum -= a.buffer[a.headIdx].value + a.count-- + } +} From f3dfa3de477d2272ee50c463d54a54e64866055a Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 15:11:30 -0700 Subject: [PATCH 27/36] Revert "array moving average" This reverts commit 98f2b66b9b7a711c427d5f55de82bc55d135ddfc. 
--- .../aggregate/bench_moving_window_avg_test.go | 14 +-- .../aggregate/moving_window_average_array.go | 92 ------------------- 2 files changed, 2 insertions(+), 104 deletions(-) diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 4ce34c9fac7..706460c2c30 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -31,12 +31,10 @@ import ( ) // BenchmarkRingMovingWindowAvg -// BenchmarkRingMovingWindowAvg-10 12622564 92.76 ns/op -// BenchmarkArrayMovingWindowAvg -// BenchmarkArrayMovingWindowAvg-10 12022722 99.94 ns/op +// BenchmarkRingMovingWindowAvg-10 12283236 94.76 ns/op const ( - testWindowSize = 10 * time.Millisecond + testWindowSize = 3 * time.Second testBufferSize = 200 ) @@ -47,11 +45,3 @@ func BenchmarkRingMovingWindowAvg(b *testing.B) { avg.Average() } } - -func BenchmarkArrayMovingWindowAvg(b *testing.B) { - avg := NewMovingWindowAvgArrayImpl(testWindowSize, testBufferSize) - for i := 0; i < b.N; i++ { - avg.Record(rand.Int63()) - avg.Average() - } -} diff --git a/common/aggregate/moving_window_average_array.go b/common/aggregate/moving_window_average_array.go index a6bfcd62962..17fdfeda482 100644 --- a/common/aggregate/moving_window_average_array.go +++ b/common/aggregate/moving_window_average_array.go @@ -1,93 +1 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- package aggregate - -import ( - "sync" - "time" -) - -type ( - MovingWindowAvgArrayImpl struct { - sync.Mutex - windowSize time.Duration - maxBufferSize int - buffer []timestampedData - headIdx int - tailIdx int - sum int64 - count int64 - } -) - -func NewMovingWindowAvgArrayImpl( - windowSize time.Duration, - maxBufferSize int, -) *MovingWindowAvgArrayImpl { - return &MovingWindowAvgArrayImpl{ - windowSize: windowSize, - maxBufferSize: maxBufferSize, - buffer: make([]timestampedData, maxBufferSize), - } -} - -func (a *MovingWindowAvgArrayImpl) Record(val int64) { - a.Lock() - defer a.Unlock() - - a.buffer[a.tailIdx] = timestampedData{timestamp: time.Now(), value: val} - a.tailIdx = (a.tailIdx + 1) % a.maxBufferSize - - a.sum += val - a.count++ - - if a.tailIdx == a.headIdx { - // buffer full, expire oldest element - a.sum -= a.buffer[a.headIdx].value - a.count-- - a.headIdx = (a.headIdx + 1) % a.maxBufferSize - } -} - -func (a *MovingWindowAvgArrayImpl) Average() float64 { - a.Lock() - defer a.Unlock() - - a.expireOldValuesLocked() - if a.count == 0 { - return 0 - } - return float64(a.sum) / float64(a.count) -} - -func (a *MovingWindowAvgArrayImpl) expireOldValuesLocked() { - for ; a.headIdx != a.tailIdx; a.headIdx = (a.headIdx + 1) % a.maxBufferSize { - if time.Since(a.buffer[a.headIdx].timestamp) < a.windowSize { - break - } - a.sum -= a.buffer[a.headIdx].value - a.count-- - } -} From 33448c84b13a2a6cd74d73157b6016e7c23d6ae6 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 15:35:26 -0700 Subject: [PATCH 28/36] cleanup --- common/aggregate/moving_window_average_array.go | 1 - 1 file changed, 1 deletion(-) delete mode 100644 common/aggregate/moving_window_average_array.go diff --git a/common/aggregate/moving_window_average_array.go b/common/aggregate/moving_window_average_array.go deleted file mode 100644 index 17fdfeda482..00000000000 --- a/common/aggregate/moving_window_average_array.go +++ /dev/null @@ -1 +0,0 @@ -package aggregate From 
5a94697c804ef90f8613241e797af6e26ff0c7dd Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Wed, 24 May 2023 16:26:01 -0700 Subject: [PATCH 29/36] emit per shard RPS --- common/dynamicconfig/constants.go | 2 + .../persistence/health_signal_aggregator.go | 68 ++++++++++--------- common/resource/fx.go | 3 + 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index 856f546a0f4..d7bf5b9edc0 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -110,6 +110,8 @@ const ( PersistenceHealthSignalWindowSize = "system.persistenceHealthSignalWindowSize" // PersistenceHealthSignalBufferSize is the maximum number of persistence signals to buffer in memory per signal key PersistenceHealthSignalBufferSize = "system.persistenceHealthSignalBufferSize" + // ShardRPSWarnLimit is the per-shard RPS limit for warning + ShardRPSWarnLimit = "system.shardRPSWarnLimit" // Whether the deadlock detector should dump goroutines DeadlockDumpGoroutines = "system.deadlock.DumpGoroutines" diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go index 09b47194163..e138a3d649f 100644 --- a/common/persistence/health_signal_aggregator.go +++ b/common/persistence/health_signal_aggregator.go @@ -31,10 +31,16 @@ import ( "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common/dynamicconfig" + "go.temporal.io/server/common/log" + "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/quotas" ) +const ( + emitMetricsInterval = 30 * time.Second +) + type ( HealthSignalAggregator interface { GetRecordFn(req quotas.Request) func(err error) @@ -43,20 +49,17 @@ type ( } HealthSignalAggregatorImpl struct { - requestsPerShardAndNs map[perShardPerNamespaceKey]*atomic.Int64 - requestsLock sync.RWMutex + requestsPerShard map[int32]*atomic.Int64 + requestsLock sync.RWMutex latencyAverage 
aggregate.MovingWindowAverage errorRatio aggregate.MovingWindowAverage - metricsHandler metrics.Handler - emitMetricsInterval time.Duration - emitMetricsTimer *time.Ticker - } + metricsHandler metrics.Handler + emitMetricsTimer *time.Ticker + perShardRPSWarnLimit dynamicconfig.IntPropertyFn - perShardPerNamespaceKey struct { - namespace string - shard int32 + logger log.Logger } ) @@ -64,27 +67,34 @@ func NewHealthSignalAggregatorImpl( windowSize dynamicconfig.DurationPropertyFn, maxBufferSize dynamicconfig.IntPropertyFn, metricsHandler metrics.Handler, + perShardRPSWarnLimit dynamicconfig.IntPropertyFn, + logger log.Logger, ) *HealthSignalAggregatorImpl { return &HealthSignalAggregatorImpl{ - requestsPerShardAndNs: make(map[perShardPerNamespaceKey]*atomic.Int64), - latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), - errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), - metricsHandler: metricsHandler, - emitMetricsInterval: windowSize(), - emitMetricsTimer: time.NewTicker(windowSize()), + requestsPerShard: make(map[int32]*atomic.Int64), + latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), + errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), + metricsHandler: metricsHandler, + emitMetricsTimer: time.NewTicker(emitMetricsInterval), + perShardRPSWarnLimit: perShardRPSWarnLimit, + logger: logger, } } func (s *HealthSignalAggregatorImpl) GetRecordFn(req quotas.Request) func(err error) { start := time.Now() return func(err error) { - s.getOrInitRequestCount(req).Add(1) s.latencyAverage.Record(time.Since(start).Milliseconds()) + if isUnhealthyError(err) { s.errorRatio.Record(1) } else { s.errorRatio.Record(0) } + + if req.CallerSegment != CallerSegmentMissing { + s.getOrInitShardRequestCount(req.CallerSegment).Add(1) + } } } @@ -96,11 +106,9 @@ func (s *HealthSignalAggregatorImpl) ErrorRatio() float64 { return s.errorRatio.Average() } -func (s 
*HealthSignalAggregatorImpl) getOrInitRequestCount(req quotas.Request) *atomic.Int64 { - key := getPerShardPerNsKey(req) - +func (s *HealthSignalAggregatorImpl) getOrInitShardRequestCount(shardID int32) *atomic.Int64 { s.requestsLock.RLock() - count, ok := s.requestsPerShardAndNs[key] + count, ok := s.requestsPerShard[shardID] s.requestsLock.RUnlock() if ok { return count @@ -111,12 +119,12 @@ func (s *HealthSignalAggregatorImpl) getOrInitRequestCount(req quotas.Request) * s.requestsLock.Lock() defer s.requestsLock.Unlock() - count, ok = s.requestsPerShardAndNs[key] + count, ok = s.requestsPerShard[shardID] if ok { return count } - s.requestsPerShardAndNs[key] = newCount + s.requestsPerShard[shardID] = newCount return newCount } @@ -125,22 +133,18 @@ func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { select { case <-s.emitMetricsTimer.C: s.requestsLock.RLock() - for key, count := range s.requestsPerShardAndNs { - shardRPS := int64(float64(count.Swap(0)) / s.emitMetricsInterval.Seconds()) - s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS, metrics.NamespaceTag(key.namespace)) + for shardID, count := range s.requestsPerShard { + shardRPS := int64(float64(count.Swap(0)) / emitMetricsInterval.Seconds()) + s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS) + if shardRPS > int64(s.perShardRPSWarnLimit()) { + s.logger.Warn("Per shard RPS warn limit exceeded", tag.ShardID(shardID)) + } } s.requestsLock.RUnlock() } } } -func getPerShardPerNsKey(req quotas.Request) perShardPerNamespaceKey { - return perShardPerNamespaceKey{ - namespace: req.Caller, - shard: req.CallerSegment, - } -} - func isUnhealthyError(err error) bool { if err == nil { return false diff --git a/common/resource/fx.go b/common/resource/fx.go index a10cf60adcf..b4eb50f72b7 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ 
-219,11 +219,14 @@ func NamespaceRegistryProvider( func PersistenceHealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, metricsHandler metrics.Handler, + logger log.Logger, ) persistence.HealthSignalAggregator { return persistence.NewHealthSignalAggregatorImpl( dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), metricsHandler, + dynamicCollection.GetIntProperty(dynamicconfig.ShardRPSWarnLimit, 50), + logger, ) } From e0ce388b2887e001408061a0d9c3df27320416ba Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 09:24:45 -0700 Subject: [PATCH 30/36] array average --- .../aggregate/bench_moving_window_avg_test.go | 12 +++--- common/aggregate/moving_window_average.go | 40 ++++++++----------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/common/aggregate/bench_moving_window_avg_test.go b/common/aggregate/bench_moving_window_avg_test.go index 706460c2c30..3500b789e29 100644 --- a/common/aggregate/bench_moving_window_avg_test.go +++ b/common/aggregate/bench_moving_window_avg_test.go @@ -30,18 +30,20 @@ import ( "time" ) -// BenchmarkRingMovingWindowAvg -// BenchmarkRingMovingWindowAvg-10 12283236 94.76 ns/op +// BenchmarkArrayMovingWindowAvg +// BenchmarkArrayMovingWindowAvg-10 17021074 66.27 ns/op const ( - testWindowSize = 3 * time.Second + testWindowSize = 10 * time.Millisecond testBufferSize = 200 ) -func BenchmarkRingMovingWindowAvg(b *testing.B) { +func BenchmarkArrayMovingWindowAvg(b *testing.B) { avg := NewMovingWindowAvgImpl(testWindowSize, testBufferSize) for i := 0; i < b.N; i++ { avg.Record(rand.Int63()) - avg.Average() + if i%10 == 0 { + avg.Average() + } } } diff --git a/common/aggregate/moving_window_average.go b/common/aggregate/moving_window_average.go index 5440c50f460..ad5d93cfc0c 100644 --- a/common/aggregate/moving_window_average.go +++ 
b/common/aggregate/moving_window_average.go @@ -25,7 +25,6 @@ package aggregate import ( - "container/ring" "sync" "time" ) @@ -45,8 +44,9 @@ type ( sync.Mutex windowSize time.Duration maxBufferSize int - head *ring.Ring - tail *ring.Ring + buffer []timestampedData + headIdx int + tailIdx int sum int64 count int64 } @@ -56,12 +56,10 @@ func NewMovingWindowAvgImpl( windowSize time.Duration, maxBufferSize int, ) *MovingWindowAvgImpl { - buffer := ring.New(maxBufferSize) return &MovingWindowAvgImpl{ windowSize: windowSize, maxBufferSize: maxBufferSize, - head: buffer, - tail: buffer, + buffer: make([]timestampedData, maxBufferSize), } } @@ -69,15 +67,18 @@ func (a *MovingWindowAvgImpl) Record(val int64) { a.Lock() defer a.Unlock() - if a.count == int64(a.maxBufferSize) { - a.expireOneLocked() - } - - a.tail.Value = timestampedData{value: val, timestamp: time.Now()} - a.tail = a.tail.Next() + a.buffer[a.tailIdx] = timestampedData{timestamp: time.Now(), value: val} + a.tailIdx = (a.tailIdx + 1) % a.maxBufferSize a.sum += val a.count++ + + if a.tailIdx == a.headIdx { + // buffer full, expire oldest element + a.sum -= a.buffer[a.headIdx].value + a.count-- + a.headIdx = (a.headIdx + 1) % a.maxBufferSize + } } func (a *MovingWindowAvgImpl) Average() float64 { @@ -92,20 +93,11 @@ func (a *MovingWindowAvgImpl) Average() float64 { } func (a *MovingWindowAvgImpl) expireOldValuesLocked() { - for ; a.head != a.tail; a.head = a.head.Next() { - data, ok := a.head.Value.(timestampedData) - if !ok || time.Since(data.timestamp) < a.windowSize { + for ; a.headIdx != a.tailIdx; a.headIdx = (a.headIdx + 1) % a.maxBufferSize { + if time.Since(a.buffer[a.headIdx].timestamp) < a.windowSize { break } - a.sum -= data.value - a.count-- - } -} - -func (a *MovingWindowAvgImpl) expireOneLocked() { - if data, ok := a.head.Value.(timestampedData); ok { - a.sum -= data.value + a.sum -= a.buffer[a.headIdx].value a.count-- } - a.head = a.head.Next() } From e48fcec05b5f7c3ab8429026fcf758e725399708 Mon 
Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 10:03:03 -0700 Subject: [PATCH 31/36] feedback --- common/persistence/client/factory.go | 47 +++++--------- common/persistence/client/fx.go | 2 +- .../persistence/health_signal_aggregator.go | 63 +++++++++++-------- .../noop_health_signal_aggregator.go | 8 +++ 4 files changed, 63 insertions(+), 57 deletions(-) diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index 188a598a796..f6149a2f9da 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -89,6 +89,7 @@ func NewFactory( logger log.Logger, healthSignals p.HealthSignalAggregator, ) Factory { + healthSignals.Start() return &factoryImpl{ dataStoreFactory: dataStoreFactory, config: cfg, @@ -113,11 +114,7 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { result = p.NewTaskPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } return result, nil @@ -135,11 +132,7 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { result = p.NewShardPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewShardPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -158,11 +151,7 @@ func (f 
*factoryImpl) NewMetadataManager() (p.MetadataManager, error) { result = p.NewMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -181,11 +170,7 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err result = p.NewClusterMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewClusterMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -204,11 +189,7 @@ func (f *factoryImpl) NewExecutionManager() (p.ExecutionManager, error) { result = p.NewExecutionPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewExecutionPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -225,11 +206,7 @@ func (f *factoryImpl) 
NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu result = p.NewQueuePersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } if f.metricsHandler != nil || f.healthSignals != nil { - if f.metricsHandler == nil { - f.metricsHandler = metrics.NoopMetricsHandler - } else if f.healthSignals == nil { - f.healthSignals = p.NoopHealthSignalAggregator - } + f.updateNilMetricsAndHealthSignals() result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewQueuePersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -239,6 +216,7 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu // Close closes this factory func (f *factoryImpl) Close() { f.dataStoreFactory.Close() + f.healthSignals.Stop() } func IsPersistenceTransientError(err error) bool { @@ -249,3 +227,12 @@ func IsPersistenceTransientError(err error) bool { return false } + +func (f *factoryImpl) updateNilMetricsAndHealthSignals() { + if f.metricsHandler == nil { + f.metricsHandler = metrics.NoopMetricsHandler + } + if f.healthSignals == nil { + f.healthSignals = p.NoopHealthSignalAggregator + } +} diff --git a/common/persistence/client/fx.go b/common/persistence/client/fx.go index 0bd5d4ac239..a1711aed9cb 100644 --- a/common/persistence/client/fx.go +++ b/common/persistence/client/fx.go @@ -25,7 +25,6 @@ package client import ( - "go.temporal.io/server/common/persistence" "go.uber.org/fx" "go.temporal.io/server/common/cluster" @@ -33,6 +32,7 @@ import ( "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" "go.temporal.io/server/common/metrics" + "go.temporal.io/server/common/persistence" "go.temporal.io/server/common/persistence/serialization" "go.temporal.io/server/common/primitives" "go.temporal.io/server/common/quotas" diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go index e138a3d649f..989ba64b14d 100644 --- 
a/common/persistence/health_signal_aggregator.go +++ b/common/persistence/health_signal_aggregator.go @@ -29,6 +29,7 @@ import ( "sync/atomic" "time" + "go.temporal.io/server/common" "go.temporal.io/server/common/aggregate" "go.temporal.io/server/common/dynamicconfig" "go.temporal.io/server/common/log" @@ -43,14 +44,18 @@ const ( type ( HealthSignalAggregator interface { + common.Daemon GetRecordFn(req quotas.Request) func(err error) AverageLatency() float64 ErrorRatio() float64 } HealthSignalAggregatorImpl struct { - requestsPerShard map[int32]*atomic.Int64 - requestsLock sync.RWMutex + status int32 + shutdownCh chan struct{} + + requestsPerShard map[int32]int64 + requestsLock sync.Mutex latencyAverage aggregate.MovingWindowAverage errorRatio aggregate.MovingWindowAverage @@ -71,7 +76,9 @@ func NewHealthSignalAggregatorImpl( logger log.Logger, ) *HealthSignalAggregatorImpl { return &HealthSignalAggregatorImpl{ - requestsPerShard: make(map[int32]*atomic.Int64), + status: common.DaemonStatusInitialized, + shutdownCh: make(chan struct{}), + requestsPerShard: make(map[int32]int64), latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), metricsHandler: metricsHandler, @@ -81,6 +88,21 @@ func NewHealthSignalAggregatorImpl( } } +func (s *HealthSignalAggregatorImpl) Start() { + if !atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusInitialized, common.DaemonStatusStarted) { + return + } + go s.emitMetricsLoop() +} + +func (s *HealthSignalAggregatorImpl) Stop() { + if !atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusStarted, common.DaemonStatusStopped) { + return + } + close(s.shutdownCh) + s.emitMetricsTimer.Stop() +} + func (s *HealthSignalAggregatorImpl) GetRecordFn(req quotas.Request) func(err error) { start := time.Now() return func(err error) { @@ -93,7 +115,7 @@ func (s *HealthSignalAggregatorImpl) GetRecordFn(req quotas.Request) func(err er } if 
req.CallerSegment != CallerSegmentMissing { - s.getOrInitShardRequestCount(req.CallerSegment).Add(1) + s.incrementShardRequestCount(req.CallerSegment) } } } @@ -106,41 +128,30 @@ func (s *HealthSignalAggregatorImpl) ErrorRatio() float64 { return s.errorRatio.Average() } -func (s *HealthSignalAggregatorImpl) getOrInitShardRequestCount(shardID int32) *atomic.Int64 { - s.requestsLock.RLock() - count, ok := s.requestsPerShard[shardID] - s.requestsLock.RUnlock() - if ok { - return count - } - - newCount := &atomic.Int64{} - +func (s *HealthSignalAggregatorImpl) incrementShardRequestCount(shardID int32) { s.requestsLock.Lock() defer s.requestsLock.Unlock() - - count, ok = s.requestsPerShard[shardID] - if ok { - return count - } - - s.requestsPerShard[shardID] = newCount - return newCount + s.requestsPerShard[shardID]++ } func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { for { select { + case <-s.shutdownCh: + return case <-s.emitMetricsTimer.C: - s.requestsLock.RLock() - for shardID, count := range s.requestsPerShard { - shardRPS := int64(float64(count.Swap(0)) / emitMetricsInterval.Seconds()) + s.requestsLock.Lock() + requestCounts := s.requestsPerShard + s.requestsPerShard = make(map[int32]int64, len(requestCounts)) + s.requestsLock.Unlock() + + for shardID, count := range requestCounts { + shardRPS := int64(float64(count) / emitMetricsInterval.Seconds()) s.metricsHandler.Histogram(metrics.PersistenceShardRPS.GetMetricName(), metrics.PersistenceShardRPS.GetMetricUnit()).Record(shardRPS) if shardRPS > int64(s.perShardRPSWarnLimit()) { s.logger.Warn("Per shard RPS warn limit exceeded", tag.ShardID(shardID)) } } - s.requestsLock.RUnlock() } } } diff --git a/common/persistence/noop_health_signal_aggregator.go b/common/persistence/noop_health_signal_aggregator.go index 915b35599bf..17fa08669d2 100644 --- a/common/persistence/noop_health_signal_aggregator.go +++ b/common/persistence/noop_health_signal_aggregator.go @@ -36,6 +36,14 @@ type ( func 
newNoopSignalAggregator() *noopSignalAggregator { return &noopSignalAggregator{} } +func (a *noopSignalAggregator) Start() { + return +} + +func (a *noopSignalAggregator) Stop() { + return +} + func (a *noopSignalAggregator) GetRecordFn(_ quotas.Request) func(error) { return func(error) {} } From d3b20f183e40c709c87f3e576a0e983915961e9b Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 10:03:57 -0700 Subject: [PATCH 32/36] cleanup --- common/persistence/noop_health_signal_aggregator.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/common/persistence/noop_health_signal_aggregator.go b/common/persistence/noop_health_signal_aggregator.go index 17fa08669d2..35a5f90b75f 100644 --- a/common/persistence/noop_health_signal_aggregator.go +++ b/common/persistence/noop_health_signal_aggregator.go @@ -36,13 +36,9 @@ type ( func newNoopSignalAggregator() *noopSignalAggregator { return &noopSignalAggregator{} } -func (a *noopSignalAggregator) Start() { - return -} +func (a *noopSignalAggregator) Start() {} -func (a *noopSignalAggregator) Stop() { - return -} +func (a *noopSignalAggregator) Stop() {} func (a *noopSignalAggregator) GetRecordFn(_ quotas.Request) func(error) { return func(error) {} From b6fd86248e95859cae15134463aca6b6eebdc990 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 10:20:19 -0700 Subject: [PATCH 33/36] handle nil health signals --- common/persistence/client/factory.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index f6149a2f9da..d44474f204b 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -89,7 +89,6 @@ func NewFactory( logger log.Logger, healthSignals p.HealthSignalAggregator, ) Factory { - healthSignals.Start() return &factoryImpl{ dataStoreFactory: dataStoreFactory, config: cfg, @@ -216,7 +215,9 @@ func (f *factoryImpl) 
NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu // Close closes this factory func (f *factoryImpl) Close() { f.dataStoreFactory.Close() - f.healthSignals.Stop() + if f.healthSignals != nil { + f.healthSignals.Start() + } } func IsPersistenceTransientError(err error) bool { @@ -235,4 +236,5 @@ func (f *factoryImpl) updateNilMetricsAndHealthSignals() { if f.healthSignals == nil { f.healthSignals = p.NoopHealthSignalAggregator } + f.healthSignals.Start() } From 0bd90b07b33c3cecb4f53fe60d91ae0cb9fd282a Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 12:49:29 -0700 Subject: [PATCH 34/36] feedback --- common/dynamicconfig/constants.go | 2 + common/persistence/client/factory.go | 2 +- common/persistence/client/fx.go | 18 + .../persistence/health_signal_aggregator.go | 91 ++-- .../noop_health_signal_aggregator.go | 6 +- .../persistence/persistenceMetricClients.go | 500 ++++++++---------- common/resource/fx.go | 15 - 7 files changed, 288 insertions(+), 346 deletions(-) diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index d7bf5b9edc0..b8277e03a7f 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -106,6 +106,8 @@ const ( EnableEagerWorkflowStart = "system.enableEagerWorkflowStart" // NamespaceCacheRefreshInterval is the key for namespace cache refresh interval dynamic config NamespaceCacheRefreshInterval = "system.namespaceCacheRefreshInterval" + // PersistenceHealthSignalCollectionEnabled determines whether persistence health signal collection/aggregation is enabled + PersistenceHealthSignalCollectionEnabled = "system.persistenceHealthSignalCollectionEnabled" // PersistenceHealthSignalWindowSize is the time window size in seconds for aggregating persistence signals PersistenceHealthSignalWindowSize = "system.persistenceHealthSignalWindowSize" // PersistenceHealthSignalBufferSize is the maximum number of persistence signals to buffer in memory per signal key diff --git 
a/common/persistence/client/factory.go b/common/persistence/client/factory.go index d44474f204b..413a65adf3b 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -216,7 +216,7 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu func (f *factoryImpl) Close() { f.dataStoreFactory.Close() if f.healthSignals != nil { - f.healthSignals.Start() + f.healthSignals.Stop() } } diff --git a/common/persistence/client/fx.go b/common/persistence/client/fx.go index a1711aed9cb..e5d4c2f1adb 100644 --- a/common/persistence/client/fx.go +++ b/common/persistence/client/fx.go @@ -25,6 +25,8 @@ package client import ( + "time" + "go.uber.org/fx" "go.temporal.io/server/common/cluster" @@ -68,6 +70,7 @@ var Module = fx.Options( BeanModule, fx.Provide(ClusterNameProvider), fx.Provide(DataStoreFactoryProvider), + fx.Provide(PersistenceHealthSignalAggregatorProvider), ) func ClusterNameProvider(config *cluster.Config) ClusterName { @@ -102,3 +105,18 @@ func FactoryProvider( params.HealthSignals, ) } + +func PersistenceHealthSignalAggregatorProvider( + dynamicCollection *dynamicconfig.Collection, + metricsHandler metrics.Handler, + logger log.Logger, +) persistence.HealthSignalAggregator { + return persistence.NewHealthSignalAggregatorImpl( + dynamicCollection.GetBoolProperty(dynamicconfig.PersistenceHealthSignalCollectionEnabled, false), + dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second)(), + dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500)(), + metricsHandler, + dynamicCollection.GetIntProperty(dynamicconfig.ShardRPSWarnLimit, 50), + logger, + ) +} diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go index 989ba64b14d..f266eecf516 100644 --- a/common/persistence/health_signal_aggregator.go +++ b/common/persistence/health_signal_aggregator.go @@ -35,7 +35,6 @@ import ( 
"go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" - "go.temporal.io/server/common/quotas" ) const ( @@ -45,12 +44,14 @@ const ( type ( HealthSignalAggregator interface { common.Daemon - GetRecordFn(req quotas.Request) func(err error) + Record(callerSegment int32, latency time.Duration, err error) AverageLatency() float64 ErrorRatio() float64 } HealthSignalAggregatorImpl struct { + enabled dynamicconfig.BoolPropertyFn + status int32 shutdownCh chan struct{} @@ -69,18 +70,20 @@ type ( ) func NewHealthSignalAggregatorImpl( - windowSize dynamicconfig.DurationPropertyFn, - maxBufferSize dynamicconfig.IntPropertyFn, + enabled dynamicconfig.BoolPropertyFn, + windowSize time.Duration, + maxBufferSize int, metricsHandler metrics.Handler, perShardRPSWarnLimit dynamicconfig.IntPropertyFn, logger log.Logger, ) *HealthSignalAggregatorImpl { return &HealthSignalAggregatorImpl{ + enabled: enabled, status: common.DaemonStatusInitialized, shutdownCh: make(chan struct{}), requestsPerShard: make(map[int32]int64), - latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), - errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize(), maxBufferSize()), + latencyAverage: aggregate.NewMovingWindowAvgImpl(windowSize, maxBufferSize), + errorRatio: aggregate.NewMovingWindowAvgImpl(windowSize, maxBufferSize), metricsHandler: metricsHandler, emitMetricsTimer: time.NewTicker(emitMetricsInterval), perShardRPSWarnLimit: perShardRPSWarnLimit, @@ -89,6 +92,11 @@ func NewHealthSignalAggregatorImpl( } func (s *HealthSignalAggregatorImpl) Start() { + if !s.enabled() { + NoopHealthSignalAggregator.Start() + return + } + if !atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusInitialized, common.DaemonStatusStarted) { return } @@ -96,6 +104,11 @@ func (s *HealthSignalAggregatorImpl) Start() { } func (s *HealthSignalAggregatorImpl) Stop() { + if !s.enabled() { + NoopHealthSignalAggregator.Stop() + return + } + if 
!atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusStarted, common.DaemonStatusStopped) { return } @@ -103,28 +116,39 @@ func (s *HealthSignalAggregatorImpl) Stop() { s.emitMetricsTimer.Stop() } -func (s *HealthSignalAggregatorImpl) GetRecordFn(req quotas.Request) func(err error) { - start := time.Now() - return func(err error) { - s.latencyAverage.Record(time.Since(start).Milliseconds()) - - if isUnhealthyError(err) { - s.errorRatio.Record(1) - } else { - s.errorRatio.Record(0) - } +func (s *HealthSignalAggregatorImpl) Record(callerSegment int32, latency time.Duration, err error) { + if !s.enabled() { + NoopHealthSignalAggregator.Record(callerSegment, latency, err) + return + } - if req.CallerSegment != CallerSegmentMissing { - s.incrementShardRequestCount(req.CallerSegment) - } + // TODO: uncomment when adding dynamic rate limiter + //s.latencyAverage.Record(latency.Milliseconds()) + // + //if isUnhealthyError(err) { + // s.errorRatio.Record(1) + //} else { + // s.errorRatio.Record(0) + //} + + if callerSegment != CallerSegmentMissing { + s.incrementShardRequestCount(callerSegment) } } func (s *HealthSignalAggregatorImpl) AverageLatency() float64 { + if !s.enabled() { + return NoopHealthSignalAggregator.AverageLatency() + } + return s.latencyAverage.Average() } func (s *HealthSignalAggregatorImpl) ErrorRatio() float64 { + if !s.enabled() { + return NoopHealthSignalAggregator.ErrorRatio() + } + return s.errorRatio.Average() } @@ -156,17 +180,18 @@ func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { } } -func isUnhealthyError(err error) bool { - if err == nil { - return false - } - switch err.(type) { - case *ShardOwnershipLostError, - *AppendHistoryTimeoutError, - *TimeoutError: - return true - - default: - return false - } -} +// TODO: uncomment when adding dynamic rate limiter +//func isUnhealthyError(err error) bool { +// if err == nil { +// return false +// } +// switch err.(type) { +// case *ShardOwnershipLostError, +// *AppendHistoryTimeoutError, 
+// *TimeoutError: +// return true +// +// default: +// return false +// } +//} diff --git a/common/persistence/noop_health_signal_aggregator.go b/common/persistence/noop_health_signal_aggregator.go index 35a5f90b75f..dff0c151d28 100644 --- a/common/persistence/noop_health_signal_aggregator.go +++ b/common/persistence/noop_health_signal_aggregator.go @@ -25,7 +25,7 @@ package persistence import ( - "go.temporal.io/server/common/quotas" + "time" ) var NoopHealthSignalAggregator HealthSignalAggregator = newNoopSignalAggregator() @@ -40,9 +40,7 @@ func (a *noopSignalAggregator) Start() {} func (a *noopSignalAggregator) Stop() {} -func (a *noopSignalAggregator) GetRecordFn(_ quotas.Request) func(error) { - return func(error) {} -} +func (a *noopSignalAggregator) Record(_ int32, _ time.Duration, _ error) {} func (a *noopSignalAggregator) AverageLatency() float64 { return 0 diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index 5e50dfd4a44..6f68bb510b3 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -35,7 +35,6 @@ import ( "go.temporal.io/server/common/log" "go.temporal.io/server/common/log/tag" "go.temporal.io/server/common/metrics" - "go.temporal.io/server/common/quotas" "go.temporal.io/server/service/history/tasks" ) @@ -169,12 +168,11 @@ func (p *shardPersistenceClient) GetOrCreateShard( ctx context.Context, request *GetOrCreateShardRequest, ) (_ *GetOrCreateShardResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetOrCreateShardScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + 
p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, caller, startTime, retErr) }() return p.persistence.GetOrCreateShard(ctx, request) } @@ -183,12 +181,11 @@ func (p *shardPersistenceClient) UpdateShard( ctx context.Context, request *UpdateShardRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateShardScope, request.ShardInfo.GetShardId(), p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardInfo.GetShardId(), time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, caller, startTime, retErr) }() return p.persistence.UpdateShard(ctx, request) } @@ -197,12 +194,11 @@ func (p *shardPersistenceClient) AssertShardOwnership( ctx context.Context, request *AssertShardOwnershipRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceAssertShardOwnershipScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, caller, startTime, retErr) }() return p.persistence.AssertShardOwnership(ctx, request) } @@ -223,12 +219,11 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( ctx context.Context, request *CreateWorkflowExecutionRequest, ) (_ *CreateWorkflowExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceCreateWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -237,12 +232,11 @@ func (p *executionPersistenceClient) GetWorkflowExecution( ctx context.Context, request *GetWorkflowExecutionRequest, ) (_ *GetWorkflowExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.GetWorkflowExecution(ctx, request) } @@ -251,12 +245,11 @@ func (p *executionPersistenceClient) SetWorkflowExecution( ctx context.Context, request *SetWorkflowExecutionRequest, ) (_ *SetWorkflowExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceSetWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, 
time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.SetWorkflowExecution(ctx, request) } @@ -265,12 +258,11 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( ctx context.Context, request *UpdateWorkflowExecutionRequest, ) (_ *UpdateWorkflowExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -279,12 +271,11 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( ctx context.Context, request *ConflictResolveWorkflowExecutionRequest, ) (_ *ConflictResolveWorkflowExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceConflictResolveWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -293,12 +284,11 @@ func (p *executionPersistenceClient) 
DeleteWorkflowExecution( ctx context.Context, request *DeleteWorkflowExecutionRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -307,12 +297,11 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( ctx context.Context, request *DeleteCurrentWorkflowExecutionRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteCurrentWorkflowExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, caller, startTime, retErr) }() return p.persistence.DeleteCurrentWorkflowExecution(ctx, request) } @@ -321,12 +310,11 @@ func (p *executionPersistenceClient) GetCurrentExecution( ctx context.Context, request *GetCurrentExecutionRequest, ) (_ *GetCurrentExecutionResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetCurrentExecutionScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := 
time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, caller, startTime, retErr) }() return p.persistence.GetCurrentExecution(ctx, request) } @@ -335,12 +323,11 @@ func (p *executionPersistenceClient) ListConcreteExecutions( ctx context.Context, request *ListConcreteExecutionsRequest, ) (_ *ListConcreteExecutionsResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceListConcreteExecutionsScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, caller, startTime, retErr) }() return p.persistence.ListConcreteExecutions(ctx, request) } @@ -376,12 +363,11 @@ func (p *executionPersistenceClient) AddHistoryTasks( ctx context.Context, request *AddHistoryTasksRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceAddTasksScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceAddTasksScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceAddTasksScope, caller, startTime, retErr) }() return p.persistence.AddHistoryTasks(ctx, request) } @@ -406,12 +392,11 @@ func (p 
*executionPersistenceClient) GetHistoryTasks( return nil, serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(operation, caller, startTime, retErr) }() return p.persistence.GetHistoryTasks(ctx, request) } @@ -436,12 +421,11 @@ func (p *executionPersistenceClient) CompleteHistoryTask( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(operation, caller, startTime, retErr) }() return p.persistence.CompleteHistoryTask(ctx, request) } @@ -466,12 +450,11 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( return serviceerror.NewInternal(fmt.Sprintf("unknown task category type: %v", request.TaskCategory)) } - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, operation, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(operation, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(operation, caller, 
startTime, retErr) }() return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -480,12 +463,11 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( ctx context.Context, request *PutReplicationTaskToDLQRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistencePutReplicationTaskToDLQScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, caller, startTime, retErr) }() return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -494,12 +476,11 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ *GetHistoryTasksResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetReplicationTasksFromDLQScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) }() return p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -508,12 +489,11 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( ctx context.Context, request *DeleteReplicationTaskFromDLQRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceDeleteReplicationTaskFromDLQScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) }() return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -522,12 +502,11 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( ctx context.Context, request *RangeDeleteReplicationTaskFromDLQRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) }() return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -536,12 +515,11 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( ctx context.Context, request *GetReplicationTasksFromDLQRequest, ) (_ bool, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetReplicationTasksFromDLQScope, request.ShardID, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, callerInfo.CallerName, 
startTime, retErr) + p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) }() return p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -558,12 +536,11 @@ func (p *taskPersistenceClient) CreateTasks( ctx context.Context, request *CreateTasksRequest, ) (_ *CreateTasksResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateTasksScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, caller, startTime, retErr) }() return p.persistence.CreateTasks(ctx, request) } @@ -572,12 +549,11 @@ func (p *taskPersistenceClient) GetTasks( ctx context.Context, request *GetTasksRequest, ) (_ *GetTasksResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetTasksScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetTasksScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetTasksScope, caller, startTime, retErr) }() return p.persistence.GetTasks(ctx, request) } @@ -586,12 +562,11 @@ func (p *taskPersistenceClient) CompleteTask( ctx context.Context, request *CompleteTaskRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, 
metrics.PersistenceCompleteTaskScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, caller, startTime, retErr) }() return p.persistence.CompleteTask(ctx, request) } @@ -600,12 +575,11 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( ctx context.Context, request *CompleteTasksLessThanRequest, ) (_ int, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceCompleteTasksLessThanScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, caller, startTime, retErr) }() return p.persistence.CompleteTasksLessThan(ctx, request) } @@ -614,12 +588,11 @@ func (p *taskPersistenceClient) CreateTaskQueue( ctx context.Context, request *CreateTaskQueueRequest, ) (_ *CreateTaskQueueResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateTaskQueueScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + 
p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, caller, startTime, retErr) }() return p.persistence.CreateTaskQueue(ctx, request) } @@ -628,12 +601,11 @@ func (p *taskPersistenceClient) UpdateTaskQueue( ctx context.Context, request *UpdateTaskQueueRequest, ) (_ *UpdateTaskQueueResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateTaskQueueScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, caller, startTime, retErr) }() return p.persistence.UpdateTaskQueue(ctx, request) } @@ -642,12 +614,11 @@ func (p *taskPersistenceClient) GetTaskQueue( ctx context.Context, request *GetTaskQueueRequest, ) (_ *GetTaskQueueResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetTaskQueueScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, caller, startTime, retErr) }() return p.persistence.GetTaskQueue(ctx, request) } @@ -656,12 +627,11 @@ func (p *taskPersistenceClient) ListTaskQueue( ctx context.Context, request *ListTaskQueueRequest, ) (_ *ListTaskQueueResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceListTaskQueueScope, 
CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, caller, startTime, retErr) }() return p.persistence.ListTaskQueue(ctx, request) } @@ -670,12 +640,11 @@ func (p *taskPersistenceClient) DeleteTaskQueue( ctx context.Context, request *DeleteTaskQueueRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteTaskQueueScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, caller, startTime, retErr) }() return p.persistence.DeleteTaskQueue(ctx, request) } @@ -692,12 +661,11 @@ func (p *metadataPersistenceClient) CreateNamespace( ctx context.Context, request *CreateNamespaceRequest, ) (_ *CreateNamespaceResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceCreateNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, caller, startTime, retErr) }() return 
p.persistence.CreateNamespace(ctx, request) } @@ -706,12 +674,11 @@ func (p *metadataPersistenceClient) GetNamespace( ctx context.Context, request *GetNamespaceRequest, ) (_ *GetNamespaceResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, caller, startTime, retErr) }() return p.persistence.GetNamespace(ctx, request) } @@ -720,12 +687,11 @@ func (p *metadataPersistenceClient) UpdateNamespace( ctx context.Context, request *UpdateNamespaceRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, caller, startTime, retErr) }() return p.persistence.UpdateNamespace(ctx, request) } @@ -734,12 +700,11 @@ func (p *metadataPersistenceClient) RenameNamespace( ctx context.Context, request *RenameNamespaceRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceRenameNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - 
signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, caller, startTime, retErr) }() return p.persistence.RenameNamespace(ctx, request) } @@ -748,12 +713,11 @@ func (p *metadataPersistenceClient) DeleteNamespace( ctx context.Context, request *DeleteNamespaceRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, caller, startTime, retErr) }() return p.persistence.DeleteNamespace(ctx, request) } @@ -762,12 +726,11 @@ func (p *metadataPersistenceClient) DeleteNamespaceByName( ctx context.Context, request *DeleteNamespaceByNameRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteNamespaceByNameScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, caller, startTime, retErr) }() return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -776,12 +739,11 @@ func (p *metadataPersistenceClient) ListNamespaces( ctx 
context.Context, request *ListNamespacesRequest, ) (_ *ListNamespacesResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceListNamespacesScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, caller, startTime, retErr) }() return p.persistence.ListNamespaces(ctx, request) } @@ -789,12 +751,11 @@ func (p *metadataPersistenceClient) ListNamespaces( func (p *metadataPersistenceClient) GetMetadata( ctx context.Context, ) (_ *GetMetadataResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetMetadataScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, caller, startTime, retErr) }() return p.persistence.GetMetadata(ctx) } @@ -808,12 +769,11 @@ func (p *executionPersistenceClient) AppendHistoryNodes( ctx context.Context, request *AppendHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceAppendHistoryNodesScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - 
p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, caller, startTime, retErr) }() return p.persistence.AppendHistoryNodes(ctx, request) } @@ -823,12 +783,11 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( ctx context.Context, request *AppendRawHistoryNodesRequest, ) (_ *AppendHistoryNodesResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceAppendRawHistoryNodesScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, caller, startTime, retErr) }() return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -838,12 +797,11 @@ func (p *executionPersistenceClient) ReadHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) }() return p.persistence.ReadHistoryBranch(ctx, request) } @@ -852,12 +810,11 @@ 
func (p *executionPersistenceClient) ReadHistoryBranchReverse( ctx context.Context, request *ReadHistoryBranchReverseRequest, ) (_ *ReadHistoryBranchReverseResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadHistoryBranchReverseScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, caller, startTime, retErr) }() return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -867,12 +824,11 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadHistoryBranchByBatchResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) }() return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -882,12 +838,11 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( ctx context.Context, request *ReadHistoryBranchRequest, ) (_ *ReadRawHistoryBranchResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadRawHistoryBranchScope, CallerSegmentMissing, 
p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, caller, startTime, retErr) }() return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -897,12 +852,11 @@ func (p *executionPersistenceClient) ForkHistoryBranch( ctx context.Context, request *ForkHistoryBranchRequest, ) (_ *ForkHistoryBranchResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceForkHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, caller, startTime, retErr) }() return p.persistence.ForkHistoryBranch(ctx, request) } @@ -912,12 +866,11 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( ctx context.Context, request *DeleteHistoryBranchRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, caller, 
startTime, retErr) }() return p.persistence.DeleteHistoryBranch(ctx, request) } @@ -927,12 +880,11 @@ func (p *executionPersistenceClient) TrimHistoryBranch( ctx context.Context, request *TrimHistoryBranchRequest, ) (_ *TrimHistoryBranchResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceTrimHistoryBranchScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, caller, startTime, retErr) }() return p.persistence.TrimHistoryBranch(ctx, request) } @@ -941,12 +893,11 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( ctx context.Context, request *GetAllHistoryTreeBranchesRequest, ) (_ *GetAllHistoryTreeBranchesResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetAllHistoryTreeBranchesScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, caller, startTime, retErr) }() return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -956,12 +907,11 @@ func (p *executionPersistenceClient) GetHistoryTree( ctx context.Context, request *GetHistoryTreeRequest, ) (_ *GetHistoryTreeResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := 
signalRecordFn(callerInfo, metrics.PersistenceGetHistoryTreeScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, caller, startTime, retErr) }() return p.persistence.GetHistoryTree(ctx, request) } @@ -977,12 +927,11 @@ func (p *queuePersistenceClient) EnqueueMessage( ctx context.Context, blob commonpb.DataBlob, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceEnqueueMessageScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, caller, startTime, retErr) }() return p.persistence.EnqueueMessage(ctx, blob) } @@ -992,12 +941,11 @@ func (p *queuePersistenceClient) ReadMessages( lastMessageID int64, maxCount int, ) (_ []*QueueMessage, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadQueueMessagesScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, caller, startTime, retErr) 
}() return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -1006,12 +954,11 @@ func (p *queuePersistenceClient) UpdateAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateAckLevelScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, caller, startTime, retErr) }() return p.persistence.UpdateAckLevel(ctx, metadata) } @@ -1019,12 +966,11 @@ func (p *queuePersistenceClient) UpdateAckLevel( func (p *queuePersistenceClient) GetAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetAckLevelScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, caller, startTime, retErr) }() return p.persistence.GetAckLevels(ctx) } @@ -1033,12 +979,11 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( ctx context.Context, messageID int64, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteMessagesBeforeScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { 
- signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, caller, startTime, retErr) }() return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -1047,12 +992,11 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( ctx context.Context, blob commonpb.DataBlob, ) (_ int64, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceEnqueueMessageToDLQScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, caller, startTime, retErr) }() return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -1064,12 +1008,11 @@ func (p *queuePersistenceClient) ReadMessagesFromDLQ( pageSize int, pageToken []byte, ) (_ []*QueueMessage, _ []byte, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceReadMessagesFromDLQScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, caller, startTime, retErr) }() return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) } @@ -1078,12 
+1021,11 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( ctx context.Context, messageID int64, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteMessageFromDLQScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, caller, startTime, retErr) }() return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -1093,12 +1035,11 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( firstMessageID int64, lastMessageID int64, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceRangeDeleteMessagesFromDLQScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, caller, startTime, retErr) }() return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -1107,12 +1048,11 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( ctx context.Context, metadata *InternalQueueMetadata, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpdateDLQAckLevelScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - 
signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, caller, startTime, retErr) }() return p.persistence.UpdateDLQAckLevel(ctx, metadata) } @@ -1120,12 +1060,11 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( func (p *queuePersistenceClient) GetDLQAckLevels( ctx context.Context, ) (_ *InternalQueueMetadata, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetDLQAckLevelScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, caller, startTime, retErr) }() return p.persistence.GetDLQAckLevels(ctx) } @@ -1142,12 +1081,11 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( ctx context.Context, request *ListClusterMetadataRequest, ) (_ *ListClusterMetadataResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceListClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, caller, startTime, retErr) }() return p.persistence.ListClusterMetadata(ctx, request) } @@ -1155,12 
+1093,11 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( ctx context.Context, ) (_ *GetClusterMetadataResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetCurrentClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, caller, startTime, retErr) }() return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -1169,12 +1106,11 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( ctx context.Context, request *GetClusterMetadataRequest, ) (_ *GetClusterMetadataResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, caller, startTime, retErr) }() return p.persistence.GetClusterMetadata(ctx, request) } @@ -1183,12 +1119,11 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( ctx context.Context, request *SaveClusterMetadataRequest, ) (_ bool, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceSaveClusterMetadataScope, 
CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, caller, startTime, retErr) }() return p.persistence.SaveClusterMetadata(ctx, request) } @@ -1197,12 +1132,11 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( ctx context.Context, request *DeleteClusterMetadataRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceDeleteClusterMetadataScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, caller, startTime, retErr) }() return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -1215,12 +1149,11 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( ctx context.Context, request *GetClusterMembersRequest, ) (_ *GetClusterMembersResponse, retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceGetClusterMembersScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + 
p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, caller, startTime, retErr) }() return p.persistence.GetClusterMembers(ctx, request) } @@ -1229,12 +1162,11 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( ctx context.Context, request *UpsertClusterMembershipRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistenceUpsertClusterMembershipScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, caller, startTime, retErr) }() return p.persistence.UpsertClusterMembership(ctx, request) } @@ -1243,12 +1175,11 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( ctx context.Context, request *PruneClusterMembershipRequest, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := signalRecordFn(callerInfo, metrics.PersistencePruneClusterMembershipScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, caller, startTime, retErr) }() return p.persistence.PruneClusterMembership(ctx, request) } @@ -1257,12 +1188,11 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( ctx context.Context, currentClusterName string, ) (retErr error) { - callerInfo := headers.GetCallerInfo(ctx) - signalFn := 
signalRecordFn(callerInfo, metrics.PersistenceInitializeSystemNamespaceScope, CallerSegmentMissing, p.healthSignals) + caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - signalFn(retErr) - p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, callerInfo.CallerName, startTime, retErr) + p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) + p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, caller, startTime, retErr) }() return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } @@ -1299,19 +1229,3 @@ func updateErrorMetric(handler metrics.Handler, logger log.Logger, operation str } } } - -func signalRecordFn( - callerInfo headers.CallerInfo, - api string, - shardID int32, - healthSignals HealthSignalAggregator, -) func(err error) { - return healthSignals.GetRecordFn(quotas.NewRequest( - api, - RateLimitDefaultToken, - callerInfo.CallerName, - callerInfo.CallerType, - shardID, - callerInfo.CallOrigin, - )) -} diff --git a/common/resource/fx.go b/common/resource/fx.go index b4eb50f72b7..3f793afd476 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -105,7 +105,6 @@ var Module = fx.Options( func(p namespace.Registry) common.Pingable { return p }, fx.ResultTags(`group:"deadlockDetectorRoots"`), )), - fx.Provide(PersistenceHealthSignalAggregatorProvider), fx.Provide(serialization.NewSerializer), fx.Provide(HistoryBootstrapContainerProvider), fx.Provide(VisibilityBootstrapContainerProvider), @@ -216,20 +215,6 @@ func NamespaceRegistryProvider( ) } -func PersistenceHealthSignalAggregatorProvider( - dynamicCollection *dynamicconfig.Collection, - metricsHandler metrics.Handler, - logger log.Logger, -) persistence.HealthSignalAggregator { - return persistence.NewHealthSignalAggregatorImpl( - dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second), - 
dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500), - metricsHandler, - dynamicCollection.GetIntProperty(dynamicconfig.ShardRPSWarnLimit, 50), - logger, - ) -} - func ClientFactoryProvider( factoryProvider client.FactoryProvider, rpcFactory common.RPCFactory, From 0911a110486af6e1a750a511fce7d93e30547773 Mon Sep 17 00:00:00 2001 From: PJ Doerner Date: Thu, 25 May 2023 12:52:58 -0700 Subject: [PATCH 35/36] allow start and stop when not enabled --- common/persistence/health_signal_aggregator.go | 14 ++++---------- service/history/shard/context_test.go | 6 +++--- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go index f266eecf516..4e6bd83d5b1 100644 --- a/common/persistence/health_signal_aggregator.go +++ b/common/persistence/health_signal_aggregator.go @@ -92,11 +92,6 @@ func NewHealthSignalAggregatorImpl( } func (s *HealthSignalAggregatorImpl) Start() { - if !s.enabled() { - NoopHealthSignalAggregator.Start() - return - } - if !atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusInitialized, common.DaemonStatusStarted) { return } @@ -104,11 +99,6 @@ func (s *HealthSignalAggregatorImpl) Start() { } func (s *HealthSignalAggregatorImpl) Stop() { - if !s.enabled() { - NoopHealthSignalAggregator.Stop() - return - } - if !atomic.CompareAndSwapInt32(&s.status, common.DaemonStatusStarted, common.DaemonStatusStopped) { return } @@ -164,6 +154,10 @@ func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { case <-s.shutdownCh: return case <-s.emitMetricsTimer.C: + if !s.enabled() { + continue + } + s.requestsLock.Lock() requestCounts := s.requestsPerShard s.requestsPerShard = make(map[int32]int64, len(requestCounts)) diff --git a/service/history/shard/context_test.go b/service/history/shard/context_test.go index 2e63b5cc6f8..d79b59cc0f9 100644 --- a/service/history/shard/context_test.go +++ b/service/history/shard/context_test.go 
@@ -122,12 +122,12 @@ func (s *contextSuite) TestOverwriteScheduledTaskTimestamp() { tasks.CategoryTimer, time.Time{}, ) - tasks := map[tasks.Category][]tasks.Task{ + testTasks := map[tasks.Category][]tasks.Task{ tasks.CategoryTimer: {fakeTask}, } s.mockExecutionManager.EXPECT().AddHistoryTasks(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() - s.mockHistoryEngine.EXPECT().NotifyNewTasks(tasks).AnyTimes() + s.mockHistoryEngine.EXPECT().NotifyNewTasks(testTasks).AnyTimes() testCases := []struct { taskTimestamp time.Time @@ -162,7 +162,7 @@ func (s *contextSuite) TestOverwriteScheduledTaskTimestamp() { NamespaceID: workflowKey.NamespaceID, WorkflowID: workflowKey.WorkflowID, RunID: workflowKey.RunID, - Tasks: tasks, + Tasks: testTasks, }, ) s.NoError(err) From df5f722f8f37a6de88d52c6452892cf8f93eb6df Mon Sep 17 00:00:00 2001 From: Yichao Yang Date: Thu, 25 May 2023 14:09:45 -0700 Subject: [PATCH 36/36] some improvements --- common/persistence/client/factory.go | 28 +- common/persistence/client/fx.go | 23 +- .../persistence/health_signal_aggregator.go | 21 -- .../persistence/persistenceMetricClients.go | 349 +++++++++++------- 4 files changed, 236 insertions(+), 185 deletions(-) diff --git a/common/persistence/client/factory.go b/common/persistence/client/factory.go index 413a65adf3b..4242fa11528 100644 --- a/common/persistence/client/factory.go +++ b/common/persistence/client/factory.go @@ -89,7 +89,7 @@ func NewFactory( logger log.Logger, healthSignals p.HealthSignalAggregator, ) Factory { - return &factoryImpl{ + factory := &factoryImpl{ dataStoreFactory: dataStoreFactory, config: cfg, serializer: serializer, @@ -99,6 +99,8 @@ func NewFactory( ratelimiter: ratelimiter, healthSignals: healthSignals, } + factory.initDependencies() + return factory } // NewTaskManager returns a new task manager @@ -112,8 +114,7 @@ func (f *factoryImpl) NewTaskManager() (p.TaskManager, error) { if f.ratelimiter != nil { result = p.NewTaskPersistenceRateLimitedClient(result, 
f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewTaskPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } return result, nil @@ -130,8 +131,7 @@ func (f *factoryImpl) NewShardManager() (p.ShardManager, error) { if f.ratelimiter != nil { result = p.NewShardPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewShardPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewShardPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -149,8 +149,7 @@ func (f *factoryImpl) NewMetadataManager() (p.MetadataManager, error) { if f.ratelimiter != nil { result = p.NewMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -168,8 +167,7 @@ func (f *factoryImpl) NewClusterMetadataManager() (p.ClusterMetadataManager, err if f.ratelimiter != nil { result = p.NewClusterMetadataPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewClusterMetadataPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewClusterMetadataPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -187,8 
+185,7 @@ func (f *factoryImpl) NewExecutionManager() (p.ExecutionManager, error) { if f.ratelimiter != nil { result = p.NewExecutionPersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewExecutionPersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewExecutionPersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -204,8 +201,7 @@ func (f *factoryImpl) NewNamespaceReplicationQueue() (p.NamespaceReplicationQueu if f.ratelimiter != nil { result = p.NewQueuePersistenceRateLimitedClient(result, f.ratelimiter, f.logger) } - if f.metricsHandler != nil || f.healthSignals != nil { - f.updateNilMetricsAndHealthSignals() + if f.metricsHandler != nil && f.healthSignals != nil { result = p.NewQueuePersistenceMetricsClient(result, f.metricsHandler, f.healthSignals, f.logger) } result = p.NewQueuePersistenceRetryableClient(result, retryPolicy, IsPersistenceTransientError) @@ -229,7 +225,11 @@ func IsPersistenceTransientError(err error) bool { return false } -func (f *factoryImpl) updateNilMetricsAndHealthSignals() { +func (f *factoryImpl) initDependencies() { + if f.metricsHandler == nil && f.healthSignals == nil { + return + } + if f.metricsHandler == nil { f.metricsHandler = metrics.NoopMetricsHandler } diff --git a/common/persistence/client/fx.go b/common/persistence/client/fx.go index e5d4c2f1adb..2a238b936b3 100644 --- a/common/persistence/client/fx.go +++ b/common/persistence/client/fx.go @@ -70,7 +70,7 @@ var Module = fx.Options( BeanModule, fx.Provide(ClusterNameProvider), fx.Provide(DataStoreFactoryProvider), - fx.Provide(PersistenceHealthSignalAggregatorProvider), + fx.Provide(HealthSignalAggregatorProvider), ) func ClusterNameProvider(config *cluster.Config) ClusterName { @@ -106,17 +106,20 @@ func FactoryProvider( ) } -func 
PersistenceHealthSignalAggregatorProvider( +func HealthSignalAggregatorProvider( dynamicCollection *dynamicconfig.Collection, metricsHandler metrics.Handler, logger log.Logger, ) persistence.HealthSignalAggregator { - return persistence.NewHealthSignalAggregatorImpl( - dynamicCollection.GetBoolProperty(dynamicconfig.PersistenceHealthSignalCollectionEnabled, false), - dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second)(), - dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500)(), - metricsHandler, - dynamicCollection.GetIntProperty(dynamicconfig.ShardRPSWarnLimit, 50), - logger, - ) + if dynamicCollection.GetBoolProperty(dynamicconfig.PersistenceHealthSignalCollectionEnabled, true)() { + return persistence.NewHealthSignalAggregatorImpl( + dynamicCollection.GetDurationProperty(dynamicconfig.PersistenceHealthSignalWindowSize, 3*time.Second)(), + dynamicCollection.GetIntProperty(dynamicconfig.PersistenceHealthSignalBufferSize, 500)(), + metricsHandler, + dynamicCollection.GetIntProperty(dynamicconfig.ShardRPSWarnLimit, 50), + logger, + ) + } + + return persistence.NoopHealthSignalAggregator } diff --git a/common/persistence/health_signal_aggregator.go b/common/persistence/health_signal_aggregator.go index 4e6bd83d5b1..b9ce009b56c 100644 --- a/common/persistence/health_signal_aggregator.go +++ b/common/persistence/health_signal_aggregator.go @@ -50,8 +50,6 @@ type ( } HealthSignalAggregatorImpl struct { - enabled dynamicconfig.BoolPropertyFn - status int32 shutdownCh chan struct{} @@ -70,7 +68,6 @@ type ( ) func NewHealthSignalAggregatorImpl( - enabled dynamicconfig.BoolPropertyFn, windowSize time.Duration, maxBufferSize int, metricsHandler metrics.Handler, @@ -78,7 +75,6 @@ func NewHealthSignalAggregatorImpl( logger log.Logger, ) *HealthSignalAggregatorImpl { return &HealthSignalAggregatorImpl{ - enabled: enabled, status: common.DaemonStatusInitialized, shutdownCh: make(chan struct{}), 
requestsPerShard: make(map[int32]int64), @@ -107,11 +103,6 @@ func (s *HealthSignalAggregatorImpl) Stop() { } func (s *HealthSignalAggregatorImpl) Record(callerSegment int32, latency time.Duration, err error) { - if !s.enabled() { - NoopHealthSignalAggregator.Record(callerSegment, latency, err) - return - } - // TODO: uncomment when adding dynamic rate limiter //s.latencyAverage.Record(latency.Milliseconds()) // @@ -127,18 +118,10 @@ func (s *HealthSignalAggregatorImpl) Record(callerSegment int32, latency time.Du } func (s *HealthSignalAggregatorImpl) AverageLatency() float64 { - if !s.enabled() { - return NoopHealthSignalAggregator.AverageLatency() - } - return s.latencyAverage.Average() } func (s *HealthSignalAggregatorImpl) ErrorRatio() float64 { - if !s.enabled() { - return NoopHealthSignalAggregator.ErrorRatio() - } - return s.errorRatio.Average() } @@ -154,10 +137,6 @@ func (s *HealthSignalAggregatorImpl) emitMetricsLoop() { case <-s.shutdownCh: return case <-s.emitMetricsTimer.C: - if !s.enabled() { - continue - } - s.requestsLock.Lock() requestCounts := s.requestsPerShard s.requestsPerShard = make(map[int32]int64, len(requestCounts)) diff --git a/common/persistence/persistenceMetricClients.go b/common/persistence/persistenceMetricClients.go index 6f68bb510b3..5358ffcbd8b 100644 --- a/common/persistence/persistenceMetricClients.go +++ b/common/persistence/persistenceMetricClients.go @@ -171,8 +171,9 @@ func (p *shardPersistenceClient) GetOrCreateShard( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetOrCreateShardScope, caller, latency, retErr) }() return p.persistence.GetOrCreateShard(ctx, request) } @@ -184,8 
+185,9 @@ func (p *shardPersistenceClient) UpdateShard( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardInfo.GetShardId(), time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardInfo.GetShardId(), latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateShardScope, caller, latency, retErr) }() return p.persistence.UpdateShard(ctx, request) } @@ -197,8 +199,9 @@ func (p *shardPersistenceClient) AssertShardOwnership( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceAssertShardOwnershipScope, caller, latency, retErr) }() return p.persistence.AssertShardOwnership(ctx, request) } @@ -222,8 +225,9 @@ func (p *executionPersistenceClient) CreateWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCreateWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.CreateWorkflowExecution(ctx, request) } @@ -235,8 +239,9 @@ func (p *executionPersistenceClient) GetWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, 
time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.GetWorkflowExecution(ctx, request) } @@ -248,8 +253,9 @@ func (p *executionPersistenceClient) SetWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceSetWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.SetWorkflowExecution(ctx, request) } @@ -261,8 +267,9 @@ func (p *executionPersistenceClient) UpdateWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.UpdateWorkflowExecution(ctx, request) } @@ -274,8 +281,9 @@ func (p *executionPersistenceClient) ConflictResolveWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + 
p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceConflictResolveWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.ConflictResolveWorkflowExecution(ctx, request) } @@ -287,8 +295,9 @@ func (p *executionPersistenceClient) DeleteWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.DeleteWorkflowExecution(ctx, request) } @@ -300,8 +309,9 @@ func (p *executionPersistenceClient) DeleteCurrentWorkflowExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteCurrentWorkflowExecutionScope, caller, latency, retErr) }() return p.persistence.DeleteCurrentWorkflowExecution(ctx, request) } @@ -313,8 +323,9 @@ func (p *executionPersistenceClient) GetCurrentExecution( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentExecutionScope, caller, 
latency, retErr) }() return p.persistence.GetCurrentExecution(ctx, request) } @@ -326,8 +337,9 @@ func (p *executionPersistenceClient) ListConcreteExecutions( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceListConcreteExecutionsScope, caller, latency, retErr) }() return p.persistence.ListConcreteExecutions(ctx, request) } @@ -366,8 +378,9 @@ func (p *executionPersistenceClient) AddHistoryTasks( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceAddTasksScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceAddTasksScope, caller, latency, retErr) }() return p.persistence.AddHistoryTasks(ctx, request) } @@ -395,8 +408,9 @@ func (p *executionPersistenceClient) GetHistoryTasks( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(operation, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(operation, caller, latency, retErr) }() return p.persistence.GetHistoryTasks(ctx, request) } @@ -424,8 +438,9 @@ func (p *executionPersistenceClient) CompleteHistoryTask( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), 
retErr) - p.recordRequestMetrics(operation, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(operation, caller, latency, retErr) }() return p.persistence.CompleteHistoryTask(ctx, request) } @@ -453,8 +468,9 @@ func (p *executionPersistenceClient) RangeCompleteHistoryTasks( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(operation, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(operation, caller, latency, retErr) }() return p.persistence.RangeCompleteHistoryTasks(ctx, request) } @@ -466,8 +482,9 @@ func (p *executionPersistenceClient) PutReplicationTaskToDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistencePutReplicationTaskToDLQScope, caller, latency, retErr) }() return p.persistence.PutReplicationTaskToDLQ(ctx, request) } @@ -479,8 +496,9 @@ func (p *executionPersistenceClient) GetReplicationTasksFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, latency, retErr) }() return 
p.persistence.GetReplicationTasksFromDLQ(ctx, request) } @@ -492,8 +510,9 @@ func (p *executionPersistenceClient) DeleteReplicationTaskFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteReplicationTaskFromDLQScope, caller, latency, retErr) }() return p.persistence.DeleteReplicationTaskFromDLQ(ctx, request) } @@ -505,8 +524,9 @@ func (p *executionPersistenceClient) RangeDeleteReplicationTaskFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteReplicationTaskFromDLQScope, caller, latency, retErr) }() return p.persistence.RangeDeleteReplicationTaskFromDLQ(ctx, request) } @@ -518,8 +538,9 @@ func (p *executionPersistenceClient) IsReplicationDLQEmpty( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(request.ShardID, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(request.ShardID, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetReplicationTasksFromDLQScope, caller, latency, retErr) }() return p.persistence.IsReplicationDLQEmpty(ctx, request) } @@ -539,8 +560,9 @@ func (p *taskPersistenceClient) 
CreateTasks( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCreateTasksScope, caller, latency, retErr) }() return p.persistence.CreateTasks(ctx, request) } @@ -552,8 +574,9 @@ func (p *taskPersistenceClient) GetTasks( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetTasksScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetTasksScope, caller, latency, retErr) }() return p.persistence.GetTasks(ctx, request) } @@ -565,8 +588,9 @@ func (p *taskPersistenceClient) CompleteTask( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTaskScope, caller, latency, retErr) }() return p.persistence.CompleteTask(ctx, request) } @@ -578,8 +602,9 @@ func (p *taskPersistenceClient) CompleteTasksLessThan( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, caller, startTime, retErr) + latency := 
time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCompleteTasksLessThanScope, caller, latency, retErr) }() return p.persistence.CompleteTasksLessThan(ctx, request) } @@ -591,8 +616,9 @@ func (p *taskPersistenceClient) CreateTaskQueue( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCreateTaskQueueScope, caller, latency, retErr) }() return p.persistence.CreateTaskQueue(ctx, request) } @@ -604,8 +630,9 @@ func (p *taskPersistenceClient) UpdateTaskQueue( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateTaskQueueScope, caller, latency, retErr) }() return p.persistence.UpdateTaskQueue(ctx, request) } @@ -617,8 +644,9 @@ func (p *taskPersistenceClient) GetTaskQueue( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetTaskQueueScope, caller, latency, retErr) }() return p.persistence.GetTaskQueue(ctx, request) } @@ -630,8 +658,9 @@ func 
(p *taskPersistenceClient) ListTaskQueue( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceListTaskQueueScope, caller, latency, retErr) }() return p.persistence.ListTaskQueue(ctx, request) } @@ -643,8 +672,9 @@ func (p *taskPersistenceClient) DeleteTaskQueue( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteTaskQueueScope, caller, latency, retErr) }() return p.persistence.DeleteTaskQueue(ctx, request) } @@ -664,8 +694,9 @@ func (p *metadataPersistenceClient) CreateNamespace( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceCreateNamespaceScope, caller, latency, retErr) }() return p.persistence.CreateNamespace(ctx, request) } @@ -677,8 +708,9 @@ func (p *metadataPersistenceClient) GetNamespace( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - 
p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetNamespaceScope, caller, latency, retErr) }() return p.persistence.GetNamespace(ctx, request) } @@ -690,8 +722,9 @@ func (p *metadataPersistenceClient) UpdateNamespace( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateNamespaceScope, caller, latency, retErr) }() return p.persistence.UpdateNamespace(ctx, request) } @@ -703,8 +736,9 @@ func (p *metadataPersistenceClient) RenameNamespace( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceRenameNamespaceScope, caller, latency, retErr) }() return p.persistence.RenameNamespace(ctx, request) } @@ -716,8 +750,9 @@ func (p *metadataPersistenceClient) DeleteNamespace( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceScope, 
caller, latency, retErr) }() return p.persistence.DeleteNamespace(ctx, request) } @@ -729,8 +764,9 @@ func (p *metadataPersistenceClient) DeleteNamespaceByName( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteNamespaceByNameScope, caller, latency, retErr) }() return p.persistence.DeleteNamespaceByName(ctx, request) } @@ -742,8 +778,9 @@ func (p *metadataPersistenceClient) ListNamespaces( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceListNamespacesScope, caller, latency, retErr) }() return p.persistence.ListNamespaces(ctx, request) } @@ -754,8 +791,9 @@ func (p *metadataPersistenceClient) GetMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetMetadataScope, caller, latency, retErr) }() return p.persistence.GetMetadata(ctx) } @@ -772,8 +810,9 @@ func (p *executionPersistenceClient) AppendHistoryNodes( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { 
- p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceAppendHistoryNodesScope, caller, latency, retErr) }() return p.persistence.AppendHistoryNodes(ctx, request) } @@ -786,8 +825,9 @@ func (p *executionPersistenceClient) AppendRawHistoryNodes( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceAppendRawHistoryNodesScope, caller, latency, retErr) }() return p.persistence.AppendRawHistoryNodes(ctx, request) } @@ -800,8 +840,9 @@ func (p *executionPersistenceClient) ReadHistoryBranch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, latency, retErr) }() return p.persistence.ReadHistoryBranch(ctx, request) } @@ -813,8 +854,9 @@ func (p *executionPersistenceClient) ReadHistoryBranchReverse( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, caller, startTime, retErr) + latency := 
time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchReverseScope, caller, latency, retErr) }() return p.persistence.ReadHistoryBranchReverse(ctx, request) } @@ -827,8 +869,9 @@ func (p *executionPersistenceClient) ReadHistoryBranchByBatch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadHistoryBranchScope, caller, latency, retErr) }() return p.persistence.ReadHistoryBranchByBatch(ctx, request) } @@ -841,8 +884,9 @@ func (p *executionPersistenceClient) ReadRawHistoryBranch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadRawHistoryBranchScope, caller, latency, retErr) }() return p.persistence.ReadRawHistoryBranch(ctx, request) } @@ -855,8 +899,9 @@ func (p *executionPersistenceClient) ForkHistoryBranch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceForkHistoryBranchScope, caller, latency, retErr) 
}() return p.persistence.ForkHistoryBranch(ctx, request) } @@ -869,8 +914,9 @@ func (p *executionPersistenceClient) DeleteHistoryBranch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteHistoryBranchScope, caller, latency, retErr) }() return p.persistence.DeleteHistoryBranch(ctx, request) } @@ -883,8 +929,9 @@ func (p *executionPersistenceClient) TrimHistoryBranch( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceTrimHistoryBranchScope, caller, latency, retErr) }() return p.persistence.TrimHistoryBranch(ctx, request) } @@ -896,8 +943,9 @@ func (p *executionPersistenceClient) GetAllHistoryTreeBranches( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetAllHistoryTreeBranchesScope, caller, latency, retErr) }() return p.persistence.GetAllHistoryTreeBranches(ctx, request) } @@ -910,8 +958,9 @@ func (p *executionPersistenceClient) GetHistoryTree( caller := headers.GetCallerInfo(ctx).CallerName 
startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetHistoryTreeScope, caller, latency, retErr) }() return p.persistence.GetHistoryTree(ctx, request) } @@ -930,8 +979,9 @@ func (p *queuePersistenceClient) EnqueueMessage( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageScope, caller, latency, retErr) }() return p.persistence.EnqueueMessage(ctx, blob) } @@ -944,8 +994,9 @@ func (p *queuePersistenceClient) ReadMessages( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadQueueMessagesScope, caller, latency, retErr) }() return p.persistence.ReadMessages(ctx, lastMessageID, maxCount) } @@ -957,8 +1008,9 @@ func (p *queuePersistenceClient) UpdateAckLevel( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, caller, startTime, retErr) + latency := time.Since(startTime) + 
p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateAckLevelScope, caller, latency, retErr) }() return p.persistence.UpdateAckLevel(ctx, metadata) } @@ -969,8 +1021,9 @@ func (p *queuePersistenceClient) GetAckLevels( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetAckLevelScope, caller, latency, retErr) }() return p.persistence.GetAckLevels(ctx) } @@ -982,8 +1035,9 @@ func (p *queuePersistenceClient) DeleteMessagesBefore( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessagesBeforeScope, caller, latency, retErr) }() return p.persistence.DeleteMessagesBefore(ctx, messageID) } @@ -995,8 +1049,9 @@ func (p *queuePersistenceClient) EnqueueMessageToDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceEnqueueMessageToDLQScope, caller, latency, retErr) }() return p.persistence.EnqueueMessageToDLQ(ctx, blob) } @@ -1011,8 +1066,9 @@ func (p 
*queuePersistenceClient) ReadMessagesFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceReadMessagesFromDLQScope, caller, latency, retErr) }() return p.persistence.ReadMessagesFromDLQ(ctx, firstMessageID, lastMessageID, pageSize, pageToken) } @@ -1024,8 +1080,9 @@ func (p *queuePersistenceClient) DeleteMessageFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceDeleteMessageFromDLQScope, caller, latency, retErr) }() return p.persistence.DeleteMessageFromDLQ(ctx, messageID) } @@ -1038,8 +1095,9 @@ func (p *queuePersistenceClient) RangeDeleteMessagesFromDLQ( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceRangeDeleteMessagesFromDLQScope, caller, latency, retErr) }() return p.persistence.RangeDeleteMessagesFromDLQ(ctx, firstMessageID, lastMessageID) } @@ -1051,8 +1109,9 @@ func (p *queuePersistenceClient) UpdateDLQAckLevel( caller := headers.GetCallerInfo(ctx).CallerName startTime 
:= time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpdateDLQAckLevelScope, caller, latency, retErr) }() return p.persistence.UpdateDLQAckLevel(ctx, metadata) } @@ -1063,8 +1122,9 @@ func (p *queuePersistenceClient) GetDLQAckLevels( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetDLQAckLevelScope, caller, latency, retErr) }() return p.persistence.GetDLQAckLevels(ctx) } @@ -1084,8 +1144,9 @@ func (p *clusterMetadataPersistenceClient) ListClusterMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceListClusterMetadataScope, caller, latency, retErr) }() return p.persistence.ListClusterMetadata(ctx, request) } @@ -1096,8 +1157,9 @@ func (p *clusterMetadataPersistenceClient) GetCurrentClusterMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, caller, startTime, 
retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetCurrentClusterMetadataScope, caller, latency, retErr) }() return p.persistence.GetCurrentClusterMetadata(ctx) } @@ -1109,8 +1171,9 @@ func (p *clusterMetadataPersistenceClient) GetClusterMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetClusterMetadataScope, caller, latency, retErr) }() return p.persistence.GetClusterMetadata(ctx, request) } @@ -1122,8 +1185,9 @@ func (p *clusterMetadataPersistenceClient) SaveClusterMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceSaveClusterMetadataScope, caller, latency, retErr) }() return p.persistence.SaveClusterMetadata(ctx, request) } @@ -1135,8 +1199,9 @@ func (p *clusterMetadataPersistenceClient) DeleteClusterMetadata( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + 
p.recordRequestMetrics(metrics.PersistenceDeleteClusterMetadataScope, caller, latency, retErr) }() return p.persistence.DeleteClusterMetadata(ctx, request) } @@ -1152,8 +1217,9 @@ func (p *clusterMetadataPersistenceClient) GetClusterMembers( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceGetClusterMembersScope, caller, latency, retErr) }() return p.persistence.GetClusterMembers(ctx, request) } @@ -1165,8 +1231,9 @@ func (p *clusterMetadataPersistenceClient) UpsertClusterMembership( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceUpsertClusterMembershipScope, caller, latency, retErr) }() return p.persistence.UpsertClusterMembership(ctx, request) } @@ -1178,8 +1245,9 @@ func (p *clusterMetadataPersistenceClient) PruneClusterMembership( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistencePruneClusterMembershipScope, caller, latency, retErr) }() return p.persistence.PruneClusterMembership(ctx, 
request) } @@ -1191,16 +1259,17 @@ func (p *metadataPersistenceClient) InitializeSystemNamespaces( caller := headers.GetCallerInfo(ctx).CallerName startTime := time.Now().UTC() defer func() { - p.healthSignals.Record(CallerSegmentMissing, time.Since(startTime), retErr) - p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, caller, startTime, retErr) + latency := time.Since(startTime) + p.healthSignals.Record(CallerSegmentMissing, latency, retErr) + p.recordRequestMetrics(metrics.PersistenceInitializeSystemNamespaceScope, caller, latency, retErr) }() return p.persistence.InitializeSystemNamespaces(ctx, currentClusterName) } -func (p *metricEmitter) recordRequestMetrics(operation string, caller string, startTime time.Time, err error) { +func (p *metricEmitter) recordRequestMetrics(operation string, caller string, latency time.Duration, err error) { handler := p.metricsHandler.WithTags(metrics.OperationTag(operation), metrics.NamespaceTag(caller)) handler.Counter(metrics.PersistenceRequests.GetMetricName()).Record(1) - handler.Timer(metrics.PersistenceLatency.GetMetricName()).Record(time.Since(startTime)) + handler.Timer(metrics.PersistenceLatency.GetMetricName()).Record(latency) updateErrorMetric(handler, p.logger, operation, err) }