Skip to content

Commit

Permalink
Minor metrics / log improvement for global ratelimiter (#6259)
Browse files Browse the repository at this point in the history
Ran into some issues building a dashboard with these, as the collecton's `worker_domain_name` value does not automatically relate to everything else's `domain-name` value, and addressing that is... sadly difficult in our monitoring systems.

Regardless, I think using local keys is probably better.  We already have the collection name in logs and metrics, and this way you can search for the domain name and find all of these at once (easier for browsing randomly) and it's no harder to narrow down either (just also search by collection).
  • Loading branch information
Groxx authored Aug 29, 2024
1 parent 1b02d78 commit 4b76e5b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
8 changes: 5 additions & 3 deletions common/metrics/tags.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,12 @@ func AsyncWFRequestTypeTag(value string) Tag {
return metricWithUnknown(asyncWFRequestType, value)
}

// GlobalRatelimiterKeyTag reports the full (global) ratelimit key being used, e.g. "user:domain-x",
// though the value will be sanitized and may appear as "user_domain_x" or similar.
// GlobalRatelimiterKeyTag reports the local ratelimit key being used, e.g. "domain-x".
// This will likely be ambiguous if it is not combined with the collection name,
// but keeping this untouched helps keep the values template-friendly and correlate-able
// in metrics dashboards and queries.
func GlobalRatelimiterKeyTag(value string) Tag {
return simpleMetric{key: globalRatelimitKey, value: sanitizer.Value(value)}
return simpleMetric{key: globalRatelimitKey, value: value}
}

// GlobalRatelimiterTypeTag reports the "limit usage type" being reported, e.g. global vs local
Expand Down
14 changes: 7 additions & 7 deletions common/quotas/global/collection/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,14 +351,14 @@ func (c *Collection) backgroundUpdateLoop() {
if counts.Idle > gcAfterIdle || c.shouldDeleteKey(mode, true) {
c.logger.Debug(
"deleting local ratelimiter",
tag.GlobalRatelimiterKey(string(gkey)),
tag.GlobalRatelimiterKey(string(k)),
tag.GlobalRatelimiterIdleCount(counts.Idle),
)
c.local.Delete(k)
return true // continue iterating, possibly delete others too
}

c.sendMetrics(gkey, k, true, mode, counts)
c.sendMetrics(k, true, mode, counts)
return true
})
}()
Expand All @@ -374,7 +374,7 @@ func (c *Collection) backgroundUpdateLoop() {
if counts.Idle > gcAfterIdle || c.shouldDeleteKey(mode, false) {
c.logger.Debug(
"deleting global ratelimiter",
tag.GlobalRatelimiterKey(string(gkey)),
tag.GlobalRatelimiterKey(string(k)),
tag.GlobalRatelimiterIdleCount(counts.Idle),
)
c.global.Delete(k)
Expand All @@ -394,7 +394,7 @@ func (c *Collection) backgroundUpdateLoop() {
Allowed: counts.Allowed,
Rejected: counts.Rejected,
}
c.sendMetrics(gkey, k, false, mode, counts)
c.sendMetrics(k, false, mode, counts)

return true
})
Expand All @@ -419,13 +419,13 @@ func (c *Collection) backgroundUpdateLoop() {
}
}

func (c *Collection) sendMetrics(gkey shared.GlobalKey, lkey shared.LocalKey, isLocalLimiter bool, mode keyMode, usage internal.UsageMetrics) {
func (c *Collection) sendMetrics(lkey shared.LocalKey, isLocalLimiter bool, mode keyMode, usage internal.UsageMetrics) {
// emit quota information to make monitoring easier.
// regrettably this will only be emitted when the key is (recently) in use, but
// for active users this is probably sufficient. other cases will probably need
// a continual "emit all quotas" loop somewhere.
c.scope.
Tagged(metrics.GlobalRatelimiterKeyTag(string(gkey))).
Tagged(metrics.GlobalRatelimiterKeyTag(string(lkey))).
UpdateGauge(metrics.GlobalRatelimiterQuota, float64(c.targetRPS(lkey)))

limitType := "global"
Expand All @@ -435,7 +435,7 @@ func (c *Collection) sendMetrics(gkey shared.GlobalKey, lkey shared.LocalKey, is
limitTypeIsPrimary := isLocalLimiter && mode.isLocalPrimary() || !isLocalLimiter && mode.isGlobalPrimary()

scope := c.scope.Tagged(
metrics.GlobalRatelimiterKeyTag(string(gkey)),
metrics.GlobalRatelimiterKeyTag(string(lkey)),
metrics.GlobalRatelimiterTypeTag(limitType),
// useful for being able to tell when a key is "in use" or not, e.g. for monitoring purposes
metrics.GlobalRatelimiterIsPrimary(limitTypeIsPrimary),
Expand Down

0 comments on commit 4b76e5b

Please sign in to comment.