Skip to content

Commit

Permalink
Ingester: add cortex_ingester_circuit_breaker_request_timeouts_total …
Browse files Browse the repository at this point in the history
…metric (#8446)

* Ingester: add cortex_ingester_circuit_breaker_request_timeouts_total metric

Signed-off-by: Yuri Nikolic <[email protected]>

* Fixing review findings

Signed-off-by: Yuri Nikolic <[email protected]>

---------

Signed-off-by: Yuri Nikolic <[email protected]>
(cherry picked from commit c279dc5)
  • Loading branch information
duricanikolic authored and grafanabot committed Jun 21, 2024
1 parent c7bffb1 commit f2acad2
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 47 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
* [FEATURE] mimirtool: Add `runtime-config verify` sub-command, for verifying Mimir runtime config files. #8123
* [FEATURE] Query-frontend, querier: new experimental `/cardinality/active_native_histogram_metrics` API to get active native histogram metric names with statistics about active native histogram buckets. #7982 #7986 #8008
* [FEATURE] Alertmanager: Added `-alertmanager.max-silences-count` and `-alertmanager.max-silence-size-bytes` to set limits on per tenant silences. Disabled by default. #6898
* [FEATURE] Ingester: add experimental support for the server-side circuit breakers when writing to and reading from ingesters. This can be enabled using `-ingester.push-circuit-breaker.enabled` and `-ingester.read-circuit-breaker.enabled` options. Further `-ingester.push-circuit-breaker.*` and `-ingester.read-circuit-breaker.*` options for configuring circuit-breaker are available. Added metrics `cortex_ingester_circuit_breaker_results_total`, `cortex_ingester_circuit_breaker_transitions_total` and `cortex_ingester_circuit_breaker_current_state`. #8180 #8285 #8315
* [FEATURE] Ingester: add experimental support for the server-side circuit breakers when writing to and reading from ingesters. This can be enabled using `-ingester.push-circuit-breaker.enabled` and `-ingester.read-circuit-breaker.enabled` options. Further `-ingester.push-circuit-breaker.*` and `-ingester.read-circuit-breaker.*` options for configuring circuit-breaker are available. Added metrics `cortex_ingester_circuit_breaker_results_total`, `cortex_ingester_circuit_breaker_transitions_total`, `cortex_ingester_circuit_breaker_current_state` and `cortex_ingester_circuit_breaker_request_timeouts_total`. #8180 #8285 #8315 #8446
* [FEATURE] Distributor, ingester: add new setting `-validation.past-grace-period` to limit how old (based on the wall clock minus OOO window) the ingested samples can be. The default 0 value disables this limit. #8262
* [ENHANCEMENT] Distributor: add metrics `cortex_distributor_samples_per_request` and `cortex_distributor_exemplars_per_request` to track samples/exemplars per request. #8265
* [ENHANCEMENT] Reduced memory allocations in functions used to propagate contextual information between gRPC calls. #7529
Expand Down
46 changes: 31 additions & 15 deletions pkg/ingester/circuitbreaker.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ const (
)

// circuitBreakerMetrics groups the Prometheus collectors that
// newCircuitBreakerMetrics creates for a single circuit breaker.
type circuitBreakerMetrics struct {
circuitBreakerTransitions *prometheus.CounterVec
circuitBreakerResults *prometheus.CounterVec
circuitBreakerTransitions *prometheus.CounterVec
circuitBreakerResults *prometheus.CounterVec
// circuitBreakerRequestTimeouts is incremented by tryRecordFailure whenever
// the recorded error is a deadline-exceeded error.
circuitBreakerRequestTimeouts prometheus.Counter
}

func newCircuitBreakerMetrics(r prometheus.Registerer, currentState func() circuitbreaker.State, requestType string) *circuitBreakerMetrics {
Expand All @@ -48,6 +49,11 @@ func newCircuitBreakerMetrics(r prometheus.Registerer, currentState func() circu
Help: "Results of executing requests via the circuit breaker.",
ConstLabels: map[string]string{circuitBreakerRequestTypeLabel: requestType},
}, []string{"result"}),
circuitBreakerRequestTimeouts: promauto.With(r).NewCounter(prometheus.CounterOpts{
Name: "cortex_ingester_circuit_breaker_request_timeouts_total",
Help: "Number of times the circuit breaker recorded a request that reached timeout.",
ConstLabels: map[string]string{circuitBreakerRequestTypeLabel: requestType},
}),
}
circuitBreakerCurrentStateGauge := func(state circuitbreaker.State) prometheus.GaugeFunc {
return promauto.With(r).NewGaugeFunc(prometheus.GaugeOpts{
Expand Down Expand Up @@ -154,7 +160,16 @@ func newCircuitBreaker(cfg CircuitBreakerConfig, registerer prometheus.Registere
return &cb
}

func isCircuitBreakerFailure(err error) bool {
// isDeadlineExceeded reports whether err denotes a timeout: either it wraps
// context.DeadlineExceeded, or the status code derived from it by
// grpcutil.ErrorToStatusCode is codes.DeadlineExceeded.
func isDeadlineExceeded(err error) bool {
	// The context check comes first so the status-code lookup is skipped
	// when err already wraps context.DeadlineExceeded.
	return errors.Is(err, context.DeadlineExceeded) ||
		grpcutil.ErrorToStatusCode(err) == codes.DeadlineExceeded
}

func (cb *circuitBreaker) tryRecordFailure(err error) bool {
if err == nil {
return false
}
Expand All @@ -163,20 +178,23 @@ func isCircuitBreakerFailure(err error) bool {
// to be errors worthy of tripping the circuit breaker since these
// are specific to a particular ingester, not a user or request.

if errors.Is(err, context.DeadlineExceeded) {
return true
isFailure := false
if isDeadlineExceeded(err) {
cb.metrics.circuitBreakerRequestTimeouts.Inc()
isFailure = true
} else {
var ingesterErr ingesterError
if errors.As(err, &ingesterErr) {
isFailure = ingesterErr.errorCause() == mimirpb.INSTANCE_LIMIT
}
}

statusCode := grpcutil.ErrorToStatusCode(err)
if statusCode == codes.DeadlineExceeded {
if isFailure {
cb.cb.RecordFailure()
cb.metrics.circuitBreakerResults.WithLabelValues(circuitBreakerResultError).Inc()
return true
}

var ingesterErr ingesterError
if errors.As(err, &ingesterErr) {
return ingesterErr.errorCause() == mimirpb.INSTANCE_LIMIT
}

return false
}

Expand Down Expand Up @@ -245,9 +263,7 @@ func (cb *circuitBreaker) recordResult(errs ...error) error {
}

for _, err := range errs {
if err != nil && isCircuitBreakerFailure(err) {
cb.cb.RecordFailure()
cb.metrics.circuitBreakerResults.WithLabelValues(circuitBreakerResultError).Inc()
if cb.tryRecordFailure(err) {
return err
}
}
Expand Down
Loading

0 comments on commit f2acad2

Please sign in to comment.