From 6f612e06ac9842c0010a6496e120dbeacf47c6d0 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 5 Jul 2021 13:45:27 +0200 Subject: [PATCH] Added 'Per route p99 latency' to ruler configuration API Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + cortex-mixin/dashboards/ruler.libsonnet | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30a2a72d..2c386591 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ * [ENHANCEMENT] Dashboards: defined container functions for common resources panels: containerDiskWritesPanel, containerDiskReadsPanel, containerDiskSpaceUtilization. #331 * [ENHANCEMENT] cortex-mixin: Added `alert_excluded_routes` config to exclude specific routes from alerts. #338 * [ENHANCEMENT] Added `CortexMemcachedRequestErrors` alert. #346 +* [ENHANCEMENT] Ruler dashboard: added "Per route p99 Latency" panel in the "Configuration API (gateway)" row. #353 * [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308 * [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329 * [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. 
#335 diff --git a/cortex-mixin/dashboards/ruler.libsonnet b/cortex-mixin/dashboards/ruler.libsonnet index b9347f7f..070a80a9 100644 --- a/cortex-mixin/dashboards/ruler.libsonnet +++ b/cortex-mixin/dashboards/ruler.libsonnet @@ -1,6 +1,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; (import 'dashboard-utils.libsonnet') { + local ruler_config_api_routes_re = 'api_prom_rules.*|api_prom_api_v1_(rules|alerts)', rulerQueries+:: { ruleEvaluations: { @@ -106,11 +107,19 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.row('Configuration API (gateway)') .addPanel( $.panel('QPS') + - $.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"api_prom_rules.*|api_prom_api_v1_(rules|alerts)"}' % $.jobMatcher($._config.job_names.gateway)) + $.qpsPanel('cortex_request_duration_seconds_count{%s, route=~"%s"}' % [$.jobMatcher($._config.job_names.gateway), ruler_config_api_routes_re]) ) .addPanel( $.panel('Latency') + - utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_prom_rules.*|api_prom_api_v1_(rules|alerts)')]) + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', ruler_config_api_routes_re)]) + ) + .addPanel( + $.panel('Per route p99 Latency') + + $.queryPanel( + 'histogram_quantile(0.99, sum by (route, le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{%s, route=~"%s"}))' % [$.jobMatcher($._config.job_names.gateway), ruler_config_api_routes_re], + '{{ route }}' + ) + + { yaxes: $.yaxes('s') } ) ) .addRow(