Skip to content

Commit

Permalink
Add recording rules for Alertmanager notification rate panels.
Browse files Browse the repository at this point in the history
With large numbers of tenants, the queries for these panels can become quite
slow because the metrics exposed are per-tenant.
  • Loading branch information
stevesg committed Sep 2, 2021
1 parent 2c00155 commit af439cf
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
12 changes: 6 additions & 6 deletions cortex-mixin/dashboards/alertmanager.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ local utils = import 'mixin-utils/utils.libsonnet';
$.queryPanel(
[
|||
sum(rate(cortex_alertmanager_notifications_total{%s}[$__rate_interval]))
sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate1m{%s})
-
sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__rate_interval]))
sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate1m{%s})
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
'sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate1m{%s})' % $.jobMatcher('alertmanager'),
],
['success', 'failed']
)
Expand All @@ -61,13 +61,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
[
|||
(
sum(rate(cortex_alertmanager_notifications_total{%s}[$__rate_interval])) by(integration)
sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate1m{%s}) by(integration)
-
sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__rate_interval])) by(integration)
sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate1m{%s}) by(integration)
) > 0
or on () vector(0)
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
'sum(rate(cortex_alertmanager_notifications_failed_total{%s}[$__rate_interval])) by(integration)' % $.jobMatcher('alertmanager'),
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate1m{%s}) by(integration)' % $.jobMatcher('alertmanager'),
],
['success - {{ integration }}', 'failed - {{ integration }}']
)
Expand Down
13 changes: 13 additions & 0 deletions cortex-mixin/recording_rules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,19 @@ local utils = import 'mixin-utils/utils.libsonnet';
)
|||,
},
// Aggregations of per-user Alertmanager metrics used in dashboards.
// Each rule below pre-computes the 1m rate of a notification counter and sums
// it down to the (cluster, job, integration) labels, so dashboard queries can
// read a small aggregated series set rather than aggregating the raw per-user
// series at query time.
{
// 1m rate of cortex_alertmanager_notifications_total, keeping only the
// cluster, job and integration labels.
record: 'cluster_job_integration:cortex_alertmanager_notifications_total:rate1m',
expr: |||
sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_total[1m]))
|||,
},
{
// Same aggregation for the failed-notification counter
// (cortex_alertmanager_notifications_failed_total).
record: 'cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate1m',
expr: |||
sum by (cluster, job, integration) (rate(cortex_alertmanager_notifications_failed_total[1m]))
|||,
},
],
},
],
Expand Down

0 comments on commit af439cf

Please sign in to comment.