From ea7660cb89d20a3a2d8f1535f06c1dcf022c24ea Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Tue, 21 Jun 2022 11:18:35 +0200 Subject: [PATCH 1/3] Fix GossipMembersMismatch misfiring with remote ruler eval The alert doesn't take into account the queriers that now have ruler- prepended to their name. This PR fixes it. Signed-off-by: Dimitar Dimitrov --- operations/mimir-mixin-compiled/alerts.yaml | 2 +- operations/mimir-mixin/config.libsonnet | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/mimir-mixin-compiled/alerts.yaml b/operations/mimir-mixin-compiled/alerts.yaml index 28381bfe5a8..879a027a560 100644 --- a/operations/mimir-mixin-compiled/alerts.yaml +++ b/operations/mimir-mixin-compiled/alerts.yaml @@ -352,7 +352,7 @@ groups: message: Mimir instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} sees incorrect number of gossip members. expr: | - avg by (cluster, namespace) (memberlist_client_cluster_members_count) != sum by (cluster, namespace) (up{job=~".+/(alertmanager|compactor|distributor|ingester.*|querier.*|ruler|store-gateway.*|cortex|mimir)"}) + avg by (cluster, namespace) (memberlist_client_cluster_members_count) != sum by (cluster, namespace) (up{job=~".+/(alertmanager|compactor|distributor|ingester.*|(ruler-)?querier.*|ruler|store-gateway.*|cortex|mimir)"}) for: 15m labels: severity: warning diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index 11e28d6a0a5..8f49e69a248 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -30,7 +30,7 @@ ruler_query_frontend: '(ruler-query-frontend.*)', // Match also custom ruler-query-frontend deployments. query_scheduler: 'query-scheduler.*', // Not part of single-binary. Match also custom query-scheduler deployments. ruler_query_scheduler: 'ruler-query-scheduler.*', // Not part of single-binary. Match also custom query-scheduler deployments. 
- ring_members: ['alertmanager', 'compactor', 'distributor', 'ingester.*', 'querier.*', 'ruler', 'store-gateway.*', 'cortex', 'mimir'], + ring_members: ['alertmanager', 'compactor', 'distributor', 'ingester.*', '(ruler-)?querier.*', 'ruler', 'store-gateway.*', 'cortex', 'mimir'], store_gateway: '(store-gateway.*|cortex|mimir)', // Match also per-zone store-gateway deployments. gateway: '(gateway|cortex-gw|cortex-gw-internal)', compactor: 'compactor.*|cortex|mimir', // Match also custom compactor deployments. From 7a25f7df444728796587a13368c7c5e8b4cc6606 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Tue, 21 Jun 2022 12:16:06 +0200 Subject: [PATCH 2/3] Simpler regexes --- operations/mimir-mixin/config.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/mimir-mixin/config.libsonnet b/operations/mimir-mixin/config.libsonnet index 8f49e69a248..8b2d40c6998 100644 --- a/operations/mimir-mixin/config.libsonnet +++ b/operations/mimir-mixin/config.libsonnet @@ -30,7 +30,7 @@ ruler_query_frontend: '(ruler-query-frontend.*)', // Match also custom ruler-query-frontend deployments. query_scheduler: 'query-scheduler.*', // Not part of single-binary. Match also custom query-scheduler deployments. ruler_query_scheduler: 'ruler-query-scheduler.*', // Not part of single-binary. Match also custom query-scheduler deployments. - ring_members: ['alertmanager', 'compactor', 'distributor', 'ingester.*', '(ruler-)?querier.*', 'ruler', 'store-gateway.*', 'cortex', 'mimir'], + ring_members: ['alertmanager', 'compactor', 'distributor', 'ingester.*', 'querier.*', 'ruler', 'ruler-querier.*', 'store-gateway.*', 'cortex', 'mimir'], store_gateway: '(store-gateway.*|cortex|mimir)', // Match also per-zone store-gateway deployments. gateway: '(gateway|cortex-gw|cortex-gw-internal)', compactor: 'compactor.*|cortex|mimir', // Match also custom compactor deployments. 
From 07fdc9cf34ba93f8eb33ab9707719c88b2f01063 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Tue, 21 Jun 2022 12:31:11 +0200 Subject: [PATCH 3/3] Update compiled mixin --- operations/mimir-mixin-compiled/alerts.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/mimir-mixin-compiled/alerts.yaml b/operations/mimir-mixin-compiled/alerts.yaml index 879a027a560..46b42cdfb3d 100644 --- a/operations/mimir-mixin-compiled/alerts.yaml +++ b/operations/mimir-mixin-compiled/alerts.yaml @@ -352,7 +352,7 @@ groups: message: Mimir instance {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} sees incorrect number of gossip members. expr: | - avg by (cluster, namespace) (memberlist_client_cluster_members_count) != sum by (cluster, namespace) (up{job=~".+/(alertmanager|compactor|distributor|ingester.*|(ruler-)?querier.*|ruler|store-gateway.*|cortex|mimir)"}) + avg by (cluster, namespace) (memberlist_client_cluster_members_count) != sum by (cluster, namespace) (up{job=~".+/(alertmanager|compactor|distributor|ingester.*|querier.*|ruler|ruler-querier.*|store-gateway.*|cortex|mimir)"}) for: 15m labels: severity: warning