From 96b862fcea3c726d54a5968070164ad63515d9be Mon Sep 17 00:00:00 2001 From: Javier Palomo Date: Mon, 31 May 2021 14:11:14 +0200 Subject: [PATCH 1/3] cortex-mixin: Use kube_pod_container_resource_{requests,limits} metrics This updates the recording rules to make them compatible with kube-state-metrics v2.0.0 which introduces some breaking changes in some metric names. With kube-state-metrics v2.0.0: - `kube_pod_container_resource_requests_cpu_cores` becomes `kube_pod_container_resource_requests{resource="cpu"}` - `kube_pod_container_resource_requests_memory_bytes` becomes `kube_pod_container_resource_requests{resource="memory"}` --- cortex-mixin/recording_rules.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cortex-mixin/recording_rules.libsonnet b/cortex-mixin/recording_rules.libsonnet index f9804744..7abec55d 100644 --- a/cortex-mixin/recording_rules.libsonnet +++ b/cortex-mixin/recording_rules.libsonnet @@ -215,7 +215,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; expr: ||| sum by (cluster, namespace, deployment) ( label_replace( - kube_pod_container_resource_requests_cpu_cores, + kube_pod_container_resource_requests{resource="cpu"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" ) ) @@ -257,7 +257,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; expr: ||| sum by (cluster, namespace, deployment) ( label_replace( - kube_pod_container_resource_requests_memory_bytes, + kube_pod_container_resource_requests{resource="memory"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" ) ) From d6dec26edebb97b8736a59f03348ec4e864c2e92 Mon Sep 17 00:00:00 2001 From: Javier Palomo Date: Mon, 31 May 2021 17:12:07 +0200 Subject: [PATCH 2/3] cortex-mixin: Make the recording rules backwards compatible --- cortex-mixin/recording_rules.libsonnet | 56 ++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/cortex-mixin/recording_rules.libsonnet 
b/cortex-mixin/recording_rules.libsonnet index 7abec55d..4a50eae2 100644 --- a/cortex-mixin/recording_rules.libsonnet +++ b/cortex-mixin/recording_rules.libsonnet @@ -213,10 +213,30 @@ local utils = import 'mixin-utils/utils.libsonnet'; // Convenience rule to get the CPU request for both a deployment and a statefulset. record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum', expr: ||| - sum by (cluster, namespace, deployment) ( - label_replace( - kube_pod_container_resource_requests{resource="cpu"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_cpu_cores was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + kube_pod_container_resource_requests_cpu_cores, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + kube_pod_container_resource_requests{resource="cpu"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ) ) ) |||, @@ -255,10 +275,30 @@ local utils = import 'mixin-utils/utils.libsonnet'; // Convenience rule to get the Memory request for both a deployment and a statefulset. 
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum', expr: ||| - sum by (cluster, namespace, deployment) ( - label_replace( - kube_pod_container_resource_requests{resource="memory"}, - "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + # This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2 + # that remove resource metrics, ref: + # - https://github.com/kubernetes/kube-state-metrics/blob/master/CHANGELOG.md#v200-alpha--2020-09-16 + # - https://github.com/kubernetes/kube-state-metrics/pull/1004 + # + # This is the old expression, compatible with kube-state-metrics < v2.0.0, + # where kube_pod_container_resource_requests_memory_bytes was removed: + ( + sum by (cluster, namespace, deployment) ( + label_replace( + kube_pod_container_resource_requests_memory_bytes, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ) + ) + ) + or + # This expression is compatible with kube-state-metrics >= v1.4.0, + # where kube_pod_container_resource_requests was introduced. + ( + sum by (cluster, namespace, deployment) ( + label_replace( + kube_pod_container_resource_requests{resource="memory"}, + "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))" + ) ) ) |||, From d1f41871cf1b9d031a1bc0f7c14c3189f045cfc7 Mon Sep 17 00:00:00 2001 From: Javier Palomo Date: Mon, 31 May 2021 17:21:47 +0200 Subject: [PATCH 3/3] Update CHANGELOG with https://github.com/grafana/cortex-jsonnet/pull/317 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc055f18..2e6e4479 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * [CHANGE] `namespace` template variable in dashboards now only selects namespaces for selected clusters. #311 * [CHANGE] Alertmanager: mounted overrides configmap to alertmanager too. #315 * [CHANGE] Memcached: upgraded memcached from `1.5.17` to `1.6.9`. 
#316 +* [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` compatible with `kube-state-metrics` v2.0.0, while remaining backwards compatible with older versions. #317 * [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308 ## 1.9.0 / 2021-05-18