From e5a38dba9ec5d20d1099b2ebfa580f54f324b65a Mon Sep 17 00:00:00 2001 From: Darren Janeczek Date: Wed, 16 Jun 2021 19:01:36 -0400 Subject: [PATCH 1/3] refactor: resources dashboard container functions added: - containerDiskWritesPanel - containerDiskReadsPanel - containerDiskSpaceUtilization --- CHANGELOG.md | 1 + .../alertmanager-resources.libsonnet | 20 +--- .../dashboards/compactor-resources.libsonnet | 28 ++---- .../dashboards/dashboard-utils.libsonnet | 96 +++++++++++++------ .../dashboards/reads-resources.libsonnet | 20 +--- .../dashboards/writes-resources.libsonnet | 20 +--- 6 files changed, 86 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3eebd056..47a64a96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ * [CHANGE] Increased `CortexBadRuntimeConfig` alert severity to `critical` and removed support for `cortex_overrides_last_reload_successful` metric (was removed in Cortex 1.3.0). #335 * [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` backwards compatible with `kube-state-metrics` v2.0.0. #317 * [ENHANCEMENT] Added documentation text panels and descriptions to reads and writes dashboards. #324 +* [ENHANCEMENT] Dashboards: defined container functions for common resources panels: containerDiskWritesPanel, containerDiskReadsPanel, containerDiskSpaceUtilization. #331 * [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308 * [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329 * [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. 
#335 diff --git a/cortex-mixin/dashboards/alertmanager-resources.libsonnet b/cortex-mixin/dashboards/alertmanager-resources.libsonnet index 4c67c161..8a719d52 100644 --- a/cortex-mixin/dashboards/alertmanager-resources.libsonnet +++ b/cortex-mixin/dashboards/alertmanager-resources.libsonnet @@ -52,30 +52,16 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Disk') .addPanel( - $.panel('Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Writes', 'alertmanager'), ) .addPanel( - $.panel('Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Reads', 'alertmanager'), ) ) .addRow( $.row('') .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="alertmanager"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'alertmanager'), ) ), } diff --git a/cortex-mixin/dashboards/compactor-resources.libsonnet b/cortex-mixin/dashboards/compactor-resources.libsonnet index 79a7ac03..82a6bce4 100644 --- a/cortex-mixin/dashboards/compactor-resources.libsonnet +++ b/cortex-mixin/dashboards/compactor-resources.libsonnet @@ -28,27 +28,13 @@ 
local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Disk') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('compactor')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, - ) - .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('compactor')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, - ) - .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="compactor"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskWritesPanel('Disk Writes', 'compactor'), + ) + .addPanel( + $.containerDiskReadsPanel('Disk Reads', 'compactor'), + ) + .addPanel( + $.containerDiskSpaceUtilization('Disk Space Utilization', 'compactor'), ) ) + { templating+: { diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index c54ae659..0b11db1c 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -198,6 +198,73 @@ local utils = import 'mixin-utils/utils.libsonnet'; containerNetworkTransmitBytesPanel(instanceName):: $.containerNetworkPanel('Transmit Bandwidth', 'container_network_transmit_bytes_total', instanceName), + containerDiskWritesPanel(title, containerName):: + $.panel(title) + + 
$.queryPanel( + ||| + sum by(%s, %s, device) ( + rate( + node_disk_written_bytes_total[$__rate_interval] + ) + ) + + + %s + ||| % [ + $._config.per_node_label, + $._config.per_instance_label, + $.filterNodeDiskContainer(containerName), + ], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.stack + + { yaxes: $.yaxes('Bps') }, + + containerDiskReadsPanel(title, containerName):: + $.panel(title) + + $.queryPanel( + ||| + sum by(%s, %s, device) ( + rate( + node_disk_read_bytes_total[$__rate_interval] + ) + ) + %s + ||| % [ + $._config.per_node_label, + $._config.per_instance_label, + $.filterNodeDiskContainer(containerName), + ], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.stack + + { yaxes: $.yaxes('Bps') }, + + containerDiskSpaceUtilization(title, containerName):: + $.panel(title) + + $.queryPanel( + ||| + max by(persistentvolumeclaim) ( + kubelet_volume_stats_used_bytes{%(namespace)s} / + kubelet_volume_stats_capacity_bytes{%(namespace)s} + ) + and + count by(persistentvolumeclaim) ( + kube_persistentvolumeclaim_labels{ + %(namespace)s, + %(label)s + } + ) + ||| % { + namespace: $.namespaceMatcher(), + label: $.containerLabelMatcher(containerName), + }, '{{persistentvolumeclaim}}' + ) + + { yaxes: $.yaxes('percentunit') }, + + containerLabelMatcher(containerName):: + if containerName == 'ingester' + then 'label_name=~"ingester.*"' + else 'label_name="%s"' % containerName, + goHeapInUsePanel(title, jobName):: $.panel(title) + $.queryPanel( @@ -402,33 +469,8 @@ local utils = import 'mixin-utils/utils.libsonnet'; filterNodeDiskContainer(containerName):: ||| - ignoring(%s) group_right() ( - label_replace( - count by( - %s, - %s, - device - ) - ( - container_fs_writes_bytes_total{ - %s, - container="%s", - device!~".*sda.*" - } - ), - "device", - "$1", - "device", - "/dev/(.*)" - ) * 0 - ) - ||| % [ - $._config.per_instance_label, - $._config.per_node_label, - $._config.per_instance_label, - $.namespaceMatcher(), - containerName, - ], 
+ ignoring(%s) group_right() (label_replace(count by(%s, %s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) + ||| % [$._config.per_instance_label, $._config.per_node_label, $._config.per_instance_label, $.namespaceMatcher(), containerName], panelDescription(title, description):: { description: ||| diff --git a/cortex-mixin/dashboards/reads-resources.libsonnet b/cortex-mixin/dashboards/reads-resources.libsonnet index 697a7fd4..a1b36272 100644 --- a/cortex-mixin/dashboards/reads-resources.libsonnet +++ b/cortex-mixin/dashboards/reads-resources.libsonnet @@ -103,27 +103,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; std.member($._config.storage_engine, 'blocks'), $.row('') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Disk Writes', 'store-gateway'), ) .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Disk Reads', 'store-gateway'), ) .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="store-gateway"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') 
}, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'store-gateway'), ) ) + { templating+: { diff --git a/cortex-mixin/dashboards/writes-resources.libsonnet b/cortex-mixin/dashboards/writes-resources.libsonnet index f833e406..85d7f4c4 100644 --- a/cortex-mixin/dashboards/writes-resources.libsonnet +++ b/cortex-mixin/dashboards/writes-resources.libsonnet @@ -56,27 +56,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Disk Writes', 'ingester') ) .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Disk Reads', 'ingester') ) .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name=~"ingester.*"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'ingester'), ) ) + { From 87e9af11b62927987a657228e2255ce6a02d6a1d Mon Sep 17 00:00:00 2001 From: Darren Janeczek Date: Tue, 22 Jun 2021 10:18:06 -0400 Subject: [PATCH 2/3] revert: matching spacing format of main --- .../dashboards/dashboard-utils.libsonnet | 29 
+++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index 0b11db1c..61fc6742 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -469,8 +469,33 @@ local utils = import 'mixin-utils/utils.libsonnet'; filterNodeDiskContainer(containerName):: ||| - ignoring(%s) group_right() (label_replace(count by(%s, %s, device) (container_fs_writes_bytes_total{%s,container="%s",device!~".*sda.*"}), "device", "$1", "device", "/dev/(.*)") * 0) - ||| % [$._config.per_instance_label, $._config.per_node_label, $._config.per_instance_label, $.namespaceMatcher(), containerName], + ignoring(%s) group_right() ( + label_replace( + count by( + %s, + %s, + device + ) + ( + container_fs_writes_bytes_total{ + %s, + container="%s", + device!~".*sda.*" + } + ), + "device", + "$1", + "device", + "/dev/(.*)" + ) * 0 + ) + ||| % [ + $._config.per_instance_label, + $._config.per_node_label, + $._config.per_instance_label, + $.namespaceMatcher(), + containerName, + ], panelDescription(title, description):: { description: ||| From 150e5f3ef935dc1daa4a1226352af07216e8f82e Mon Sep 17 00:00:00 2001 From: Darren Janeczek Date: Tue, 22 Jun 2021 10:21:55 -0400 Subject: [PATCH 3/3] lint: white noise --- cortex-mixin/dashboards/dashboard-utils.libsonnet | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index 61fc6742..f8e8cfbe 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -206,8 +206,8 @@ local utils = import 'mixin-utils/utils.libsonnet'; rate( node_disk_written_bytes_total[$__rate_interval] ) - ) - + + ) + + %s ||| % [ $._config.per_node_label, @@ -243,10 +243,10 @@ local utils = import 
'mixin-utils/utils.libsonnet'; $.queryPanel( ||| max by(persistentvolumeclaim) ( - kubelet_volume_stats_used_bytes{%(namespace)s} / + kubelet_volume_stats_used_bytes{%(namespace)s} / kubelet_volume_stats_capacity_bytes{%(namespace)s} - ) - and + ) + and count by(persistentvolumeclaim) ( kube_persistentvolumeclaim_labels{ %(namespace)s,