diff --git a/CHANGELOG.md b/CHANGELOG.md index 3eebd056..47a64a96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ * [CHANGE] Increased `CortexBadRuntimeConfig` alert severity to `critical` and removed support for `cortex_overrides_last_reload_successful` metric (was removed in Cortex 1.3.0). #335 * [ENHANCEMENT] cortex-mixin: Make `cluster_namespace_deployment:kube_pod_container_resource_requests_{cpu_cores,memory_bytes}:sum` backwards compatible with `kube-state-metrics` v2.0.0. #317 * [ENHANCEMENT] Added documentation text panels and descriptions to reads and writes dashboards. #324 +* [ENHANCEMENT] Dashboards: defined container functions for common resources panels: containerDiskWritesPanel, containerDiskReadsPanel, containerDiskSpaceUtilization. #331 * [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308 * [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329 * [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335 diff --git a/cortex-mixin/dashboards/alertmanager-resources.libsonnet b/cortex-mixin/dashboards/alertmanager-resources.libsonnet index 4c67c161..8a719d52 100644 --- a/cortex-mixin/dashboards/alertmanager-resources.libsonnet +++ b/cortex-mixin/dashboards/alertmanager-resources.libsonnet @@ -52,30 +52,16 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Disk') .addPanel( - $.panel('Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Writes', 'alertmanager'), ) .addPanel( - $.panel('Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('alertmanager')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Reads', 'alertmanager'), ) ) .addRow( $.row('') .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="alertmanager"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'alertmanager'), ) ), } diff --git a/cortex-mixin/dashboards/compactor-resources.libsonnet b/cortex-mixin/dashboards/compactor-resources.libsonnet index 79a7ac03..82a6bce4 100644 --- a/cortex-mixin/dashboards/compactor-resources.libsonnet +++ b/cortex-mixin/dashboards/compactor-resources.libsonnet @@ -28,27 +28,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Disk') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('compactor')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, - ) - .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('compactor')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, - ) - .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="compactor"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskWritesPanel('Disk Writes', 'compactor'), + ) + .addPanel( + $.containerDiskReadsPanel('Disk Reads', 'compactor'), + ) + .addPanel( + $.containerDiskSpaceUtilization('Disk Space Utilization', 'compactor'), ) ) + { templating+: { diff --git a/cortex-mixin/dashboards/dashboard-utils.libsonnet b/cortex-mixin/dashboards/dashboard-utils.libsonnet index c54ae659..f8e8cfbe 100644 --- a/cortex-mixin/dashboards/dashboard-utils.libsonnet +++ b/cortex-mixin/dashboards/dashboard-utils.libsonnet @@ -198,6 +198,73 @@ local utils = import 'mixin-utils/utils.libsonnet'; containerNetworkTransmitBytesPanel(instanceName):: $.containerNetworkPanel('Transmit Bandwidth', 'container_network_transmit_bytes_total', instanceName), + containerDiskWritesPanel(title, containerName):: + $.panel(title) + + $.queryPanel( + ||| + sum by(%s, %s, device) ( + rate( + node_disk_written_bytes_total[$__rate_interval] + ) + ) + + + %s + ||| % [ + $._config.per_node_label, + $._config.per_instance_label, + $.filterNodeDiskContainer(containerName), + ], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.stack + + { yaxes: $.yaxes('Bps') }, + + containerDiskReadsPanel(title, containerName):: + $.panel(title) + + $.queryPanel( + ||| + sum by(%s, %s, device) ( + rate( + node_disk_read_bytes_total[$__rate_interval] + ) + ) + %s + ||| % [ + $._config.per_node_label, + $._config.per_instance_label, + $.filterNodeDiskContainer(containerName), + ], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.stack + + { yaxes: $.yaxes('Bps') }, + + containerDiskSpaceUtilization(title, containerName):: + $.panel(title) + + $.queryPanel( + ||| + max by(persistentvolumeclaim) ( + kubelet_volume_stats_used_bytes{%(namespace)s} / + kubelet_volume_stats_capacity_bytes{%(namespace)s} + ) + and + count by(persistentvolumeclaim) ( + kube_persistentvolumeclaim_labels{ + %(namespace)s, + %(label)s + } + ) + ||| % { + namespace: $.namespaceMatcher(), + label: $.containerLabelMatcher(containerName), + }, '{{persistentvolumeclaim}}' + ) + + { yaxes: $.yaxes('percentunit') }, + + containerLabelMatcher(containerName):: + if containerName == 'ingester' + then 'label_name=~"ingester.*"' + else 'label_name="%s"' % containerName, + goHeapInUsePanel(title, jobName):: $.panel(title) + $.queryPanel( diff --git a/cortex-mixin/dashboards/reads-resources.libsonnet b/cortex-mixin/dashboards/reads-resources.libsonnet index 697a7fd4..a1b36272 100644 --- a/cortex-mixin/dashboards/reads-resources.libsonnet +++ b/cortex-mixin/dashboards/reads-resources.libsonnet @@ -103,27 +103,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; std.member($._config.storage_engine, 'blocks'), $.row('') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Disk Writes', 'store-gateway'), ) .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('store-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Disk Reads', 'store-gateway'), ) .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name="store-gateway"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'store-gateway'), ) ) + { templating+: { diff --git a/cortex-mixin/dashboards/writes-resources.libsonnet b/cortex-mixin/dashboards/writes-resources.libsonnet index f833e406..85d7f4c4 100644 --- a/cortex-mixin/dashboards/writes-resources.libsonnet +++ b/cortex-mixin/dashboards/writes-resources.libsonnet @@ -56,27 +56,13 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('') .addPanel( - $.panel('Disk Writes') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskWritesPanel('Disk Writes', 'ingester') ) .addPanel( - $.panel('Disk Reads') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('ingester')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.stack + - { yaxes: $.yaxes('Bps') }, + $.containerDiskReadsPanel('Disk Reads', 'ingester') ) .addPanel( - $.panel('Disk Space Utilization') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,label_name=~"ingester.*"})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher()], '{{persistentvolumeclaim}}') + - { yaxes: $.yaxes('percentunit') }, + $.containerDiskSpaceUtilization('Disk Space Utilization', 'ingester'), ) ) + {