Skip to content

Commit

Permalink
changing rate function to irate for recording rule
Browse files Browse the repository at this point in the history
  • Loading branch information
Prashant Balachandran committed Jun 9, 2021
1 parent 1719d2b commit e996e00
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 33 deletions.
2 changes: 1 addition & 1 deletion dashboards/controller-manager.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -193,4 +193,4 @@ local singlestat = grafana.singlestat;
.addPanel(goroutines)
) + { refresh: $._config.grafanaK8s.refresh },
},
}
}
10 changes: 5 additions & 5 deletions dashboards/resources/cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ local template = grafana.template;
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') +
g.stack
)
)
Expand All @@ -177,11 +177,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel(podWorkloadColumns + [
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", resource="cpu"}) by (namespace)' % $._config,
], tableStyles {
'Value #C': { alias: 'CPU Usage' },
'Value #D': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -320,7 +320,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + {
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/multi-cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ local template = grafana.template;
g.row('CPU')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, '{{%(clusterLabel)s}}' % $._config)
+ { fill: 0, linewidth: 2 },
)
)
Expand All @@ -60,11 +60,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{resource="cpu"}) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down
14 changes: 7 additions & 7 deletions dashboards/resources/namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ local template = grafana.template;
},
};

local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;
local cpuUsageQuery = 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config;

local memoryUsageQuery = 'sum(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""}) by (pod)' % $._config;

Expand Down Expand Up @@ -141,11 +141,11 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation (from requests)') +
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('CPU Utilisation (from limits)') +
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation (from requests)') +
Expand Down Expand Up @@ -198,11 +198,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -361,7 +361,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate] }, refresh: $._config.grafanaK8s.refresh },
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/node.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ local template = grafana.template;
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') +
g.queryPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, '{{pod}}') +
g.stack,
)
)
Expand All @@ -53,11 +53,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down
10 changes: 5 additions & 5 deletions dashboards/resources/pod.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ local template = grafana.template;
g.panel('CPU Usage') +
g.queryPanel(
[
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="$namespace", pod="$pod", %(clusterLabel)s="$cluster"}) by (container)' % $._config,
cpuRequestsQuery,
cpuLimitsQuery,
], [
Expand Down Expand Up @@ -172,11 +172,11 @@ local template = grafana.template;
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config,
'sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}) by (container)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
Expand Down Expand Up @@ -339,7 +339,7 @@ local template = grafana.template;
sort: {
col: 4,
desc: true,
}
},
},
)
) + { tags: $._config.grafanaK8s.dashboardTags, templating+: { list+: [clusterTemplate, namespaceTemplate, podTemplate] }, refresh: $._config.grafanaK8s.refresh },
Expand Down
4 changes: 2 additions & 2 deletions dashboards/resources/workload-namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ local template = grafana.template;
includeAll=false,
sort=1
),

local typeTemplate =
template.new(
name='type',
Expand Down Expand Up @@ -128,7 +128,7 @@ local template = grafana.template;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/workload.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ local template = grafana.template;

local cpuUsageQuery = |||
sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
) by (pod)
Expand Down
4 changes: 2 additions & 2 deletions rules/apps.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
// Reduces cardinality of this timeseries by #cores, which makes it
// more useable in dashboards. Also, allows us to do things like
// quantile_over_time(...) which would otherwise not be possible.
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate',
record: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate',

This comment has been minimized.

Copy link
@rmak-cpi

rmak-cpi Sep 13, 2021

Just to make things easy for anyone following the bread-crumbs here, the change from node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate to node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate is actually a breaking change for those who don't upgrade their grafana dashboards and prometheus rules at the same time. This also broke an alerting rule that I have that depends on node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate.

expr: |||
sum by (%(clusterLabel)s, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m])
irate(container_cpu_usage_seconds_total{%(cadvisorSelector)s, image!=""}[5m])
) * on (%(clusterLabel)s, namespace, pod) group_left(node) topk by (%(clusterLabel)s, namespace, pod) (
1, max by(%(clusterLabel)s, namespace, pod, node) (kube_pod_info{node!=""})
)
Expand Down
4 changes: 2 additions & 2 deletions tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -335,10 +335,10 @@ tests:
values: '1+0x5'
promql_expr_test:
- eval_time: 5m
expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
exp_samples:
- value: 5.0e-2
labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}'
labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}'

- interval: 1m
input_series:
Expand Down

0 comments on commit e996e00

Please sign in to comment.