Merge pull request grafana/cortex-jsonnet#278 from grafana/scaling-rules
Add recording rules to calculate Cortex scaling
tomwilkie authored Mar 19, 2021
2 parents c616398 + 0774870 commit fe87d90
Showing 2 changed files with 238 additions and 88 deletions.
125 changes: 37 additions & 88 deletions jsonnet/mimir-mixin/dashboards/scaling.libsonnet
@@ -6,105 +6,54 @@ local utils = import 'mixin-utils/utils.libsonnet';
     ($.dashboard('Cortex / Scaling') + { uid: '88c041017b96856c9176e07cf557bdcf' })
     .addClusterSelectorTemplates()
     .addRow(
-      $.row('Workload-based scaling')
-      .addPanel(
-        $.panel('Workload-based scaling') + { sort: { col: 1, desc: false } } +
-        $.tablePanel([
-          |||
-            sum by (cluster, namespace, deployment) (
-              kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"}
-              or
-              label_replace(
-                kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"},
-                "deployment", "$1", "statefulset", "(.*)"
-              )
-            )
-          |||,
-          |||
-            quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(cortex_distributor_received_samples_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "ingester", "cluster", ".*"))[1h:])
-            * 3 / 80e3
-          |||,
-          |||
-            label_replace(
-              sum by(cluster, namespace) (
-                cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace"}
-              ) / 1e+6,
-              "deployment", "ingester", "cluster", ".*"
-            )
-            or
-            label_replace(
-              sum by (cluster, namespace) (
-                4 * cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"}
-                *
-                cortex_ingester_chunk_size_bytes_sum{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"}
-                /
-                cortex_ingester_chunk_size_bytes_count{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"}
-              )
-              /
-              avg by (cluster, namespace) (memcached_limit_bytes{cluster=~"$cluster", namespace=~"$namespace", job=~".+/memcached"}),
-              "deployment", "memcached", "namespace", ".*"
-            )
-          |||,
-        ], {
-          cluster: { alias: 'Cluster' },
-          namespace: { alias: 'Namespace' },
-          deployment: { alias: 'Deployment' },
-          'Value #A': { alias: 'Current Replicas', decimals: 0 },
-          'Value #B': { alias: 'Required Replicas, by ingestion rate', decimals: 0 },
-          'Value #C': { alias: 'Required Replicas, by active series', decimals: 0 },
-        })
-      )
+      ($.row('Cortex Service Scaling') + { height: '200px' })
+      .addPanel({
+        type: 'text',
+        title: '',
+        options: {
+          content: |||
+            This dashboard shows any services which are not scaled correctly.
+            The table below gives the required number of replicas and the reason why.
+            We only show services without enough replicas.
+            Reasons:
+            - **sample_rate**: There are not enough replicas to handle the
+              sample rate. Applies to distributors and ingesters.
+            - **active_series**: There are not enough replicas
+              to handle the number of active series. Applies to ingesters.
+            - **cpu_usage**: There are not enough replicas
+              based on the CPU usage of the jobs vs the resource requests.
+              Applies to all jobs.
+            - **memory_usage**: There are not enough replicas based on the memory
+              usage vs the resource requests. Applies to all jobs.
+            - **active_series_limits**: There are not enough replicas to hold 60% of the
+              sum of all the per-tenant series limits.
+            - **sample_rate_limits**: There are not enough replicas to handle 60% of the
+              sum of all the per-tenant rate limits.
+          |||,
+          mode: 'markdown',
+        },
+      })
     )
     .addRow(
-      ($.row('Resource-based scaling') + { height: '500px' })
+      ($.row('Scaling') + { height: '400px' })
       .addPanel(
-        $.panel('Resource-based scaling') + { sort: { col: 1, desc: false } } +
+        $.panel('Workload-based scaling') + { sort: { col: 0, desc: false } } +
         $.tablePanel([
           |||
-            sum by (cluster, namespace, deployment) (
-              kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"}
-              or
-              label_replace(
-                kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"},
-                "deployment", "$1", "statefulset", "(.*)"
-              )
-            )
-          |||,
-          |||
-            sum by (cluster, namespace, deployment) (
-              kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"}
-              or
-              label_replace(
-                kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"},
-                "deployment", "$1", "statefulset", "(.*)"
-              )
-            )
-            *
-            quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(container_cpu_usage_seconds_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:])
-            /
-            sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_cpu_cores{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))
-          |||,
-          |||
-            sum by (cluster, namespace, deployment) (
-              kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"}
-              or
-              label_replace(
-                kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"},
-                "deployment", "$1", "statefulset", "(.*)"
-              )
-            )
-            *
-            quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(container_memory_usage_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:1m])
-            /
-            sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_memory_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))
+            sort_desc(
+              cluster_namespace_deployment_reason:required_replicas:count{cluster=~"$cluster", namespace=~"$namespace"}
+                > ignoring(reason) group_left
+              cluster_namespace_deployment:actual_replicas:count{cluster=~"$cluster", namespace=~"$namespace"}
+            )
           |||,
         ], {
+          '__name__': { alias: 'Cluster', type: 'hidden' },
           cluster: { alias: 'Cluster' },
           namespace: { alias: 'Namespace' },
-          deployment: { alias: 'Deployment' },
-          'Value #A': { alias: 'Current Replicas', decimals: 0 },
-          'Value #B': { alias: 'Required Replicas, by CPU usage', decimals: 0 },
-          'Value #C': { alias: 'Required Replicas, by Memory usage', decimals: 0 },
+          deployment: { alias: 'Service' },
+          reason: { alias: 'Reason' },
+          'Value': { alias: 'Required Replicas', decimals: 0 },
         })
       )
     ),
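The new table panel leans entirely on the recording rules added below; here is a minimal sketch of how the PromQL join behaves, with made-up values (the metric names come from this commit, the numbers are illustrative):

# Suppose the rules currently record, for one cluster/namespace:
#   cluster_namespace_deployment_reason:required_replicas:count{deployment="ingester", reason="sample_rate"}   42
#   cluster_namespace_deployment_reason:required_replicas:count{deployment="ingester", reason="active_series"} 30
#   cluster_namespace_deployment:actual_replicas:count{deployment="ingester"}                                  36
sort_desc(
  cluster_namespace_deployment_reason:required_replicas:count
    > ignoring(reason) group_left
  cluster_namespace_deployment:actual_replicas:count
)
# "ignoring(reason)" drops the reason label when matching the two sides, and
# "group_left" lets the many per-reason series on the left match the single
# actual-replicas series on the right. The ">" keeps a left-hand series only
# when its value exceeds its match, so the result here is just:
#   {deployment="ingester", reason="sample_rate"}  42
# i.e. the table lists only services that need more replicas, and why.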
201 changes: 201 additions & 0 deletions jsonnet/mimir-mixin/recording_rules.libsonnet
@@ -58,6 +58,207 @@ local utils = import 'mixin-utils/utils.libsonnet';
           },
         ],
       },
+      {
+        local _config = {
+          max_series_per_ingester: 1.5e6,
+          max_samples_per_sec_per_ingester: 80e3,
+          max_samples_per_sec_per_distributor: 240e3,
+          limit_utilisation_target: 0.6,
+        },
+        name: 'cortex_scaling_rules',
+        rules: [
+          {
+            // Convenience rule to get the number of replicas for both a deployment and a statefulset.
+            record: 'cluster_namespace_deployment:actual_replicas:count',
+            expr: |||
+              sum by (cluster, namespace, deployment) (kube_deployment_spec_replicas)
+              or
+              sum by (cluster, namespace, deployment) (
+                label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*)")
+              )
+            |||,
+          },
+          {
+            // Distributors should be able to deal with 240k samples/s.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'distributor',
+              reason: 'sample_rate',
+            },
+            expr: |||
+              ceil(
+                quantile_over_time(0.99,
+                  sum by (cluster, namespace) (
+                    cluster_namespace_job:cortex_distributor_received_samples:rate5m
+                  )[24h:]
+                )
+                / %(max_samples_per_sec_per_distributor)s
+              )
+            ||| % _config,
+          },
+          {
+            // We should be able to cover 60% of the sum of the per-tenant rate limits,
+            // and each distributor can handle 240k samples/s.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'distributor',
+              reason: 'sample_rate_limits',
+            },
+            expr: |||
+              ceil(
+                sum by (cluster, namespace) (cortex_overrides{limit_name="ingestion_rate"})
+                * %(limit_utilisation_target)s / %(max_samples_per_sec_per_distributor)s
+              )
+            ||| % _config,
+          },
+          {
+            // We want each ingester to deal with 80k samples/s.
+            // NB we measure this at the distributors and multiply by the replication factor (3).
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'ingester',
+              reason: 'sample_rate',
+            },
+            expr: |||
+              ceil(
+                quantile_over_time(0.99,
+                  sum by (cluster, namespace) (
+                    cluster_namespace_job:cortex_distributor_received_samples:rate5m
+                  )[24h:]
+                )
+                * 3 / %(max_samples_per_sec_per_ingester)s
+              )
+            ||| % _config,
+          },
+          {
+            // Each ingester should hold no more than 1.5M series in memory.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'ingester',
+              reason: 'active_series',
+            },
+            expr: |||
+              ceil(
+                quantile_over_time(0.99,
+                  sum by (cluster, namespace) (
+                    cortex_ingester_memory_series
+                  )[24h:]
+                )
+                / %(max_series_per_ingester)s
+              )
+            ||| % _config,
+          },
+          {
+            // We should be able to cover 60% of the sum of the per-tenant series limits,
+            // and each ingester can hold 1.5M series in memory (times the replication factor of 3).
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'ingester',
+              reason: 'active_series_limits',
+            },
+            expr: |||
+              ceil(
+                sum by (cluster, namespace) (cortex_overrides{limit_name="max_global_series_per_user"})
+                * 3 * %(limit_utilisation_target)s / %(max_series_per_ingester)s
+              )
+            ||| % _config,
+          },
+          {
+            // We should be able to cover 60% of the sum of the per-tenant rate limits,
+            // and each ingester can handle 80k samples/s.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'ingester',
+              reason: 'sample_rate_limits',
+            },
+            expr: |||
+              ceil(
+                sum by (cluster, namespace) (cortex_overrides{limit_name="ingestion_rate"})
+                * %(limit_utilisation_target)s / %(max_samples_per_sec_per_ingester)s
+              )
+            ||| % _config,
+          },
+          {
+            // Ingesters store 96h of data on disk - we want memcached to store 1/4 of that.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              deployment: 'memcached',
+              reason: 'active_series',
+            },
+            expr: |||
+              ceil(
+                (sum by (cluster, namespace) (
+                  cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester"}
+                ) / 4)
+                /
+                avg by (cluster, namespace) (
+                  memcached_limit_bytes{job=~".+/memcached"}
+                )
+              )
+            |||,
+          },
+          {
+            // Jobs should be sized to their CPU usage.
+            // We do this by comparing the 99th percentile usage over the last 24h
+            // to the currently provisioned #replicas and resource requests.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              reason: 'cpu_usage',
+            },
+            expr: |||
+              ceil(
+                cluster_namespace_deployment:actual_replicas:count
+                *
+                quantile_over_time(0.99,
+                  sum by (cluster, namespace, deployment) (
+                    label_replace(
+                      node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
+                      "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
+                    )
+                  )[24h:5m]
+                )
+                /
+                sum by (cluster, namespace, deployment) (
+                  label_replace(
+                    kube_pod_container_resource_requests_cpu_cores,
+                    "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
+                  )
+                )
+              )
+            |||,
+          },
+          {
+            // Jobs should be sized to their memory usage.
+            // We do this by comparing the 99th percentile usage over the last 24h
+            // to the currently provisioned #replicas and resource requests.
+            record: 'cluster_namespace_deployment_reason:required_replicas:count',
+            labels: {
+              reason: 'memory_usage',
+            },
+            expr: |||
+              ceil(
+                cluster_namespace_deployment:actual_replicas:count
+                *
+                quantile_over_time(0.99,
+                  sum by (cluster, namespace, deployment) (
+                    label_replace(
+                      container_memory_usage_bytes,
+                      "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
+                    )
+                  )[24h:5m]
+                )
+                /
+                sum by (cluster, namespace, deployment) (
+                  label_replace(
+                    kube_pod_container_resource_requests_memory_bytes,
+                    "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
+                  )
+                )
+              )
+            |||,
+          },
+        ],
+      },
     ],
   },
 }
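As a quick sanity check on the capacity arithmetic encoded in these rules, here is a worked example with an assumed load (the 1.0M samples/s figure is illustrative; the budgets come from _config above):

# Assume the 99th-percentile received-sample rate over 24h is 1.0e6 samples/s.
#
# distributor, reason="sample_rate" (budget: 240k samples/s per distributor):
#   ceil(1.0e6 / 240e3) = ceil(4.17) = 5 replicas
#
# ingester, reason="sample_rate" (each sample is replicated to 3 ingesters,
# budget: 80k samples/s per ingester):
#   ceil(1.0e6 * 3 / 80e3) = ceil(37.5) = 38 replicas
#
# The dashboard flags a service only when such a required count exceeds
# cluster_namespace_deployment:actual_replicas:count.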

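Since the mixin renders these groups to ordinary Prometheus rule YAML, the convenience rule can be unit-tested with promtool. A minimal sketch, assuming the rendered group has been written to recording_rules.yaml (the file names and input values are hypothetical):

# scaling_rules_test.yaml -- run with: promtool test rules scaling_rules_test.yaml
rule_files:
  - recording_rules.yaml
evaluation_interval: 1m
tests:
  - interval: 1m
    input_series:
      # Hypothetical inputs: a 3-replica distributor Deployment and a
      # 6-replica ingester StatefulSet in the same cluster/namespace.
      - series: 'kube_deployment_spec_replicas{cluster="c1", namespace="cortex", deployment="distributor"}'
        values: '3 3 3 3 3'
      - series: 'kube_statefulset_replicas{cluster="c1", namespace="cortex", statefulset="ingester"}'
        values: '6 6 6 6 6'
    promql_expr_test:
      - expr: cluster_namespace_deployment:actual_replicas:count
        eval_time: 4m
        exp_samples:
          # The statefulset label is rewritten to deployment, so Deployments
          # and StatefulSets surface under a single label.
          - labels: 'cluster_namespace_deployment:actual_replicas:count{cluster="c1", deployment="distributor", namespace="cortex"}'
            value: 3
          - labels: 'cluster_namespace_deployment:actual_replicas:count{cluster="c1", deployment="ingester", namespace="cortex"}'
            value: 6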