diff --git a/cortex-mixin/alerts/alerts.libsonnet b/cortex-mixin/alerts/alerts.libsonnet index afb312ca..a0e286cc 100644 --- a/cortex-mixin/alerts/alerts.libsonnet +++ b/cortex-mixin/alerts/alerts.libsonnet @@ -436,19 +436,19 @@ }, { alert: 'CortexProvisioningTooManyActiveSeries', - // 1.5 million active series per ingester max. + // We target each ingester to 1.5M in-memory series. This alert fires if the average + // number of series / ingester in a Cortex cluster is > 1.6M for 2h (we compact + // the TSDB head every 2h). expr: ||| avg by (%s) (cortex_ingester_memory_series) > 1.6e6 - and - sum by (%s) (rate(cortex_ingester_received_chunks[1h])) == 0 - ||| % [$._config.alert_aggregation_labels, $._config.alert_aggregation_labels], - 'for': '1h', + ||| % [$._config.alert_aggregation_labels], + 'for': '2h', labels: { severity: 'warning', }, annotations: { message: ||| - Too many active series for ingesters, add more ingesters. + The number of in-memory series per ingester in {{ $labels.namespace }} is too high. |||, }, }, diff --git a/cortex-mixin/docs/playbooks.md b/cortex-mixin/docs/playbooks.md index eea6d266..093ff99d 100644 --- a/cortex-mixin/docs/playbooks.md +++ b/cortex-mixin/docs/playbooks.md @@ -498,7 +498,16 @@ _This alert applies to Cortex chunks storage only._ ### CortexProvisioningTooManyActiveSeries -_TODO: this playbook has not been written yet._ +This alert fires if the average number of in-memory series per ingester stays above 1.6M for 2h, i.e. noticeably above our target of 1.5M series per ingester. + +How to **fix**: +- Scale up ingesters + - To find the Cortex clusters where ingesters should be scaled up, and the minimum number of replicas each one needs: + ``` + ceil(sum by(cluster, namespace) (cortex_ingester_memory_series) / 1.5e6) > + count by(cluster, namespace) (cortex_ingester_memory_series) + ``` +- After the scale up, the number of in-memory series per ingester is expected to drop at the next TSDB head compaction (occurring every 2h) ### CortexProvisioningTooManyWrites