From 3ff1d4cfcbfa28de1b83c33d42d74749e4c9c97b Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sat, 16 Nov 2019 13:25:58 +0000 Subject: [PATCH] Initial commit just move everything over Signed-off-by: Goutham Veeramachaneni --- README.md | 55 ++ cortex-mixin/alerts.jsonnet | 1 + cortex-mixin/alerts.libsonnet | 520 +++++++++++++++++++ cortex-mixin/dashboards.jsonnet | 6 + cortex-mixin/dashboards.libsonnet | 649 ++++++++++++++++++++++++ cortex-mixin/jsonnetfile.json | 24 + cortex-mixin/jsonnetfile.lock.json | 26 + cortex-mixin/mixin.libsonnet | 3 + cortex-mixin/recording_rules.jsonnet | 1 + cortex-mixin/recording_rules.libsonnet | 114 +++++ cortex/alertmanager.libsonnet | 32 ++ cortex/common.libsonnet | 14 + cortex/config.libsonnet | 251 +++++++++ cortex/consul.libsonnet | 59 +++ cortex/cortex-manifests.jsonnet.example | 26 + cortex/cortex.libsonnet | 18 + cortex/distributor.libsonnet | 52 ++ cortex/etcd.libsonnet | 9 + cortex/images.libsonnet | 21 + cortex/ingester.libsonnet | 69 +++ cortex/jsonnetfile.json | 44 ++ cortex/jsonnetfile.lock.json | 48 ++ cortex/memcached.libsonnet | 63 +++ cortex/postgresql.libsonnet | 29 ++ cortex/querier.libsonnet | 52 ++ cortex/query-frontend.libsonnet | 60 +++ cortex/ruler.libsonnet | 53 ++ cortex/table-manager.libsonnet | 53 ++ cortex/test-exporter.libsonnet | 40 ++ 29 files changed, 2392 insertions(+) create mode 100644 README.md create mode 100644 cortex-mixin/alerts.jsonnet create mode 100644 cortex-mixin/alerts.libsonnet create mode 100644 cortex-mixin/dashboards.jsonnet create mode 100644 cortex-mixin/dashboards.libsonnet create mode 100644 cortex-mixin/jsonnetfile.json create mode 100644 cortex-mixin/jsonnetfile.lock.json create mode 100644 cortex-mixin/mixin.libsonnet create mode 100644 cortex-mixin/recording_rules.jsonnet create mode 100644 cortex-mixin/recording_rules.libsonnet create mode 100644 cortex/alertmanager.libsonnet create mode 100644 cortex/common.libsonnet create mode 100644 cortex/config.libsonnet 
create mode 100644 cortex/consul.libsonnet create mode 100644 cortex/cortex-manifests.jsonnet.example create mode 100644 cortex/cortex.libsonnet create mode 100644 cortex/distributor.libsonnet create mode 100644 cortex/etcd.libsonnet create mode 100644 cortex/images.libsonnet create mode 100644 cortex/ingester.libsonnet create mode 100644 cortex/jsonnetfile.json create mode 100644 cortex/jsonnetfile.lock.json create mode 100644 cortex/memcached.libsonnet create mode 100644 cortex/postgresql.libsonnet create mode 100644 cortex/querier.libsonnet create mode 100644 cortex/query-frontend.libsonnet create mode 100644 cortex/ruler.libsonnet create mode 100644 cortex/table-manager.libsonnet create mode 100644 cortex/test-exporter.libsonnet diff --git a/README.md b/README.md new file mode 100644 index 00000000..388d5ff6 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# Jsonnet for Cortex + +This repo has the jsonnet for deploying cortex and the related monitoring in Kubernetes. + +To generate the YAMLs for deploying Cortex: + +1. Make sure you have tanka and jb installed: + +``` +$ go get -u github.com/grafana/tanka/cmd/tk +$ go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb +``` + +2. Initialise the application and download the cortex jsonnet lib. + +``` +$ tk init +``` + +3. Install the cortex jsonnet. + +``` +$ jb install github.com/ksonnet/ksonnet-lib/ksonnet.beta.3 +$ cp vendor/ksonnet.beta.3/*.libsonnet lib +$ jb install https://github.com/grafana/cortex-jsonnet/cortex +``` + +3. Use the example monitoring.jsonnet.example: + +``` +$ mv vendor/cortex/cortex-manifests.jsonnet.example environments/default/main.jsonnet +``` + +4. Check what is in the example: + +``` +$ cat environments/default/main.jsonnet +.... +``` + +5. 
Generate the YAML manifests: + +``` +$ tk show environments/default +``` + +To generate the dashboards and alerts for Cortex: + +``` +$ cd cortex-mixin +$ jb install +$ jsonnet -S alerts.jsonnet +$ jsonnet -J vendor -S dashboards.jsonnet +$ jsonnet -J vendor -S recording_rules.jsonnet +``` diff --git a/cortex-mixin/alerts.jsonnet b/cortex-mixin/alerts.jsonnet new file mode 100644 index 00000000..e54b1704 --- /dev/null +++ b/cortex-mixin/alerts.jsonnet @@ -0,0 +1 @@ +std.manifestYamlDoc((import 'mixin.libsonnet').prometheus_alerts) diff --git a/cortex-mixin/alerts.libsonnet b/cortex-mixin/alerts.libsonnet new file mode 100644 index 00000000..a5a547b3 --- /dev/null +++ b/cortex-mixin/alerts.libsonnet @@ -0,0 +1,520 @@ +// According to https://developers.soundcloud.com/blog/alerting-on-slos : +local windows = [ + { long_period: '1h', short_period: '5m', for_period: '2m', factor: 14.4, severity: 'critical' }, + { long_period: '6h', short_period: '30m', for_period: '15m', factor: 6, severity: 'critical' }, + { long_period: '1d', short_period: '2h', for_period: '1h', factor: 3, severity: 'warning' }, + { long_period: '3d', short_period: '6h', for_period: '3h', factor: 1, severity: 'warning' }, +]; + +{ + _config+:: { + cortex_p99_latency_threshold_seconds: 2.5, + }, + + prometheus_alerts+:: { + groups+: [ + { + name: 'cortex_alerts', + rules: [ + { + alert: 'CortexIngesterUnhealthy', + 'for': '15m', + expr: ||| + min(cortex_ring_members{state="Unhealthy", job=~"[a-z]+/distributor"}) by (namespace, job) > 0 + |||, + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.job }} reports more than one unhealthy ingester.', + }, + }, + { + alert: 'CortexFlushStuck', + expr: ||| + (cortex_ingester_memory_chunks / cortex_ingester_memory_series) > 1.3 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.job }}/{{ $labels.instance }} is stuck flushing chunks.', + }, + }, + { + alert: 'CortexRequestErrors', 
+ expr: ||| + 100 * sum(rate(cortex_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) + / + sum(rate(cortex_request_duration_seconds_count[1m])) by (namespace, job, route) + > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexRequestLatency', + expr: ||| + cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process"} + > + %(cortex_p99_latency_threshold_seconds)s + ||| % $._config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + |||, + }, + }, + { + // We're syncing every 10mins, and this means with a 5min rate, we will have a NaN when syncs fail + // and we will never trigger the alert. + // We also have a 3h grace-period for creation of tables which means the we can fail for 3h before it's an outage. + alert: 'CortexTableSyncFailure', + expr: ||| + 100 * rate(cortex_dynamo_sync_tables_seconds_count{status_code!~"2.."}[15m]) + / + rate(cortex_dynamo_sync_tables_seconds_count[15m]) + > 10 + |||, + 'for': '30m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% errors syncing tables. + |||, + }, + }, + { + alert: 'CortexQueriesIncorrect', + expr: ||| + 100 * sum by (job, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) + / + sum by (job, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is reporting incorrect results for {{ printf "%.2f" $value }}% of queries. 
+ |||, + }, + }, + { + alert: 'CortexBadOverrides', + expr: ||| + cortex_overrides_last_reload_successful{job!~".+/table-manager"} == 0 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} failed to reload overrides. + |||, + }, + }, + { + alert: 'CortexQuerierCapacityFull', + expr: ||| + prometheus_engine_queries_concurrent_max{job=~".+/querier"} - prometheus_engine_queries{job=~".+/querier"} == 0 + |||, + 'for': '5m', // We don't want to block for longer. + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} is at capacity processing queries. + |||, + }, + }, + { + alert: 'CortexFrontendQueriesStuck', + expr: ||| + sum by (namespace) (cortex_query_frontend_queue_length{job=~".+/query-frontend"}) > 1 + |||, + 'for': '5m', // We don't want to block for longer. + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} has {{ $value }} queued up queries. + |||, + }, + }, + { + alert: 'CortexCacheRequestErrors', + expr: ||| + 100 * sum(rate(cortex_cache_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, method) + / + sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (namespace, job, method) + > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} cache {{ $labels.method }} is experiencing {{ printf "%.2f" $value }}% errors. 
+ |||, + }, + }, + { + alert: 'CortexIngesterRestarts', + expr: ||| + rate(kube_pod_container_status_restarts_total{container="ingester"}[30m]) > 0 + |||, + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.namespace }}/{{ $labels.pod }} is restarting', + }, + }, + { + alert: 'CortexTransferFailed', + expr: ||| + max_over_time(cortex_shutdown_duration_seconds_count{op="transfer",status!="success"}[15m]) + |||, + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.namespace }}/{{ $labels.pod }} transfer failed.', + }, + }, + { + alert: 'CortexOldChunkInMemory', + // We flush chunks after 6h and then keep them in memory for extra 15m. If chunks are older + // than 7h (= 25200 seconds), raise an alert. Ignore cortex_oldest_unflushed_chunk_timestamp_seconds + // that are zero (eg. distributors). + expr: ||| + (time() - cortex_oldest_unflushed_chunk_timestamp_seconds > 25200) and cortex_oldest_unflushed_chunk_timestamp_seconds > 0 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.namespace }}/{{ $labels.pod }} has very old unflushed chunk in memory. + |||, + }, + }, + ], + }, + { + name: 'cortex_slo_alerts', + rules: [ + { + alert: 'CortexWriteErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(long_period)s + > 0.1 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(short_period)s + > 0.1 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its write error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s write requests in the last %(long_period)s are failing or too slow to meet the SLO." 
% window, + }, + } + for window in windows + ] + [ + { + alert: 'CortexReadErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(long_period)s + > 0.5 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(short_period)s + > 0.5 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its read error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s read requests in the last %(long_period)s are failing or too slow to meet the SLO." % window, + }, + } + for window in windows + ] + [ + { + alert: 'LegacyCortexWriteErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(long_period)s + > 0.1 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(short_period)s + > 0.1 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its write error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s write requests in the last %(long_period)s are failing or too slow to meet the SLO." 
% window, + }, + } + for window in windows + ] + [ + { + alert: 'LegacyCortexReadErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(long_period)s + > 0.5 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(short_period)s + > 0.5 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its read error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s read requests in the last %(long_period)s are failing or too slow to meet the SLO." % window, + }, + } + for window in windows + ], + }, + { + name: 'cortex_gw_alerts', + rules: [ + { + alert: 'CortexGWRequestErrors', + expr: ||| + 100 * sum(rate(cortex_gw_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) + / + sum(rate(cortex_gw_request_duration_seconds_count[1m])) by (namespace, job, route) + > 0.1 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexGWRequestLatency', + expr: ||| + namespace_job_route:cortex_gw_request_duration_seconds:99quantile{route!="metrics"} + > + %(cortex_p99_latency_threshold_seconds)s + ||| % $._config, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + |||, + }, + }, + ], + }, + + { + name: 'cortex-provisioning', + rules: [ + { + alert: 'CortexProvisioningMemcachedTooSmall', + // 4 x in-memory series size = 24hrs of data. 
+ expr: ||| + ( + 4 * + sum by(cluster, namespace) (cortex_ingester_memory_series{job=~".+/ingester"} * cortex_ingester_chunk_size_bytes_sum{job=~".+/ingester"} / cortex_ingester_chunk_size_bytes_count{job=~".+/ingester"}) + / 1e9 + ) + > + ( + sum by (cluster, namespace) (memcached_limit_bytes{job=~".+/memcached"}) / 1e9 + ) + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Chunk memcached cluster for namespace {{ $labels.namespace }} are too small, should be at least {{ printf "%.2f" $value }}GB. + |||, + }, + }, + { + alert: 'CortexProvisioningTooManyActiveSeries', + // 1 million active series per ingester max. + expr: ||| + avg by (cluster, namespace) (cortex_ingester_memory_series{job=~".+/ingester"}) > 1.1e6 + and + sum by (cluster, namespace) (rate(cortex_ingester_received_chunks{job=~".+/ingester"}[1h])) == 0 + |||, + 'for': '1h', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Too many active series for ingesters in namespace {{ $labels.namespace }}, add more ingesters. + |||, + }, + }, + { + alert: 'CortexProvisioningTooManyWrites', + // 80k writes / s per ingester max. + expr: ||| + avg by (cluster,namespace) (rate(cortex_ingester_ingested_samples_total[1m])) > 80e3 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Too much write QPS for ingesters in namespace {{ $labels.namespace }}, add more ingesters. + |||, + }, + }, + { + alert: 'CortexProvisioningTooMuchMemory', + expr: ||| + avg by (cluster, namespace) (container_memory_working_set_bytes{container_name="ingester"} / container_spec_memory_limit_bytes{container_name="ingester"}) > 0.7 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + Too much memory being used by ingesters in namespace {{ $labels.namespace }}, add more ingesters. 
+ |||, + }, + }, + ], + }, + { + name: 'memcached', + rules: [ + { + alert: 'MemcachedDown', + expr: ||| + memcached_up == 0 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + Memcached Instance {{ $labels.instance }} is down for more than 15mins. + |||, + }, + }, + ], + }, + { + name: 'ruler_alerts', + rules: [ + { + alert: 'CortexRulerFailedEvaluations', + expr: ||| + sum(rate(cortex_prometheus_rule_evaluation_failures_total[1m])) by (namespace, job) + / + sum(rate(cortex_prometheus_rule_evaluation_total[1m])) by (namespace, job) + > 0.01 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexRulerMissedEvaluations', + expr: ||| + sum(rate(cortex_prometheus_rule_group_missed_iterations_total[1m])) by (namespace, job) + / + sum(rate(cortex_prometheus_rule_group_iterations_total[1m])) by (namespace, job) + > 0.01 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% missed iterations. 
+ |||, + }, + }, + ], + }, + ], + }, +} diff --git a/cortex-mixin/dashboards.jsonnet b/cortex-mixin/dashboards.jsonnet new file mode 100644 index 00000000..fb102817 --- /dev/null +++ b/cortex-mixin/dashboards.jsonnet @@ -0,0 +1,6 @@ +local dashboards = (import 'mixin.libsonnet').dashboards; + +{ + [name]: dashboards[name] + for name in std.objectFields(dashboards) +} diff --git a/cortex-mixin/dashboards.libsonnet b/cortex-mixin/dashboards.libsonnet new file mode 100644 index 00000000..44003655 --- /dev/null +++ b/cortex-mixin/dashboards.libsonnet @@ -0,0 +1,649 @@ +local utils = (import 'mixin-utils/utils.libsonnet'); + +local g = (import 'grafana-builder/grafana.libsonnet') + { + qpsPanel(selector):: + super.qpsPanel(selector) + { + targets: [ + target { + interval: '1m', + } + for target in super.targets + ], + }, + + latencyPanel(metricName, selector, multiplier='1e3'):: + super.latencyPanel(metricName, selector, multiplier) + { + targets: [ + target { + interval: '1m', + } + for target in super.targets + ], + }, +}; + +{ + _config+:: { + storage_backend: error 'must specify storage backend (cassandra, gcp)', + gcs_enabled: false, + }, + + dashboards+: { + 'cortex-writes.json': + if $._config.gcs_enabled then + $.cortex_writes_dashboard.addRow( + g.row('GCS') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gcs_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="POST"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gcs_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'POST')]) + ) + ) + else $.cortex_writes_dashboard, + + 'cortex-reads.json': + if $._config.gcs_enabled then + $.cortex_reads_dashboard.addRow( + g.row('GCS') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gcs_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="GET"}') + ) + 
.addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gcs_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'GET')]) + ) + ) + else $.cortex_reads_dashboard, + + 'cortex-chunks.json': + g.dashboard('Cortex / Chunks') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Active Series / Chunks') + .addPanel( + g.panel('Series') + + g.queryPanel('sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"})', 'series'), + ) + .addPanel( + g.panel('Chunks per series') + + g.queryPanel('sum(cortex_ingester_memory_chunks{cluster=~"$cluster", job=~"($namespace)/ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"})', 'chunks'), + ) + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Utilization') + + g.latencyPanel('cortex_ingester_chunk_utilization', '{cluster=~"$cluster", job=~"($namespace)/ingester"}', multiplier='1') + + { yaxes: g.yaxes('percentunit') }, + ) + .addPanel( + g.panel('Age') + + g.latencyPanel('cortex_ingester_chunk_age_seconds', '{cluster=~"$cluster", job=~"($namespace)/ingester"}'), + ), + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Size') + + g.latencyPanel('cortex_ingester_chunk_length', '{cluster=~"$cluster", job=~"($namespace)/ingester"}', multiplier='1') + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Entries') + + g.queryPanel('sum(rate(cortex_chunk_store_index_entries_per_chunk_sum{cluster=~"$cluster", job=~"($namespace)/ingester"}[5m])) / sum(rate(cortex_chunk_store_index_entries_per_chunk_count{cluster=~"$cluster", job=~"($namespace)/ingester"}[5m]))', 'entries'), + ), + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Queue Length') + + 
g.queryPanel('cortex_ingester_flush_queue_length{cluster=~"$cluster", job=~"($namespace)/ingester"}', '{{instance}}'), + ) + .addPanel( + g.panel('Flush Rate') + + g.qpsPanel('cortex_ingester_chunk_age_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester"}'), + ), + ), + + 'cortex-queries.json': + g.dashboard('Cortex / Queries') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Query Frontend') + .addPanel( + g.panel('Queue Duration') + + g.latencyPanel('cortex_query_frontend_queue_duration_seconds', '{cluster=~"$cluster", job=~"($namespace)/query-frontend"}'), + ) + .addPanel( + g.panel('Retries') + + g.latencyPanel('cortex_query_frontend_retries', '{cluster=~"$cluster", job=~"($namespace)/query-frontend"}', multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Queue Length') + + g.queryPanel('cortex_query_frontend_queue_length{cluster=~"$cluster", job=~"($namespace)/query-frontend"}', '{{cluster}} / {{namespace}} / {{instance}}'), + ) + ) + .addRow( + g.row('Query Frontend - Results Cache') + .addPanel( + g.panel('Cache Hit %') + + g.queryPanel('sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m])) / sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m]))', 'Hit Rate') + + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, + ) + .addPanel( + g.panel('Cache misses') + + g.queryPanel('sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m])) - sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m]))', 'Miss Rate'), + ) + ) + .addRow( + g.row('Querier') + .addPanel( + g.panel('Stages') + + g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster",job=~"($namespace)/querier"}) * 1e3', 
'{{slice}}') + + { yaxes: g.yaxes('ms') } + + g.stack, + ) + .addPanel( + g.panel('Chunk cache misses') + + g.queryPanel('sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/querier",name="chunksmemcache"}[1m])) - sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/querier",name="chunksmemcache"}[1m]))', 'Hit rate'), + ) + .addPanel( + g.panel('Chunk cache corruptions') + + g.queryPanel('sum(rate(cortex_cache_corrupt_chunks_total{cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'Corrupt chunks'), + ) + ) + .addRow( + g.row('Querier - Index Cache') + .addPanel( + g.panel('Total entries') + + g.queryPanel('sum(querier_cache_added_new_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}) - sum(querier_cache_evicted_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"})', 'Entries'), + ) + .addPanel( + g.panel('Cache Hit %') + + g.queryPanel('(sum(rate(querier_cache_gets_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m])) - sum(rate(querier_cache_misses_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))) / sum(rate(querier_cache_gets_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'hit rate') + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, + ) + .addPanel( + g.panel('Churn Rate') + + g.queryPanel('sum(rate(querier_cache_evicted_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'churn rate'), + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('Series per Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_series', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Chunks per 
Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_chunks', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Samples per Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_samples', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + ) + .addRow( + g.row('Chunk Store') + .addPanel( + g.panel('Index Lookups per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_index_lookups_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Series (pre-intersection) per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_series_pre_intersection_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Series (post-intersection) per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_series_post_intersection_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Chunks per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_chunks_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + ), + + 'frontend.json': + g.dashboard('Frontend') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Cortex Reqs (cortex_gw)') + .addPanel( + g.panel('QPS') + + 
g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw')]) + ) + ), + + 'ruler.json': + g.dashboard('Cortex / Ruler') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Rule Evaluations') + .addPanel( + g.panel('EPS') + + g.queryPanel('sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval]))', 'rules processed'), + ) + .addPanel( + g.panel('Latency') + + g.queryPanel( + ||| + sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + / + sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + |||, 'average' + ), + ) + ) + .addRow( + g.row('Group Evaluations') + .addPanel( + g.panel('Missed Iterations') + + g.queryPanel('sum(rate(prometheus_rule_group_iterations_missed_total{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval]))', 'iterations missed'), + ) + .addPanel( + g.panel('Latency') + + g.queryPanel( + ||| + sum (rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + / + sum (rate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + |||, 'average' + ), + ) + ), + + 'cortex-scaling.json': + g.dashboard('Cortex / Scaling') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + 
g.row('Workload-based scaling') + .addPanel( + g.panel('Workload-based scaling') + { sort: { col: 1, desc: false } } + + g.tablePanel([ + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + |||, + ||| + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(cortex_distributor_received_samples_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "ingester", "cluster", ".*"))[1h:]) + * 3 / 80e3 + |||, + ||| + label_replace( + sum by(cluster, namespace) ( + cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace"} + ) / 1e+6, + "deployment", "ingester", "cluster", ".*" + ) + or + label_replace( + sum by (cluster, namespace) ( + 4 * cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + * + cortex_ingester_chunk_size_bytes_sum{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + / + cortex_ingester_chunk_size_bytes_count{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + ) + / + avg by (cluster, namespace) (memcached_limit_bytes{cluster=~"$cluster", namespace=~"$namespace", job=~".+/memcached"}), + "deployment", "memcached", "namespace", ".*" + ) + |||, + ], { + cluster: { alias: 'Cluster' }, + namespace: { alias: 'Namespace' }, + deployment: { alias: 'Deployment' }, + 'Value #A': { alias: 'Current Replicas', decimals: 0 }, + 'Value #B': { alias: 'Required Replicas, by ingestion rate', decimals: 0 }, + 'Value #C': { alias: 'Required Replicas, by active series', decimals: 0 }, + }) + ) + ) + .addRow( + (g.row('Resource-based scaling') + { height: '500px' }) + .addPanel( + g.panel('Resource-based scaling') + { sort: { col: 1, desc: false } } + 
+ g.tablePanel([ + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + |||, + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + * + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(container_cpu_usage_seconds_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:]) + / + sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_cpu_cores{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))")) + |||, + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + * + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(container_memory_usage_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:1m]) + / + sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_memory_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))")) + |||, + ], { + cluster: { alias: 'Cluster' }, + namespace: { alias: 'Namespace' }, + deployment: { alias: 'Deployment' }, + 'Value #A': { alias: 'Current Replicas', 
decimals: 0 }, + 'Value #B': { alias: 'Required Replicas, by CPU usage', decimals: 0 }, + 'Value #C': { alias: 'Required Replicas, by Memory usage', decimals: 0 }, + }) + ) + ), + }, + + cortex_writes_dashboard:: + g.dashboard('Cortex / Writes') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + (g.row('Headlines') + + { + height: '100px', + showTitle: false, + }) + .addPanel( + g.panel('Samples / s') + + g.statPanel('sum(cluster_namespace:cortex_distributor_received_samples:rate5m{cluster=~"$cluster", namespace=~"$namespace"})', format='reqps') + ) + .addPanel( + g.panel('Active Series') + + g.statPanel(||| + sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"} + / on(namespace) group_left + max by (namespace) (cortex_distributor_replication_factor{cluster=~"$cluster", job=~"($namespace)/distributor"})) + |||, format='short') + ) + .addPanel( + g.panel('QPS') + + g.statPanel('sum(rate(cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-write"}[5m]))', format='reqps') + ) + ) + .addRow( + g.row('Legacy Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-write"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'cortex-write')]) + ) + ) + .addRow( + g.row('Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="api_prom_push"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', 
[utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'api_prom_push')]) + ) + ) + .addRow( + g.row('Distributor') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/distributor"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/distributor')]) + ) + ) + .addRow( + g.row('Etcd (HA Dedupe)') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_kv_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/distributor"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_kv_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/distributor')]) + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",route="/cortex.Ingester/Push"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('route', '/cortex.Ingester/Push')]) + ) + ) + .addRow( + g.row('Consul (Ring)') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_kv_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_kv_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')]) + ) + ) + .addRow( + g.row('Memcached') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_memcache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",method="Memcache.Put"}') + ) + 
.addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_memcache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('method', 'Memcache.Put')]) + ) + ) + .addRow({ + cassandra: + g.row('Cassandra') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cassandra_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="INSERT"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cassandra_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', 'INSERT')]) + ), + + gcp: + g.row('BigTable') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="/google.bigtable.v2.Bigtable/MutateRows"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')]) + ), + + dynamodb: + g.row('DynamoDB') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_dynamo_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="DynamoDB.BatchWriteItem"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_dynamo_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', 'DynamoDB.BatchWriteItem')]) + ), + }[$._config.storage_backend]), + + cortex_reads_dashboard:: + g.dashboard('Cortex / Reads') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 
'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Legacy Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-read"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'cortex-read')]) + ) + ) + .addRow( + g.row('Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route=~"(api_prom_api_v1_query_range|api_prom_api_v1_query|api_prom_api_v1_label_name_values|api_prom_api_v1_series|api_prom_api_v1_labels)"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.re('route', '(api_prom_api_v1_query_range|api_prom_api_v1_query|api_prom_api_v1_label_name_values|api_prom_api_v1_series|api_prom_api_v1_labels)')]) + ) + ) + .addRow( + g.row('Query Frontend') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/query-frontend"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/query-frontend'), utils.selector.neq('route', '/frontend.Frontend/Process')]) + ) + ) + .addRow( + g.row('Cache - Query Results') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/query-frontend"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', 
'$cluster'), utils.selector.re('job', '($namespace)/query-frontend')]) + ) + ) + .addRow( + g.row('Querier') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')]) + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",route!~"/cortex.Ingester/Push|metrics|ready|traces"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.nre('route', '/cortex.Ingester/Push|metrics|ready')]) + ) + ) + .addRow( + g.row('Memcached - Index') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier",method="store.index-cache-read.memcache.fetch"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('method', 'store.index-cache-read.memcache.fetch')]) + ) + ) + .addRow( + g.row('Memcached - Chunks') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier",method="chunksmemcache.fetch"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('method', 'chunksmemcache.fetch')]) + ) + ) + .addRow({ + cassandra: + 
g.row('Cassandra') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cassandra_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="SELECT"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cassandra_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'SELECT')]) + ), + + gcp: + g.row('BigTable') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="/google.bigtable.v2.Bigtable/ReadRows"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')]) + ), + + dynamodb: + g.row('DynamoDB') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_dynamo_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="DynamoDB.QueryPages"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_dynamo_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'DynamoDB.QueryPages')]) + ), + }[$._config.storage_backend]), +} diff --git a/cortex-mixin/jsonnetfile.json b/cortex-mixin/jsonnetfile.json new file mode 100644 index 00000000..87e724d5 --- /dev/null +++ b/cortex-mixin/jsonnetfile.json @@ -0,0 +1,24 @@ +{ + "dependencies": [ + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "master" + }, + { + "name": "mixin-utils", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": 
"mixin-utils" + } + }, + "version": "master" + } + ] +} diff --git a/cortex-mixin/jsonnetfile.lock.json b/cortex-mixin/jsonnetfile.lock.json new file mode 100644 index 00000000..fe50a404 --- /dev/null +++ b/cortex-mixin/jsonnetfile.lock.json @@ -0,0 +1,26 @@ +{ + "dependencies": [ + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE=" + }, + { + "name": "mixin-utils", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "mixin-utils" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "J1iExBloZLjVEvdzHVjvP9AVTqDOJSfFOtBoeQ7EhKk=" + } + ] +} diff --git a/cortex-mixin/mixin.libsonnet b/cortex-mixin/mixin.libsonnet new file mode 100644 index 00000000..b2b2f10d --- /dev/null +++ b/cortex-mixin/mixin.libsonnet @@ -0,0 +1,3 @@ +(import 'dashboards.libsonnet') + +(import 'alerts.libsonnet') + +(import 'recording_rules.libsonnet') diff --git a/cortex-mixin/recording_rules.jsonnet b/cortex-mixin/recording_rules.jsonnet new file mode 100644 index 00000000..4cda6c6f --- /dev/null +++ b/cortex-mixin/recording_rules.jsonnet @@ -0,0 +1 @@ +std.manifestYamlDoc((import 'mixin.libsonnet').prometheus_rules) diff --git a/cortex-mixin/recording_rules.libsonnet b/cortex-mixin/recording_rules.libsonnet new file mode 100644 index 00000000..7291ac5c --- /dev/null +++ b/cortex-mixin/recording_rules.libsonnet @@ -0,0 +1,114 @@ +local utils = import 'mixin-utils/utils.libsonnet'; +local windows = [ + { period: '5m' }, + { period: '30m' }, + { period: '1h' }, + { period: '2h' }, + { period: '6h' }, + { period: '1d' }, + { period: '3d' }, +]; + +{ + prometheus_rules+:: { + groups+: [{ + name: 'cortex_rules', + rules: + utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'job']) + + 
utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']) + + utils.histogramRules('cortex_memcache_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_cache_request_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_cache_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_bigtable_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_cassandra_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_dynamo_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_query_frontend_retries', ['cluster', 'job']) + + utils.histogramRules('cortex_query_frontend_queue_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_series', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_chunks', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_samples', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_index_lookups_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_series_pre_intersection_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_series_post_intersection_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_chunks_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_database_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_gcs_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_kv_request_duration_seconds', ['cluster', 'job']), + }, { + name: 'frontend_rules', + rules: + utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'job']) + + 
utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']), + }, { + name: 'cortex_slo_rules', + rules: [ + { + record: 'namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_request_duration_seconds_bucket{status_code!~"5..", le="1", route="api_prom_push", job=~".*/cortex-gw"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_request_duration_seconds_count{route="api_prom_push", job=~".*/cortex-gw"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 'namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_request_duration_seconds_bucket{status_code!~"5..",le="2.5",route=~"api_prom_api_v1_query.*", job=~".*/cortex-gw"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_request_duration_seconds_count{route=~"api_prom_api_v1_query.*", job=~".*/cortex-gw"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 'namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_bucket{status_code!~"error|5..",le="1",route="cortex-write"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_count{route="cortex-write"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 
'namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_bucket{status_code!~"error|5..",le="2.5",route="cortex-read"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_count{route="cortex-read"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ], + }, { + name: 'cortex_received_samples', + rules: [ + { + record: 'cluster_namespace:cortex_distributor_received_samples:rate5m', + expr: ||| + sum by (cluster, namespace) (rate(cortex_distributor_received_samples_total{job=~".*/distributor"}[5m])) + |||, + }, + ], + }], + }, +} diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet new file mode 100644 index 00000000..168f6be2 --- /dev/null +++ b/cortex/alertmanager.libsonnet @@ -0,0 +1,32 @@ +{ + local container = $.core.v1.container, + + alertmanager_args:: + { + target: 'alertmanager', + 'log.level': 'debug', + + 'alertmanager.storage.type': 'gcs', + 'alertmanager.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config, + 'alertmanager.web.external-url': 'http://alertmanager.%s.svc.cluster.local/alertmanager' % $._config.namespace, + }, + + alertmanager_container:: + container.new('alertmanager', $._images.alertmanager) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.alertmanager_args)) + + $.util.resourcesRequests('100m', '1Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + alertmanager_deployment: + deployment.new('alertmanager', 1, [$.alertmanager_container]) + + deployment.mixin.spec.template.spec.withRestartPolicy('Always') + + $.util.antiAffinity, + + local service = $.core.v1.service, + + alertmanager_server: + $.util.serviceFor($.alertmanager_deployment), +} diff --git a/cortex/common.libsonnet b/cortex/common.libsonnet new file mode 100644 index 00000000..62a5a338 --- /dev/null +++ 
b/cortex/common.libsonnet @@ -0,0 +1,14 @@ +{ + namespace: + $.core.v1.namespace.new($._config.namespace), + + util+:: { + local containerPort = $.core.v1.containerPort, + + defaultPorts:: + [ + containerPort.newNamed('http-metrics', 80), + containerPort.newNamed('grpc', 9095), + ], + }, +} diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet new file mode 100644 index 00000000..17350551 --- /dev/null +++ b/cortex/config.libsonnet @@ -0,0 +1,251 @@ +{ + _config+: { + namespace: error 'must define namespace', + cluster: error 'must define cluster', + replication_factor: 3, + + storage_backend: error 'must specify storage backend (cassandra, gcp)', + table_prefix: $._config.namespace, + cassandra_addresses: error 'must specify cassandra addresses', + bigtable_instance: error 'must specify bigtable instance', + bigtable_project: error 'must specify bigtable project', + aws_region: error 'must specify AWS region', + s3_bucket_name: error 'must specify S3 bucket name', + + // schema is used to generate the storage schema yaml file used by + // the Cortex chunks storage: + // - More information: https://github.com/cortexproject/cortex/pull/1072 + // - TSDB integration doesn't rely on the Cortex chunks store, so doesn't + // support the schema config. + schema: if $._config.storage_engine != 'tsdb' then + error 'must specify a schema config' + else + [], + + max_series_per_user: 250000, + max_series_per_metric: 10000, + max_chunk_idle: '15m', + + test_exporter_enabled: false, + test_exporter_start_time: error 'must specify test exporter start time', + test_exporter_user_id: error 'must specify test exporter user id', + + querierConcurrency: 8, + querier_ingester_streaming_enabled: $._config.storage_engine != 'tsdb', + + jaeger_agent_host: null, + + // Use the Cortex chunks storage engine by default, while giving the ability + // to switch to tsdb storage. 
+ storage_engine: 'chunks', + storage_tsdb_bucket_name: error 'must specify GCS bucket name to store TSDB blocks', + + // TSDB storage engine doesn't require the table manager. + table_manager_enabled: $._config.storage_engine != 'tsdb', + + // TSDB storage engine doesn't require memcached for chunks or chunk indexes. + memcached_index_queries_enabled: $._config.storage_engine != 'tsdb', + memcached_index_writes_enabled: $._config.storage_engine != 'tsdb', + memcached_chunks_enabled: $._config.storage_engine != 'tsdb', + + enabledBackends: [ + backend + for backend in std.split($._config.storage_backend, ',') + ], + + client_configs: { + aws: + if std.count($._config.enabledBackends, 'aws') > 0 then { + 'dynamodb.api-limit': 10, + 'dynamodb.url': 'https://%s' % $._config.aws_region, + 's3.url': 'https://%s/%s' % [$._config.aws_region, $._config.s3_bucket_name], + } else {}, + cassandra: + if std.count($._config.enabledBackends, 'cassandra') > 0 then { + 'cassandra.keyspace': $._config.namespace, + 'cassandra.addresses': $._config.cassandra_addresses, + 'cassandra.replication-factor': $._config.replication_factor, + } else {}, + gcp: + if std.count($._config.enabledBackends, 'gcp') > 0 then { + 'bigtable.project': $._config.bigtable_project, + 'bigtable.instance': $._config.bigtable_instance, + } else {}, + }, + + storeConfig: self.storeMemcachedChunksConfig, + + storeMemcachedChunksConfig: if $._config.memcached_chunks_enabled then + { + 'memcached.hostname': 'memcached.%s.svc.cluster.local' % $._config.namespace, + 'memcached.service': 'memcached-client', + 'memcached.timeout': '3s', + 'memcached.batchsize': 1024, + 'memcached.consistent-hash': true, + } + else {}, + + storageConfig: + $._config.client_configs.aws + + $._config.client_configs.cassandra + + $._config.client_configs.gcp + + $._config.storageTSDBConfig + + { 'config-yaml': '/etc/cortex/schema/config.yaml' }, + + // TSDB blocks storage configuration, used only when 'tsdb' storage + // engine is 
explicitly enabled. + storageTSDBConfig: if $._config.storage_engine == 'tsdb' then { + 'store.engine': 'tsdb', + 'experimental.tsdb.dir': '/tmp/tsdb', + 'experimental.tsdb.sync-dir': '/tmp/tsdb', + 'experimental.tsdb.block-ranges-period': '2h', + 'experimental.tsdb.retention-period': '1h', + 'experimental.tsdb.ship-interval': '1m', + 'experimental.tsdb.backend': 'gcs', + 'experimental.tsdb.gcs.bucket-name': $._config.storage_tsdb_bucket_name, + } else {}, + + // Shared between the Ruler and Querier + queryConfig: { + // Use iterators to merge chunks, to reduce memory usage. + 'querier.ingester-streaming': $._config.querier_ingester_streaming_enabled, + 'querier.batch-iterators': true, + + // Don't query the chunk store for data younger than max_chunk_idle. + 'store.min-chunk-age': $._config.max_chunk_idle, + + // Don't query ingesters for older queries. + // Chunks are 6hrs right now. Add some slack for safety. + 'querier.query-ingesters-within': '12h', + + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + + // Limit the size of the rows we read from the index. + 'store.cardinality-limit': 1e6, + + // Don't allow individual queries of longer than 31days. Due to day query + // splitting in the frontend, the reality is this only limits rate(foo[31d]) + // type queries. + 'store.max-query-length': '744h', + } + ( + if $._config.memcached_index_queries_enabled then + { + // Setting for index cache. + 'store.index-cache-validity': '14m', // ingester.retain-period=15m, 1m less for safety. 
+ 'store.index-cache-read.cache.enable-fifocache': true, + 'store.index-cache-read.fifocache.size': 102400, + 'store.index-cache-read.memcached.hostname': 'memcached-index-queries.%(namespace)s.svc.cluster.local' % $._config, + 'store.index-cache-read.memcached.service': 'memcached-client', + 'store.index-cache-read.memcached.timeout': '500ms', + 'store.index-cache-read.memcached.consistent-hash': true, + 'store.cache-lookups-older-than': '36h', + } + else {} + ), + + ringConfig: { + 'consul.hostname': 'consul.%s.svc.cluster.local:8500' % $._config.namespace, + 'consul.consistent-reads': false, + 'ring.prefix': '', + }, + + // Some distributor config is shared with the querier. + distributorConfig: { + 'distributor.replication-factor': $._config.replication_factor, + 'distributor.shard-by-all-labels': true, + 'distributor.health-check-ingesters': true, + 'ring.heartbeat-timeout': '10m', + 'consul.consistent-reads': false, + }, + + overrides: { + // === Per-tenant usage limits. === + // These are the defaults. These are not global limits but per instance limits. 
+ // + // small_user: { + // ingestion_rate: 10,000 + // ingestion_burst_size: 20,000 + // + // max_series_per_user: 250,000 + // max_series_per_metric: 10,000 + // + // max_series_per_query: 10,000 + // max_samples_per_query: 100,000 + // }, + + medium_user:: { + ingestion_rate: 25000, + ingestion_burst_size: 50000, + + max_series_per_metric: 100000, + max_series_per_user: 500000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + + big_user:: { + ingestion_rate: 50000, + ingestion_burst_size: 70000, + + max_series_per_metric: 100000, + max_series_per_user: 1000000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + + super_user:: { + ingestion_rate: 200000, + ingestion_burst_size: 240000, + + max_series_per_metric: 200000, + max_series_per_user: 2000000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + }, + + schemaID: std.md5(std.toString($._config.schema)), + + enable_pod_priorities: true, + }, + + local configMap = $.core.v1.configMap, + + overrides_config: + configMap.new('overrides') + + configMap.withData({ + 'overrides.yaml': $.util.manifestYaml({ + overrides: $._config.overrides, + }), + }), + + storage_config: + configMap.new('schema-' + $._config.schemaID) + + configMap.withData({ + 'config.yaml': $.util.manifestYaml({ + configs: $._config.schema, + }), + }), + + local deployment = $.apps.v1beta1.deployment, + storage_config_mixin:: + deployment.mixin.spec.template.metadata.withAnnotationsMixin({ schemaID: $._config.schemaID },) + + $.util.configVolumeMount('schema-' + $._config.schemaID, '/etc/cortex/schema'), + + // This removed the CPU limit from the config. NB won't show up in subset + // diffs, but ks apply will do the right thing. + removeCPULimitsMixin:: { + resources+: { + // Can't use super.memory in limits, as we want to + // override the whole limits struct. 
+ local memoryLimit = super.limits.memory, + + limits: { + memory: memoryLimit, + }, + }, + }, +} diff --git a/cortex/consul.libsonnet b/cortex/consul.libsonnet new file mode 100644 index 00000000..9ece317d --- /dev/null +++ b/cortex/consul.libsonnet @@ -0,0 +1,59 @@ +local consul = import 'consul/consul.libsonnet'; + +{ + _config+:: { + consul_replicas: 1, + other_namespaces+: [], + }, + + consul: consul { + _config+:: { + consul_replicas: $._config.consul_replicas, + namespace: $._config.namespace, + }, + + // Snapshot the raft.db very frequently, to stop it getting too big. + consul_config+:: { + raft_snapshot_threshold: 128, + raft_trailing_logs: 10e3, + }, + + local container = $.core.v1.container, + + consul_container+:: + container.withArgsMixin([ + '-ui-content-path=/%s/consul/' % $._config.namespace, + ]) + + $.util.resourcesRequests('4', '4Gi'), + + local deployment = $.apps.v1beta1.deployment, + local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffinity, + local volume = $.core.v1.volume, + consul_deployment+: + + // Keep the consul state on a ramdisk, as they are ephemeral to us. + $.util.emptyVolumeMount( + 'data', + '/consul/data/', + volumeMixin=volume.mixin.emptyDir.withMedium('Memory'), + ) + + + // Ensure Consul is not scheduled on the same host as an ingester + // (in any namespace - hence other_namespaces). 
+ podAntiAffinity.withRequiredDuringSchedulingIgnoredDuringExecutionMixin([ + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.new() + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.mixin.labelSelector.withMatchLabels({ name: 'ingester' }) + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.withNamespaces([$._config.namespace] + $._config.other_namespaces) + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.withTopologyKey('kubernetes.io/hostname'), + ]) + + + $.util.podPriority('high'), + + // Don't healthcheck services, adds load to consul. + consul_exporter+:: + container.withArgsMixin([ + '--no-consul.health-summary', + '--consul.allow_stale', + ]), + }, +} diff --git a/cortex/cortex-manifests.jsonnet.example b/cortex/cortex-manifests.jsonnet.example new file mode 100644 index 00000000..7edc14cd --- /dev/null +++ b/cortex/cortex-manifests.jsonnet.example @@ -0,0 +1,26 @@ +local cortex = import "cortex/cortex.libsonnet"; + +cortex { + _config+:: { + namespace: "default", + schema: [{ + from: '2019-11-15', + store: 'bigtable-hashed', + object_store: 'gcs', + schema: 'v10', + index: { + prefix: 'dev_index_', + period: '168h', + }, + chunks: { + prefix: 'dev_chunks_', + period: '168h', + }, + }], + + storage_backend: 'gcp', + bigtable_instance: 'example-instance-prod', + bigtable_project: 'example-project1-cortex', + }, +} + diff --git a/cortex/cortex.libsonnet b/cortex/cortex.libsonnet new file mode 100644 index 00000000..430c1d43 --- /dev/null +++ b/cortex/cortex.libsonnet @@ -0,0 +1,18 @@ +(import 'ksonnet-util/kausal.libsonnet') + +(import 'ksonnet-util/jaeger.libsonnet') + +(import 'images.libsonnet') + +(import 'common.libsonnet') + +(import 'config.libsonnet') + +(import 'consul.libsonnet') + + +// Cortex services +(import 'distributor.libsonnet') + +(import 'ingester.libsonnet') + +(import 'querier.libsonnet') + +(import 'query-frontend.libsonnet') + +(import 
'table-manager.libsonnet') + + +// Supporting services +(import 'etcd.libsonnet') + +(import 'memcached.libsonnet') + +(import 'test-exporter.libsonnet') diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet new file mode 100644 index 00000000..d5b147d8 --- /dev/null +++ b/cortex/distributor.libsonnet @@ -0,0 +1,52 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + distributor_args:: + $._config.ringConfig + + $._config.distributorConfig + + { + target: 'distributor', + + 'distributor.ingestion-rate-limit': 10000, + 'distributor.ingestion-burst-size': 20000, + 'validation.reject-old-samples': true, + 'validation.reject-old-samples.max-age': '12h', + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + 'distributor.remote-timeout': '20s', + + 'distributor.ha-tracker.enable': true, + 'distributor.ha-tracker.enable-for-all-users': true, + 'distributor.ha-tracker.store': 'etcd', + 'distributor.ha-tracker.etcd.endpoints': 'etcd-client.%s.svc.cluster.local.:2379' % $._config.namespace, + 'distributor.ha-tracker.prefix': 'prom_ha/', + + // The memory requests are 2G, and we barely use 100M. + // By adding a ballast of 1G, we can drastically reduce GC, but also keep the usage at + // around 1.25G, reducing the 99%ile. 
+ 'mem-ballast-size-bytes': 1 << 30, // 1GB + }, + + distributor_container:: + container.new('distributor', $._images.distributor) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.distributor_args)) + + $.util.resourcesRequests('2', '2Gi') + + $.util.resourcesLimits('6', '4Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + distributor_deployment: + deployment.new('distributor', 3, [ + $.distributor_container, + ]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex'), + + local service = $.core.v1.service, + + distributor_service: + $.util.serviceFor($.distributor_deployment) + + service.mixin.spec.withClusterIp('None'), +} diff --git a/cortex/etcd.libsonnet b/cortex/etcd.libsonnet new file mode 100644 index 00000000..41981db9 --- /dev/null +++ b/cortex/etcd.libsonnet @@ -0,0 +1,9 @@ +local etcd_cluster = import 'etcd-operator/etcd-cluster.libsonnet'; + +etcd_cluster { + etcd: + $.etcd_cluster('etcd', env=[{ + name: 'ETCD_AUTO_COMPACTION_RETENTION', + value: '1h', + }]), +} diff --git a/cortex/images.libsonnet b/cortex/images.libsonnet new file mode 100644 index 00000000..8d5ccce4 --- /dev/null +++ b/cortex/images.libsonnet @@ -0,0 +1,21 @@ +{ + _images+:: { + // Various third-party images. + memcached: 'memcached:1.5.17-alpine', + memcachedExporter: 'prom/memcached-exporter:v0.6.0', + postgresql: 'postgres:9.6.11-alpine', + + // Our services. + cortex: 'cortexproject/cortex:master-37c1f178', + + distributor: self.cortex, + ingester: self.cortex, + querier: self.cortex, + query_frontend: self.cortex, + tableManager: self.cortex, + // TODO(gouthamve/jtlisi): Upstream the ruler and AM configs. 
+ ruler: 'jtlisi/cortex:20190806_prommanager_ruler_with_api-50343f8d', + alertmanager: 'jtlisi/cortex:20190819_alertmanager_update-165b393a', + testExporter: 'cortexproject/test-exporter:master-ef99cdaf', + }, +} diff --git a/cortex/ingester.libsonnet b/cortex/ingester.libsonnet new file mode 100644 index 00000000..0e08ba15 --- /dev/null +++ b/cortex/ingester.libsonnet @@ -0,0 +1,69 @@ +{ + local container = $.core.v1.container, + + ingester_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + { + target: 'ingester', + + // Ring config. + 'ingester.num-tokens': 512, + 'ingester.join-after': '30s', + 'ingester.max-transfer-retries': 60, // Each retry is backed off by 5s, so 5mins for new ingester to come up. + 'ingester.claim-on-rollout': true, + 'ingester.heartbeat-period': '15s', + + // Chunk building/flushing config. + 'ingester.chunk-encoding': 3, // Bigchunk encoding + 'ingester.retain-period': '15m', + 'ingester.max-chunk-age': '6h', + 'ingester.spread-flushes': true, + + // Limits config. + 'ingester.max-chunk-idle': $._config.max_chunk_idle, + 'ingester.max-series-per-user': $._config.max_series_per_user, + 'ingester.max-series-per-metric': $._config.max_series_per_metric, + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + 'server.grpc-max-concurrent-streams': 100000, + } + ( + if $._config.memcached_index_writes_enabled then + { + // Setup index write deduping. 
+ 'store.index-cache-write.memcached.hostname': 'memcached-index-writes.%(namespace)s.svc.cluster.local' % $._config, + 'store.index-cache-write.memcached.service': 'memcached-client', + 'store.index-cache-write.memcached.consistent-hash': true, + } + else {} + ), + + ingester_container:: + container.new('ingester', $._images.ingester) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.ingester_args)) + + container.mixin.readinessProbe.httpGet.withPath('/ready') + + container.mixin.readinessProbe.httpGet.withPort(80) + + container.mixin.readinessProbe.withInitialDelaySeconds(15) + + container.mixin.readinessProbe.withTimeoutSeconds(1) + + + $.util.resourcesRequests('4', '15Gi') + + $.util.resourcesLimits(null, '25Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + ingester_deployment: + deployment.new('ingester', 3, [$.ingester_container]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + deployment.mixin.spec.withMinReadySeconds(60) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) + + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(4800) + + $.storage_config_mixin + + $.util.podPriority('high'), + + ingester_service: + $.util.serviceFor($.ingester_deployment), +} diff --git a/cortex/jsonnetfile.json b/cortex/jsonnetfile.json new file mode 100644 index 00000000..375b9813 --- /dev/null +++ b/cortex/jsonnetfile.json @@ -0,0 +1,44 @@ +{ + "dependencies": [ + { + "name": "consul", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "consul" + } + }, + "version": "master" + }, + { + "name": "etcd-operator", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "etcd-operator" + } + }, + "version": "master" + }, + { + "name": "ksonnet-util", + "source": { + "git": { + "remote": 
"https://github.com/grafana/jsonnet-libs", + "subdir": "ksonnet-util" + } + }, + "version": "master" + }, + { + "name": "memcached", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "memcached" + } + }, + "version": "master" + } + ] +} diff --git a/cortex/jsonnetfile.lock.json b/cortex/jsonnetfile.lock.json new file mode 100644 index 00000000..e4f26b0b --- /dev/null +++ b/cortex/jsonnetfile.lock.json @@ -0,0 +1,48 @@ +{ + "dependencies": [ + { + "name": "consul", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "consul" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "S3cLCI5OLpSdwqsAWkNtdGXTlFTpuVGB29m6CXw8xHI=" + }, + { + "name": "etcd-operator", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "etcd-operator" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "KUklp389C8zcSrYjRkIy00w81gP1HGU3eDmxghqtmBs=" + }, + { + "name": "ksonnet-util", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "ksonnet-util" + } + }, + "version": "250bf5499d81e5e77e1e5ed2242c89ad27485aec", + "sum": "8gmmSMANOAs4dfP5a09Y+nE9pd8E4TMpk3YPKxT4ys0=" + }, + { + "name": "memcached", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "memcached" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "hroD9u119YWI5g2SnspmSgMDJUMyXFZDnjymhUS6Pjs=" + } + ] +} diff --git a/cortex/memcached.libsonnet b/cortex/memcached.libsonnet new file mode 100644 index 00000000..bd00189f --- /dev/null +++ b/cortex/memcached.libsonnet @@ -0,0 +1,63 @@ +local memcached = import 'memcached/memcached.libsonnet'; + +memcached { + memcached+:: { + cpu_limits:: null, + + deployment: {}, + + local statefulSet = $.apps.v1beta1.statefulSet, + + statefulSet: + statefulSet.new(self.name, 3, [ + self.memcached_container, + 
self.memcached_exporter, + ], []) + + statefulSet.mixin.spec.withServiceName(self.name) + + $.util.antiAffinity, + + local service = $.core.v1.service, + + service: + $.util.serviceFor(self.statefulSet) + + service.mixin.spec.withClusterIp('None'), + }, + + // Dedicated memcached instance used to cache query results. + memcached_frontend: $.memcached { + name: 'memcached-frontend', + max_item_size: '5m', + }, + + // Dedicated memcached instance used to temporarily cache index lookups. + memcached_index_queries: if $._config.memcached_index_queries_enabled then + $.memcached { + name: 'memcached-index-queries', + max_item_size: '5m', + } + else {}, + + // Dedicated memcached instance used to dedupe writes to the index. + memcached_index_writes: if $._config.memcached_index_writes_enabled then + $.memcached { + name: 'memcached-index-writes', + } + else {}, + + // Memcached instance used to cache chunks. + memcached_chunks: if $._config.memcached_chunks_enabled then + $.memcached { + name: 'memcached', + + // Save memory by more tightly provisioning memcached chunks. + memory_limit_mb: 6 * 1024, + overprovision_factor: 1.05, + + local container = $.core.v1.container, + + // Raise connection limits now our clusters are bigger. 
+ memcached_container+:: + container.withArgsMixin(['-c 4096']), + } + else {}, +} diff --git a/cortex/postgresql.libsonnet b/cortex/postgresql.libsonnet new file mode 100644 index 00000000..d63eb8c6 --- /dev/null +++ b/cortex/postgresql.libsonnet @@ -0,0 +1,29 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + _config+: { + pgUser: 'cortex', + pgPassword: '1234', + }, + + postgresql_container:: + container.new('postgres', $._images.postgresql) + + container.withPorts([ + containerPort.newNamed('postgresql', 5432), + ]) + + container.withEnvMap({ + POSTGRES_USER: $._config.pgUser, + POSTGRES_DB: 'configs', + }) + + $.util.resourcesRequests('2', '1Gi') + + $.util.resourcesLimits('4', '2Gi'), + + local deployment = $.apps.v1beta1.deployment, + postgresql_deployment: + deployment.new('postgresql', 1, [$.postgresql_container]), + + local service = $.core.v1.service, + postgresql_service: + $.util.serviceFor($.postgresql_deployment), +} diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet new file mode 100644 index 00000000..fda924c7 --- /dev/null +++ b/cortex/querier.libsonnet @@ -0,0 +1,52 @@ +{ + local container = $.core.v1.container, + + querier_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + $._config.queryConfig + + $._config.distributorConfig + + { + target: 'querier', + + // Increase HTTP server response write timeout, as we were seeing some + // queries that return a lot of data timeing out. + 'server.http-write-timeout': '1m', + + // Limit query concurrency to prevent multi large queries causing an OOM. + 'querier.max-concurrent': $._config.querierConcurrency, + + // Limit to N/2 worker threads per frontend, as we have two frontends. 
+ 'querier.worker-parallelism': $._config.querierConcurrency / 2, + 'querier.frontend-address': 'query-frontend.%(namespace)s.svc.cluster.local:9095' % $._config, + 'querier.frontend-client.grpc-max-send-msg-size': 100 << 20, + + 'log.level': 'debug', + }, + + querier_container:: + container.new('querier', $._images.querier) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.querier_args)) + + $.util.resourcesRequests('1', '12Gi') + + $.util.resourcesLimits(null, '24Gi') + + $.jaeger_mixin + + container.withEnvMap({ + JAEGER_REPORTER_MAX_QUEUE_SIZE: '1024', // Default is 100. + }), + + local deployment = $.apps.v1beta1.deployment, + + querier_deployment: + deployment.new('querier', 3, [$.querier_container]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.storage_config_mixin, + + local service = $.core.v1.service, + + querier_service: + $.util.serviceFor($.querier_deployment) + + service.mixin.spec.withSelector({ name: 'query-frontend' }), +} diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet new file mode 100644 index 00000000..a9e64abe --- /dev/null +++ b/cortex/query-frontend.libsonnet @@ -0,0 +1,60 @@ +{ + local container = $.core.v1.container, + + query_frontend_args:: { + target: 'query-frontend', + + // Need log.level=debug so all queries are logged, needed for analyse.py. + 'log.level': 'debug', + + // Increase HTTP server response write timeout, as we were seeing some + // queries that return a lot of data timeing out. + 'server.http-write-timeout': '1m', + + // Split long queries up into multiple day-long queries. + 'querier.split-queries-by-day': true, + + // Cache query results. 
+ 'querier.align-querier-with-step': true, + 'querier.cache-results': true, + 'frontend.memcached.hostname': 'memcached-frontend.%s.svc.cluster.local' % $._config.namespace, + 'frontend.memcached.service': 'memcached-client', + 'frontend.memcached.timeout': '500ms', + 'frontend.memcached.consistent-hash': true, + + // So that exporters like cloudwatch can still send in data and be un-cached. + 'frontend.max-cache-freshness': '10m', + + // Compress HTTP responses; improves latency for very big results and slow + // connections. + 'querier.compress-http-responses': true, + + // So it can recieve big responses from the querier. + 'server.grpc-max-recv-msg-size-bytes': 100 << 20, + + // Limit queries to 500 days, allow this to be override per-user. + 'store.max-query-length': '12000h', // 500 Days + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + }, + + query_frontend_container:: + container.new('query-frontend', $._images.query_frontend) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) + + $.util.resourcesRequests('2', '600Mi') + + $.util.resourcesLimits(null, '1200Mi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + query_frontend_deployment: + deployment.new('query-frontend', 2, [$.query_frontend_container]) + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.util.antiAffinity, + + local service = $.core.v1.service, + + query_frontend_service: + $.util.serviceFor($.query_frontend_deployment) + + service.mixin.spec.withClusterIp('None'), +} diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet new file mode 100644 index 00000000..749c8f84 --- /dev/null +++ b/cortex/ruler.libsonnet @@ -0,0 +1,53 @@ +{ + local container = $.core.v1.container, + + ruler_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + $._config.queryConfig + + $._config.distributorConfig + + { + target: 'ruler', + // Alertmanager configs 
+ 'ruler.alertmanager-url': 'http://alertmanager.%s.svc.cluster.local/alertmanager' % $._config.namespace, + + // Ring Configs + 'ruler.enable-sharding': true, + 'ruler.consul.hostname': 'consul.%s.svc.cluster.local:8500' % $._config.namespace, + 'ruler.consul.consistent-reads': false, + 'ruler.prefix': 'rulers/', + 'ruler.distributor.replication-factor': 1, + 'ruler.claim-on-rollout': true, + 'ruler.join-after': '15s', + 'ruler.ring.heartbeat-timeout': '10m', + 'ruler.heartbeat-period': '1m', + 'ruler.search-pending-for': '1m', + + // Rule Storage Configs + 'ruler.storage.type': 'gcs', + 'rules.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config, + }, + + ruler_container:: + container.new('ruler', $._images.ruler) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.ruler_args)) + + $.util.resourcesRequests('1', '6Gi') + + $.util.resourcesLimits('16', '16Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + ruler_deployment: + deployment.new('ruler', 2, [$.ruler_container]) + + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(600) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.storage_config_mixin, + + local service = $.core.v1.service, + + ruler_service: + $.util.serviceFor($.ruler_deployment), +} diff --git a/cortex/table-manager.libsonnet b/cortex/table-manager.libsonnet new file mode 100644 index 00000000..b392cd93 --- /dev/null +++ b/cortex/table-manager.libsonnet @@ -0,0 +1,53 @@ +{ + local container = $.core.v1.container, + + table_manager_args:: + $._config.storageConfig + { + target: 'table-manager', + + // Cassandra / BigTable doesn't use these fields, so set them to zero + 'dynamodb.chunk-table.inactive-read-throughput': 0, + 'dynamodb.chunk-table.inactive-write-throughput': 0, + 'dynamodb.chunk-table.read-throughput': 0, + 'dynamodb.chunk-table.write-throughput': 0, + 
'dynamodb.periodic-table.inactive-read-throughput': 0, + 'dynamodb.periodic-table.inactive-write-throughput': 0, + 'dynamodb.periodic-table.read-throughput': 0, + 'dynamodb.periodic-table.write-throughput': 0, + + // Rate limit Bigtable Admin calls. Google seem to limit to ~100QPS, + // and given 2yrs worth of tables (~100) a sync will table 20s. This + // allows you to run upto 20 independant Cortex clusters on the same + // Google project before running into issues. + 'dynamodb.poll-interval': '10m', + 'dynamodb.periodic-table.grace-period': '3h', + 'bigtable.grpc-client-rate-limit': 5.0, + 'bigtable.grpc-client-rate-limit-burst': 5, + 'bigtable.backoff-on-ratelimits': true, + 'bigtable.table-cache.enabled': true, + }, + + table_manager_container:: + if $._config.table_manager_enabled then + container.new('table-manager', $._images.tableManager) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.table_manager_args)) + + $.util.resourcesRequests('100m', '100Mi') + + $.util.resourcesLimits('200m', '200Mi') + + $.jaeger_mixin + else {}, + + local deployment = $.apps.v1beta1.deployment, + + table_manager_deployment: + if $._config.table_manager_enabled then + deployment.new('table-manager', 1, [$.table_manager_container]) + + $.storage_config_mixin + else {}, + + table_manager_service: + if $._config.table_manager_enabled then + $.util.serviceFor($.table_manager_deployment) + else {}, +} diff --git a/cortex/test-exporter.libsonnet b/cortex/test-exporter.libsonnet new file mode 100644 index 00000000..31d7a2c9 --- /dev/null +++ b/cortex/test-exporter.libsonnet @@ -0,0 +1,40 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + test_exporter_args:: { + 'user-id': $._config.test_exporter_user_id, + 'prometheus-address': 'http://query-frontend.%(namespace)s.svc.cluster.local/api/prom' % $._config, + 'test-query-start': $._config.test_exporter_start_time, + 'extra-selectors': 
'job="%(namespace)s/test-exporter"' % $._config, + 'test-query-min-size': '1m', + 'test-epsilion': '0.05', // There is enough jitter in our system for scrapes to be off by 5%. + }, + + test_exporter_container:: + if !($._config.test_exporter_enabled) + then {} + else + container.new('test-exporter', $._images.testExporter) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.test_exporter_args)) + + $.util.resourcesRequests('100m', '100Mi') + + $.util.resourcesLimits('100m', '100Mi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + test_exporter_deployment: + if !($._config.test_exporter_enabled) + then {} + else + deployment.new('test-exporter', 1, [ + $.test_exporter_container, + ]), + + test_exporter_service: + if !($._config.test_exporter_enabled) + then {} + else + $.util.serviceFor($.test_exporter_deployment), +}