From 3ff1d4cfcbfa28de1b83c33d42d74749e4c9c97b Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sat, 16 Nov 2019 13:25:58 +0000 Subject: [PATCH] Initial commit just move everything over Signed-off-by: Goutham Veeramachaneni --- README.md | 55 ++ cortex-mixin/alerts.jsonnet | 1 + cortex-mixin/alerts.libsonnet | 520 +++++++++++++++++++ cortex-mixin/dashboards.jsonnet | 6 + cortex-mixin/dashboards.libsonnet | 649 ++++++++++++++++++++++++ cortex-mixin/jsonnetfile.json | 24 + cortex-mixin/jsonnetfile.lock.json | 26 + cortex-mixin/mixin.libsonnet | 3 + cortex-mixin/recording_rules.jsonnet | 1 + cortex-mixin/recording_rules.libsonnet | 114 +++++ cortex/alertmanager.libsonnet | 32 ++ cortex/common.libsonnet | 14 + cortex/config.libsonnet | 251 +++++++++ cortex/consul.libsonnet | 59 +++ cortex/cortex-manifests.jsonnet.example | 26 + cortex/cortex.libsonnet | 18 + cortex/distributor.libsonnet | 52 ++ cortex/etcd.libsonnet | 9 + cortex/images.libsonnet | 21 + cortex/ingester.libsonnet | 69 +++ cortex/jsonnetfile.json | 44 ++ cortex/jsonnetfile.lock.json | 48 ++ cortex/memcached.libsonnet | 63 +++ cortex/postgresql.libsonnet | 29 ++ cortex/querier.libsonnet | 52 ++ cortex/query-frontend.libsonnet | 60 +++ cortex/ruler.libsonnet | 53 ++ cortex/table-manager.libsonnet | 53 ++ cortex/test-exporter.libsonnet | 40 ++ 29 files changed, 2392 insertions(+) create mode 100644 README.md create mode 100644 cortex-mixin/alerts.jsonnet create mode 100644 cortex-mixin/alerts.libsonnet create mode 100644 cortex-mixin/dashboards.jsonnet create mode 100644 cortex-mixin/dashboards.libsonnet create mode 100644 cortex-mixin/jsonnetfile.json create mode 100644 cortex-mixin/jsonnetfile.lock.json create mode 100644 cortex-mixin/mixin.libsonnet create mode 100644 cortex-mixin/recording_rules.jsonnet create mode 100644 cortex-mixin/recording_rules.libsonnet create mode 100644 cortex/alertmanager.libsonnet create mode 100644 cortex/common.libsonnet create mode 100644 cortex/config.libsonnet 
create mode 100644 cortex/consul.libsonnet create mode 100644 cortex/cortex-manifests.jsonnet.example create mode 100644 cortex/cortex.libsonnet create mode 100644 cortex/distributor.libsonnet create mode 100644 cortex/etcd.libsonnet create mode 100644 cortex/images.libsonnet create mode 100644 cortex/ingester.libsonnet create mode 100644 cortex/jsonnetfile.json create mode 100644 cortex/jsonnetfile.lock.json create mode 100644 cortex/memcached.libsonnet create mode 100644 cortex/postgresql.libsonnet create mode 100644 cortex/querier.libsonnet create mode 100644 cortex/query-frontend.libsonnet create mode 100644 cortex/ruler.libsonnet create mode 100644 cortex/table-manager.libsonnet create mode 100644 cortex/test-exporter.libsonnet diff --git a/README.md b/README.md new file mode 100644 index 00000000..388d5ff6 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# Jsonnet for Cortex + +This repo has the jsonnet for deploying cortex and the related monitoring in Kubernetes. + +To generate the YAMLs for deploying Cortex: + +1. Make sure you have tanka and jb installed: + +``` +$ go get -u github.com/grafana/tanka/cmd/tk +$ go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb +``` + +2. Initialise the application and download the cortex jsonnet lib. + +``` +$ tk init +``` + +3. Install the cortex jsonnet. + +``` +$ jb install github.com/ksonnet/ksonnet-lib/ksonnet.beta.3 +$ cp vendor/ksonnet.beta.3/*.libsonnet lib +$ jb install https://github.com/grafana/cortex-jsonnet/cortex +``` + +3. Use the example monitoring.jsonnet.example: + +``` +$ mv vendor/cortex/cortex-manifests.jsonnet.example environments/default/main.jsonnet +``` + +4. Check what is in the example: + +``` +$ cat environments/default/main.jsonnet +.... +``` + +5. 
Generate the YAML manifests: + +``` +$ tk show environments/default +``` + +To generate the dashboards and alerts for Cortex: + +``` +$ cd cortex-mixin +$ jb install +$ jsonnet -S alerts.jsonnet +$ jsonnet -J vendor -S dashboards.jsonnet +$ jsonnet -J vendor -S recording_rules.jsonnet +``` diff --git a/cortex-mixin/alerts.jsonnet b/cortex-mixin/alerts.jsonnet new file mode 100644 index 00000000..e54b1704 --- /dev/null +++ b/cortex-mixin/alerts.jsonnet @@ -0,0 +1 @@ +std.manifestYamlDoc((import 'mixin.libsonnet').prometheus_alerts) diff --git a/cortex-mixin/alerts.libsonnet b/cortex-mixin/alerts.libsonnet new file mode 100644 index 00000000..a5a547b3 --- /dev/null +++ b/cortex-mixin/alerts.libsonnet @@ -0,0 +1,520 @@ +// According to https://developers.soundcloud.com/blog/alerting-on-slos : +local windows = [ + { long_period: '1h', short_period: '5m', for_period: '2m', factor: 14.4, severity: 'critical' }, + { long_period: '6h', short_period: '30m', for_period: '15m', factor: 6, severity: 'critical' }, + { long_period: '1d', short_period: '2h', for_period: '1h', factor: 3, severity: 'warning' }, + { long_period: '3d', short_period: '6h', for_period: '3h', factor: 1, severity: 'warning' }, +]; + +{ + _config+:: { + cortex_p99_latency_threshold_seconds: 2.5, + }, + + prometheus_alerts+:: { + groups+: [ + { + name: 'cortex_alerts', + rules: [ + { + alert: 'CortexIngesterUnhealthy', + 'for': '15m', + expr: ||| + min(cortex_ring_members{state="Unhealthy", job=~"[a-z]+/distributor"}) by (namespace, job) > 0 + |||, + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.job }} reports more than one unhealthy ingester.', + }, + }, + { + alert: 'CortexFlushStuck', + expr: ||| + (cortex_ingester_memory_chunks / cortex_ingester_memory_series) > 1.3 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.job }}/{{ $labels.instance }} is stuck flushing chunks.', + }, + }, + { + alert: 'CortexRequestErrors', 
+ expr: ||| + 100 * sum(rate(cortex_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) + / + sum(rate(cortex_request_duration_seconds_count[1m])) by (namespace, job, route) + > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexRequestLatency', + expr: ||| + cluster_namespace_job_route:cortex_request_duration_seconds:99quantile{route!~"metrics|/frontend.Frontend/Process"} + > + %(cortex_p99_latency_threshold_seconds)s + ||| % $._config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + |||, + }, + }, + { + // We're syncing every 10mins, and this means with a 5min rate, we will have a NaN when syncs fail + // and we will never trigger the alert. + // We also have a 3h grace-period for creation of tables which means the we can fail for 3h before it's an outage. + alert: 'CortexTableSyncFailure', + expr: ||| + 100 * rate(cortex_dynamo_sync_tables_seconds_count{status_code!~"2.."}[15m]) + / + rate(cortex_dynamo_sync_tables_seconds_count[15m]) + > 10 + |||, + 'for': '30m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% errors syncing tables. + |||, + }, + }, + { + alert: 'CortexQueriesIncorrect', + expr: ||| + 100 * sum by (job, namespace) (rate(test_exporter_test_case_result_total{result="fail"}[5m])) + / + sum by (job, namespace) (rate(test_exporter_test_case_result_total[5m])) > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is reporting incorrect results for {{ printf "%.2f" $value }}% of queries. 
+ |||, + }, + }, + { + alert: 'CortexBadOverrides', + expr: ||| + cortex_overrides_last_reload_successful{job!~".+/table-manager"} == 0 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} failed to reload overrides. + |||, + }, + }, + { + alert: 'CortexQuerierCapacityFull', + expr: ||| + prometheus_engine_queries_concurrent_max{job=~".+/querier"} - prometheus_engine_queries{job=~".+/querier"} == 0 + |||, + 'for': '5m', // We don't want to block for longer. + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} is at capacity processing queries. + |||, + }, + }, + { + alert: 'CortexFrontendQueriesStuck', + expr: ||| + sum by (namespace) (cortex_query_frontend_queue_length{job=~".+/query-frontend"}) > 1 + |||, + 'for': '5m', // We don't want to block for longer. + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} has {{ $value }} queued up queries. + |||, + }, + }, + { + alert: 'CortexCacheRequestErrors', + expr: ||| + 100 * sum(rate(cortex_cache_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, method) + / + sum(rate(cortex_cache_request_duration_seconds_count[1m])) by (namespace, job, method) + > 1 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} cache {{ $labels.method }} is experiencing {{ printf "%.2f" $value }}% errors. 
+ |||, + }, + }, + { + alert: 'CortexIngesterRestarts', + expr: ||| + rate(kube_pod_container_status_restarts_total{container="ingester"}[30m]) > 0 + |||, + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.namespace }}/{{ $labels.pod }} is restarting', + }, + }, + { + alert: 'CortexTransferFailed', + expr: ||| + max_over_time(cortex_shutdown_duration_seconds_count{op="transfer",status!="success"}[15m]) + |||, + 'for': '5m', + labels: { + severity: 'critical', + }, + annotations: { + message: '{{ $labels.namespace }}/{{ $labels.pod }} transfer failed.', + }, + }, + { + alert: 'CortexOldChunkInMemory', + // We flush chunks after 6h and then keep them in memory for extra 15m. If chunks are older + // than 7h (= 25200 seconds), raise an alert. Ignore cortex_oldest_unflushed_chunk_timestamp_seconds + // that are zero (eg. distributors). + expr: ||| + (time() - cortex_oldest_unflushed_chunk_timestamp_seconds > 25200) and cortex_oldest_unflushed_chunk_timestamp_seconds > 0 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.namespace }}/{{ $labels.pod }} has very old unflushed chunk in memory. + |||, + }, + }, + ], + }, + { + name: 'cortex_slo_alerts', + rules: [ + { + alert: 'CortexWriteErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(long_period)s + > 0.1 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(short_period)s + > 0.1 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its write error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s write requests in the last %(long_period)s are failing or too slow to meet the SLO." 
% window, + }, + } + for window in windows + ] + [ + { + alert: 'CortexReadErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(long_period)s + > 0.5 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(short_period)s + > 0.5 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its read error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s read requests in the last %(long_period)s are failing or too slow to meet the SLO." % window, + }, + } + for window in windows + ] + [ + { + alert: 'LegacyCortexWriteErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(long_period)s + > 0.1 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(short_period)s + > 0.1 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its write error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s write requests in the last %(long_period)s are failing or too slow to meet the SLO." 
% window, + }, + } + for window in windows + ] + [ + { + alert: 'LegacyCortexReadErrorBudgetBurn', + expr: ||| + ( + ( + 100 * namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(long_period)s + > 0.5 * %(factor)f + ) + and + ( + 100 * namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(short_period)s + > 0.5 * %(factor)f + ) + ) + ||| % window, + 'for': window.for_period, + labels: { + severity: window.severity, + period: window.long_period, // The annotation alone doesn't make this alert unique. + }, + annotations: { + summary: 'Cortex burns its read error budget too fast.', + description: "{{ $value | printf `%%.2f` }}%% of {{ $labels.job }}'s read requests in the last %(long_period)s are failing or too slow to meet the SLO." % window, + }, + } + for window in windows + ], + }, + { + name: 'cortex_gw_alerts', + rules: [ + { + alert: 'CortexGWRequestErrors', + expr: ||| + 100 * sum(rate(cortex_gw_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) + / + sum(rate(cortex_gw_request_duration_seconds_count[1m])) by (namespace, job, route) + > 0.1 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexGWRequestLatency', + expr: ||| + namespace_job_route:cortex_gw_request_duration_seconds:99quantile{route!="metrics"} + > + %(cortex_p99_latency_threshold_seconds)s + ||| % $._config, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + |||, + }, + }, + ], + }, + + { + name: 'cortex-provisioning', + rules: [ + { + alert: 'CortexProvisioningMemcachedTooSmall', + // 4 x in-memory series size = 24hrs of data. 
+ expr: ||| + ( + 4 * + sum by(cluster, namespace) (cortex_ingester_memory_series{job=~".+/ingester"} * cortex_ingester_chunk_size_bytes_sum{job=~".+/ingester"} / cortex_ingester_chunk_size_bytes_count{job=~".+/ingester"}) + / 1e9 + ) + > + ( + sum by (cluster, namespace) (memcached_limit_bytes{job=~".+/memcached"}) / 1e9 + ) + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Chunk memcached cluster for namespace {{ $labels.namespace }} are too small, should be at least {{ printf "%.2f" $value }}GB. + |||, + }, + }, + { + alert: 'CortexProvisioningTooManyActiveSeries', + // 1 million active series per ingester max. + expr: ||| + avg by (cluster, namespace) (cortex_ingester_memory_series{job=~".+/ingester"}) > 1.1e6 + and + sum by (cluster, namespace) (rate(cortex_ingester_received_chunks{job=~".+/ingester"}[1h])) == 0 + |||, + 'for': '1h', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Too many active series for ingesters in namespace {{ $labels.namespace }}, add more ingesters. + |||, + }, + }, + { + alert: 'CortexProvisioningTooManyWrites', + // 80k writes / s per ingester max. + expr: ||| + avg by (cluster,namespace) (rate(cortex_ingester_ingested_samples_total[1m])) > 80e3 + |||, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + Too much write QPS for ingesters in namespace {{ $labels.namespace }}, add more ingesters. + |||, + }, + }, + { + alert: 'CortexProvisioningTooMuchMemory', + expr: ||| + avg by (cluster, namespace) (container_memory_working_set_bytes{container_name="ingester"} / container_spec_memory_limit_bytes{container_name="ingester"}) > 0.7 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + Too much memory being used by ingesters in namespace {{ $labels.namespace }}, add more ingesters. 
+ |||, + }, + }, + ], + }, + { + name: 'memcached', + rules: [ + { + alert: 'MemcachedDown', + expr: ||| + memcached_up == 0 + |||, + 'for': '15m', + labels: { + severity: 'critical', + }, + annotations: { + message: ||| + Memcached Instance {{ $labels.instance }} is down for more than 15mins. + |||, + }, + }, + ], + }, + { + name: 'ruler_alerts', + rules: [ + { + alert: 'CortexRulerFailedEvaluations', + expr: ||| + sum(rate(cortex_prometheus_rule_evaluation_failures_total[1m])) by (namespace, job) + / + sum(rate(cortex_prometheus_rule_evaluation_total[1m])) by (namespace, job) + > 0.01 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, + }, + }, + { + alert: 'CortexRulerMissedEvaluations', + expr: ||| + sum(rate(cortex_prometheus_rule_group_missed_iterations_total[1m])) by (namespace, job) + / + sum(rate(cortex_prometheus_rule_group_iterations_total[1m])) by (namespace, job) + > 0.01 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + annotations: { + message: ||| + {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% missed iterations. 
+ |||, + }, + }, + ], + }, + ], + }, +} diff --git a/cortex-mixin/dashboards.jsonnet b/cortex-mixin/dashboards.jsonnet new file mode 100644 index 00000000..fb102817 --- /dev/null +++ b/cortex-mixin/dashboards.jsonnet @@ -0,0 +1,6 @@ +local dashboards = (import 'mixin.libsonnet').dashboards; + +{ + [name]: dashboards[name] + for name in std.objectFields(dashboards) +} diff --git a/cortex-mixin/dashboards.libsonnet b/cortex-mixin/dashboards.libsonnet new file mode 100644 index 00000000..44003655 --- /dev/null +++ b/cortex-mixin/dashboards.libsonnet @@ -0,0 +1,649 @@ +local utils = (import 'mixin-utils/utils.libsonnet'); + +local g = (import 'grafana-builder/grafana.libsonnet') + { + qpsPanel(selector):: + super.qpsPanel(selector) + { + targets: [ + target { + interval: '1m', + } + for target in super.targets + ], + }, + + latencyPanel(metricName, selector, multiplier='1e3'):: + super.latencyPanel(metricName, selector, multiplier) + { + targets: [ + target { + interval: '1m', + } + for target in super.targets + ], + }, +}; + +{ + _config+:: { + storage_backend: error 'must specify storage backend (cassandra, gcp)', + gcs_enabled: false, + }, + + dashboards+: { + 'cortex-writes.json': + if $._config.gcs_enabled then + $.cortex_writes_dashboard.addRow( + g.row('GCS') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gcs_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="POST"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gcs_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'POST')]) + ) + ) + else $.cortex_writes_dashboard, + + 'cortex-reads.json': + if $._config.gcs_enabled then + $.cortex_reads_dashboard.addRow( + g.row('GCS') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gcs_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="GET"}') + ) + 
.addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gcs_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'GET')]) + ) + ) + else $.cortex_reads_dashboard, + + 'cortex-chunks.json': + g.dashboard('Cortex / Chunks') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Active Series / Chunks') + .addPanel( + g.panel('Series') + + g.queryPanel('sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"})', 'series'), + ) + .addPanel( + g.panel('Chunks per series') + + g.queryPanel('sum(cortex_ingester_memory_chunks{cluster=~"$cluster", job=~"($namespace)/ingester"}) / sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"})', 'chunks'), + ) + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Utilization') + + g.latencyPanel('cortex_ingester_chunk_utilization', '{cluster=~"$cluster", job=~"($namespace)/ingester"}', multiplier='1') + + { yaxes: g.yaxes('percentunit') }, + ) + .addPanel( + g.panel('Age') + + g.latencyPanel('cortex_ingester_chunk_age_seconds', '{cluster=~"$cluster", job=~"($namespace)/ingester"}'), + ), + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Size') + + g.latencyPanel('cortex_ingester_chunk_length', '{cluster=~"$cluster", job=~"($namespace)/ingester"}', multiplier='1') + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Entries') + + g.queryPanel('sum(rate(cortex_chunk_store_index_entries_per_chunk_sum{cluster=~"$cluster", job=~"($namespace)/ingester"}[5m])) / sum(rate(cortex_chunk_store_index_entries_per_chunk_count{cluster=~"$cluster", job=~"($namespace)/ingester"}[5m]))', 'entries'), + ), + ) + .addRow( + g.row('Flush Stats') + .addPanel( + g.panel('Queue Length') + + 
g.queryPanel('cortex_ingester_flush_queue_length{cluster=~"$cluster", job=~"($namespace)/ingester"}', '{{instance}}'), + ) + .addPanel( + g.panel('Flush Rate') + + g.qpsPanel('cortex_ingester_chunk_age_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester"}'), + ), + ), + + 'cortex-queries.json': + g.dashboard('Cortex / Queries') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Query Frontend') + .addPanel( + g.panel('Queue Duration') + + g.latencyPanel('cortex_query_frontend_queue_duration_seconds', '{cluster=~"$cluster", job=~"($namespace)/query-frontend"}'), + ) + .addPanel( + g.panel('Retries') + + g.latencyPanel('cortex_query_frontend_retries', '{cluster=~"$cluster", job=~"($namespace)/query-frontend"}', multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Queue Length') + + g.queryPanel('cortex_query_frontend_queue_length{cluster=~"$cluster", job=~"($namespace)/query-frontend"}', '{{cluster}} / {{namespace}} / {{instance}}'), + ) + ) + .addRow( + g.row('Query Frontend - Results Cache') + .addPanel( + g.panel('Cache Hit %') + + g.queryPanel('sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m])) / sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m]))', 'Hit Rate') + + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, + ) + .addPanel( + g.panel('Cache misses') + + g.queryPanel('sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m])) - sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/query-frontend"}[1m]))', 'Miss Rate'), + ) + ) + .addRow( + g.row('Querier') + .addPanel( + g.panel('Stages') + + g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster",job=~"($namespace)/querier"}) * 1e3', 
'{{slice}}') + + { yaxes: g.yaxes('ms') } + + g.stack, + ) + .addPanel( + g.panel('Chunk cache misses') + + g.queryPanel('sum(rate(cortex_cache_fetched_keys{cluster=~"$cluster",job=~"($namespace)/querier",name="chunksmemcache"}[1m])) - sum(rate(cortex_cache_hits{cluster=~"$cluster",job=~"($namespace)/querier",name="chunksmemcache"}[1m]))', 'Hit rate'), + ) + .addPanel( + g.panel('Chunk cache corruptions') + + g.queryPanel('sum(rate(cortex_cache_corrupt_chunks_total{cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'Corrupt chunks'), + ) + ) + .addRow( + g.row('Querier - Index Cache') + .addPanel( + g.panel('Total entries') + + g.queryPanel('sum(querier_cache_added_new_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}) - sum(querier_cache_evicted_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"})', 'Entries'), + ) + .addPanel( + g.panel('Cache Hit %') + + g.queryPanel('(sum(rate(querier_cache_gets_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m])) - sum(rate(querier_cache_misses_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))) / sum(rate(querier_cache_gets_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'hit rate') + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, + ) + .addPanel( + g.panel('Churn Rate') + + g.queryPanel('sum(rate(querier_cache_evicted_total{cache="store.index-cache-read.fifocache", cluster=~"$cluster",job=~"($namespace)/querier"}[1m]))', 'churn rate'), + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('Series per Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_series', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Chunks per 
Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_chunks', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Samples per Query') + + utils.latencyRecordingRulePanel('cortex_ingester_queried_samples', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + ) + .addRow( + g.row('Chunk Store') + .addPanel( + g.panel('Index Lookups per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_index_lookups_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Series (pre-intersection) per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_series_pre_intersection_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Series (post-intersection) per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_series_post_intersection_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + .addPanel( + g.panel('Chunks per Query') + + utils.latencyRecordingRulePanel('cortex_chunk_store_chunks_per_query', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')], multiplier=1) + + { yaxes: g.yaxes('short') }, + ) + ), + + 'frontend.json': + g.dashboard('Frontend') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Cortex Reqs (cortex_gw)') + .addPanel( + g.panel('QPS') + + 
g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw')]) + ) + ), + + 'ruler.json': + g.dashboard('Cortex / Ruler') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Rule Evaluations') + .addPanel( + g.panel('EPS') + + g.queryPanel('sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval]))', 'rules processed'), + ) + .addPanel( + g.panel('Latency') + + g.queryPanel( + ||| + sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + / + sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + |||, 'average' + ), + ) + ) + .addRow( + g.row('Group Evaluations') + .addPanel( + g.panel('Missed Iterations') + + g.queryPanel('sum(rate(prometheus_rule_group_iterations_missed_total{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval]))', 'iterations missed'), + ) + .addPanel( + g.panel('Latency') + + g.queryPanel( + ||| + sum (rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + / + sum (rate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ruler"}[$__interval])) + |||, 'average' + ), + ) + ), + + 'cortex-scaling.json': + g.dashboard('Cortex / Scaling') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + 
g.row('Workload-based scaling') + .addPanel( + g.panel('Workload-based scaling') + { sort: { col: 1, desc: false } } + + g.tablePanel([ + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace", deployment=~"ingester|memcached"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + |||, + ||| + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(cortex_distributor_received_samples_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "ingester", "cluster", ".*"))[1h:]) + * 3 / 80e3 + |||, + ||| + label_replace( + sum by(cluster, namespace) ( + cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace"} + ) / 1e+6, + "deployment", "ingester", "cluster", ".*" + ) + or + label_replace( + sum by (cluster, namespace) ( + 4 * cortex_ingester_memory_series{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + * + cortex_ingester_chunk_size_bytes_sum{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + / + cortex_ingester_chunk_size_bytes_count{cluster=~"$cluster", namespace=~"$namespace", job=~".+/ingester"} + ) + / + avg by (cluster, namespace) (memcached_limit_bytes{cluster=~"$cluster", namespace=~"$namespace", job=~".+/memcached"}), + "deployment", "memcached", "namespace", ".*" + ) + |||, + ], { + cluster: { alias: 'Cluster' }, + namespace: { alias: 'Namespace' }, + deployment: { alias: 'Deployment' }, + 'Value #A': { alias: 'Current Replicas', decimals: 0 }, + 'Value #B': { alias: 'Required Replicas, by ingestion rate', decimals: 0 }, + 'Value #C': { alias: 'Required Replicas, by active series', decimals: 0 }, + }) + ) + ) + .addRow( + (g.row('Resource-based scaling') + { height: '500px' }) + .addPanel( + g.panel('Resource-based scaling') + { sort: { col: 1, desc: false } } + 
+ g.tablePanel([ + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + |||, + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + * + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(rate(container_cpu_usage_seconds_total{cluster=~"$cluster", namespace=~"$namespace"}[1m]), "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:]) + / + sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_cpu_cores{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))")) + |||, + ||| + sum by (cluster, namespace, deployment) ( + kube_deployment_spec_replicas{cluster=~"$cluster", namespace=~"$namespace"} + or + label_replace( + kube_statefulset_replicas{cluster=~"$cluster", namespace=~"$namespace"}, + "deployment", "$1", "statefulset", "(.*)" + ) + ) + * + quantile_over_time(0.99, sum by (cluster, namespace, deployment) (label_replace(container_memory_usage_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod_name", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"))[24h:1m]) + / + sum by (cluster, namespace, deployment) (label_replace(kube_pod_container_resource_requests_memory_bytes{cluster=~"$cluster", namespace=~"$namespace"}, "deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))")) + |||, + ], { + cluster: { alias: 'Cluster' }, + namespace: { alias: 'Namespace' }, + deployment: { alias: 'Deployment' }, + 'Value #A': { alias: 'Current Replicas', 
decimals: 0 }, + 'Value #B': { alias: 'Required Replicas, by CPU usage', decimals: 0 }, + 'Value #C': { alias: 'Required Replicas, by Memory usage', decimals: 0 }, + }) + ) + ), + }, + + cortex_writes_dashboard:: + g.dashboard('Cortex / Writes') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + (g.row('Headlines') + + { + height: '100px', + showTitle: false, + }) + .addPanel( + g.panel('Samples / s') + + g.statPanel('sum(cluster_namespace:cortex_distributor_received_samples:rate5m{cluster=~"$cluster", namespace=~"$namespace"})', format='reqps') + ) + .addPanel( + g.panel('Active Series') + + g.statPanel(||| + sum(cortex_ingester_memory_series{cluster=~"$cluster", job=~"($namespace)/ingester"} + / on(namespace) group_left + max by (namespace) (cortex_distributor_replication_factor{cluster=~"$cluster", job=~"($namespace)/distributor"})) + |||, format='short') + ) + .addPanel( + g.panel('QPS') + + g.statPanel('sum(rate(cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-write"}[5m]))', format='reqps') + ) + ) + .addRow( + g.row('Legacy Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-write"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'cortex-write')]) + ) + ) + .addRow( + g.row('Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="api_prom_push"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', 
[utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'api_prom_push')]) + ) + ) + .addRow( + g.row('Distributor') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/distributor"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/distributor')]) + ) + ) + .addRow( + g.row('Etcd (HA Dedupe)') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_kv_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/distributor"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_kv_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/distributor')]) + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",route="/cortex.Ingester/Push"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('route', '/cortex.Ingester/Push')]) + ) + ) + .addRow( + g.row('Consul (Ring)') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_kv_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_kv_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester')]) + ) + ) + .addRow( + g.row('Memcached') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_memcache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",method="Memcache.Put"}') + ) + 
.addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_memcache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('method', 'Memcache.Put')]) + ) + ) + .addRow({ + cassandra: + g.row('Cassandra') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cassandra_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="INSERT"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cassandra_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', 'INSERT')]) + ), + + gcp: + g.row('BigTable') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="/google.bigtable.v2.Bigtable/MutateRows"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')]) + ), + + dynamodb: + g.row('DynamoDB') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_dynamo_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester", operation="DynamoDB.BatchWriteItem"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_dynamo_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.eq('operation', 'DynamoDB.BatchWriteItem')]) + ), + }[$._config.storage_backend]), + + cortex_reads_dashboard:: + g.dashboard('Cortex / Reads') + .addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*cortex.*"}', 'cluster') + .addMultiTemplate('namespace', 
'kube_pod_container_info{image=~".*cortex.*"}', 'namespace') + .addRow( + g.row('Legacy Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_gw_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route="cortex-read"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_gw_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.eq('route', 'cortex-read')]) + ) + ) + .addRow( + g.row('Gateway') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/cortex-gw", route=~"(api_prom_api_v1_query_range|api_prom_api_v1_query|api_prom_api_v1_label_name_values|api_prom_api_v1_series|api_prom_api_v1_labels)"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/cortex-gw'), utils.selector.re('route', '(api_prom_api_v1_query_range|api_prom_api_v1_query|api_prom_api_v1_label_name_values|api_prom_api_v1_series|api_prom_api_v1_labels)')]) + ) + ) + .addRow( + g.row('Query Frontend') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/query-frontend"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/query-frontend'), utils.selector.neq('route', '/frontend.Frontend/Process')]) + ) + ) + .addRow( + g.row('Cache - Query Results') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/query-frontend"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', 
'$cluster'), utils.selector.re('job', '($namespace)/query-frontend')]) + ) + ) + .addRow( + g.row('Querier') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier')]) + ) + ) + .addRow( + g.row('Ingester') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/ingester",route!~"/cortex.Ingester/Push|metrics|ready|traces"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/ingester'), utils.selector.nre('route', '/cortex.Ingester/Push|metrics|ready')]) + ) + ) + .addRow( + g.row('Memcached - Index') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier",method="store.index-cache-read.memcache.fetch"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('method', 'store.index-cache-read.memcache.fetch')]) + ) + ) + .addRow( + g.row('Memcached - Chunks') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cache_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier",method="chunksmemcache.fetch"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cache_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('method', 'chunksmemcache.fetch')]) + ) + ) + .addRow({ + cassandra: + 
g.row('Cassandra') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_cassandra_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="SELECT"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_cassandra_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'SELECT')]) + ), + + gcp: + g.row('BigTable') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_bigtable_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="/google.bigtable.v2.Bigtable/ReadRows"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_bigtable_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')]) + ), + + dynamodb: + g.row('DynamoDB') + .addPanel( + g.panel('QPS') + + g.qpsPanel('cortex_dynamo_request_duration_seconds_count{cluster=~"$cluster", job=~"($namespace)/querier", operation="DynamoDB.QueryPages"}') + ) + .addPanel( + g.panel('Latency') + + utils.latencyRecordingRulePanel('cortex_dynamo_request_duration_seconds', [utils.selector.re('cluster', '$cluster'), utils.selector.re('job', '($namespace)/querier'), utils.selector.eq('operation', 'DynamoDB.QueryPages')]) + ), + }[$._config.storage_backend]), +} diff --git a/cortex-mixin/jsonnetfile.json b/cortex-mixin/jsonnetfile.json new file mode 100644 index 00000000..87e724d5 --- /dev/null +++ b/cortex-mixin/jsonnetfile.json @@ -0,0 +1,24 @@ +{ + "dependencies": [ + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "master" + }, + { + "name": "mixin-utils", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": 
"mixin-utils" + } + }, + "version": "master" + } + ] +} diff --git a/cortex-mixin/jsonnetfile.lock.json b/cortex-mixin/jsonnetfile.lock.json new file mode 100644 index 00000000..fe50a404 --- /dev/null +++ b/cortex-mixin/jsonnetfile.lock.json @@ -0,0 +1,26 @@ +{ + "dependencies": [ + { + "name": "grafana-builder", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "grafana-builder" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE=" + }, + { + "name": "mixin-utils", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "mixin-utils" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "J1iExBloZLjVEvdzHVjvP9AVTqDOJSfFOtBoeQ7EhKk=" + } + ] +} diff --git a/cortex-mixin/mixin.libsonnet b/cortex-mixin/mixin.libsonnet new file mode 100644 index 00000000..b2b2f10d --- /dev/null +++ b/cortex-mixin/mixin.libsonnet @@ -0,0 +1,3 @@ +(import 'dashboards.libsonnet') + +(import 'alerts.libsonnet') + +(import 'recording_rules.libsonnet') diff --git a/cortex-mixin/recording_rules.jsonnet b/cortex-mixin/recording_rules.jsonnet new file mode 100644 index 00000000..4cda6c6f --- /dev/null +++ b/cortex-mixin/recording_rules.jsonnet @@ -0,0 +1 @@ +std.manifestYamlDoc((import 'mixin.libsonnet').prometheus_rules) diff --git a/cortex-mixin/recording_rules.libsonnet b/cortex-mixin/recording_rules.libsonnet new file mode 100644 index 00000000..7291ac5c --- /dev/null +++ b/cortex-mixin/recording_rules.libsonnet @@ -0,0 +1,114 @@ +local utils = import 'mixin-utils/utils.libsonnet'; +local windows = [ + { period: '5m' }, + { period: '30m' }, + { period: '1h' }, + { period: '2h' }, + { period: '6h' }, + { period: '1d' }, + { period: '3d' }, +]; + +{ + prometheus_rules+:: { + groups+: [{ + name: 'cortex_rules', + rules: + utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'job']) + + 
utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('cortex_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']) + + utils.histogramRules('cortex_memcache_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_cache_request_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_cache_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_bigtable_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_cassandra_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_dynamo_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_query_frontend_retries', ['cluster', 'job']) + + utils.histogramRules('cortex_query_frontend_queue_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_series', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_chunks', ['cluster', 'job']) + + utils.histogramRules('cortex_ingester_queried_samples', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_index_lookups_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_series_pre_intersection_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_series_post_intersection_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_chunk_store_chunks_per_query', ['cluster', 'job']) + + utils.histogramRules('cortex_database_request_duration_seconds', ['cluster', 'job', 'method']) + + utils.histogramRules('cortex_gcs_request_duration_seconds', ['cluster', 'job', 'operation']) + + utils.histogramRules('cortex_kv_request_duration_seconds', ['cluster', 'job']), + }, { + name: 'frontend_rules', + rules: + utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'job']) + + 
utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('tsdb_gw_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'job']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'job', 'route']) + + utils.histogramRules('cortex_gw_request_duration_seconds', ['cluster', 'namespace', 'job', 'route']), + }, { + name: 'cortex_slo_rules', + rules: [ + { + record: 'namespace_job:cortex_gateway_write_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_request_duration_seconds_bucket{status_code!~"5..", le="1", route="api_prom_push", job=~".*/cortex-gw"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_request_duration_seconds_count{route="api_prom_push", job=~".*/cortex-gw"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 'namespace_job:cortex_gateway_read_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_request_duration_seconds_bucket{status_code!~"5..",le="2.5",route=~"api_prom_api_v1_query.*", job=~".*/cortex-gw"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_request_duration_seconds_count{route=~"api_prom_api_v1_query.*", job=~".*/cortex-gw"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 'namespace_job:cortex_gw_write_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_bucket{status_code!~"error|5..",le="1",route="cortex-write"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_count{route="cortex-write"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ] + [ + { + record: 
'namespace_job:cortex_gw_read_slo_errors_per_request:ratio_rate%(period)s' % window, + expr: ||| + 1 - + ( + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_bucket{status_code!~"error|5..",le="2.5",route="cortex-read"}[%(period)s])) + / + sum by (namespace, job) (rate(cortex_gw_request_duration_seconds_count{route="cortex-read"}[%(period)s])) + ) + ||| % window, + } + for window in windows + ], + }, { + name: 'cortex_received_samples', + rules: [ + { + record: 'cluster_namespace:cortex_distributor_received_samples:rate5m', + expr: ||| + sum by (cluster, namespace) (rate(cortex_distributor_received_samples_total{job=~".*/distributor"}[5m])) + |||, + }, + ], + }], + }, +} diff --git a/cortex/alertmanager.libsonnet b/cortex/alertmanager.libsonnet new file mode 100644 index 00000000..168f6be2 --- /dev/null +++ b/cortex/alertmanager.libsonnet @@ -0,0 +1,32 @@ +{ + local container = $.core.v1.container, + + alertmanager_args:: + { + target: 'alertmanager', + 'log.level': 'debug', + + 'alertmanager.storage.type': 'gcs', + 'alertmanager.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config, + 'alertmanager.web.external-url': 'http://alertmanager.%s.svc.cluster.local/alertmanager' % $._config.namespace, + }, + + alertmanager_container:: + container.new('alertmanager', $._images.alertmanager) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.alertmanager_args)) + + $.util.resourcesRequests('100m', '1Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + alertmanager_deployment: + deployment.new('alertmanager', 1, [$.alertmanager_container]) + + deployment.mixin.spec.template.spec.withRestartPolicy('Always') + + $.util.antiAffinity, + + local service = $.core.v1.service, + + alertmanager_server: + $.util.serviceFor($.alertmanager_deployment), +} diff --git a/cortex/common.libsonnet b/cortex/common.libsonnet new file mode 100644 index 00000000..62a5a338 --- /dev/null +++ 
b/cortex/common.libsonnet @@ -0,0 +1,14 @@ +{ + namespace: + $.core.v1.namespace.new($._config.namespace), + + util+:: { + local containerPort = $.core.v1.containerPort, + + defaultPorts:: + [ + containerPort.newNamed('http-metrics', 80), + containerPort.newNamed('grpc', 9095), + ], + }, +} diff --git a/cortex/config.libsonnet b/cortex/config.libsonnet new file mode 100644 index 00000000..17350551 --- /dev/null +++ b/cortex/config.libsonnet @@ -0,0 +1,251 @@ +{ + _config+: { + namespace: error 'must define namespace', + cluster: error 'must define cluster', + replication_factor: 3, + + storage_backend: error 'must specify storage backend (cassandra, gcp)', + table_prefix: $._config.namespace, + cassandra_addresses: error 'must specify cassandra addresses', + bigtable_instance: error 'must specify bigtable instance', + bigtable_project: error 'must specify bigtable project', + aws_region: error 'must specify AWS region', + s3_bucket_name: error 'must specify S3 bucket name', + + // schema is used to generate the storage schema yaml file used by + // the Cortex chunks storage: + // - More information: https://github.com/cortexproject/cortex/pull/1072 + // - TSDB integration doesn't rely on the Cortex chunks store, so doesn't + // support the schema config. + schema: if $._config.storage_engine != 'tsdb' then + error 'must specify a schema config' + else + [], + + max_series_per_user: 250000, + max_series_per_metric: 10000, + max_chunk_idle: '15m', + + test_exporter_enabled: false, + test_exporter_start_time: error 'must specify test exporter start time', + test_exporter_user_id: error 'must specify test exporter user id', + + querierConcurrency: 8, + querier_ingester_streaming_enabled: $._config.storage_engine != 'tsdb', + + jaeger_agent_host: null, + + // Use the Cortex chunks storage engine by default, while giving the ability + // to switch to tsdb storage. 
+ storage_engine: 'chunks', + storage_tsdb_bucket_name: error 'must specify GCS bucket name to store TSDB blocks', + + // TSDB storage engine doesn't require the table manager. + table_manager_enabled: $._config.storage_engine != 'tsdb', + + // TSDB storage engine doesn't require memcached for chunks or chunk indexes. + memcached_index_queries_enabled: $._config.storage_engine != 'tsdb', + memcached_index_writes_enabled: $._config.storage_engine != 'tsdb', + memcached_chunks_enabled: $._config.storage_engine != 'tsdb', + + enabledBackends: [ + backend + for backend in std.split($._config.storage_backend, ',') + ], + + client_configs: { + aws: + if std.count($._config.enabledBackends, 'aws') > 0 then { + 'dynamodb.api-limit': 10, + 'dynamodb.url': 'https://%s' % $._config.aws_region, + 's3.url': 'https://%s/%s' % [$._config.aws_region, $._config.s3_bucket_name], + } else {}, + cassandra: + if std.count($._config.enabledBackends, 'cassandra') > 0 then { + 'cassandra.keyspace': $._config.namespace, + 'cassandra.addresses': $._config.cassandra_addresses, + 'cassandra.replication-factor': $._config.replication_factor, + } else {}, + gcp: + if std.count($._config.enabledBackends, 'gcp') > 0 then { + 'bigtable.project': $._config.bigtable_project, + 'bigtable.instance': $._config.bigtable_instance, + } else {}, + }, + + storeConfig: self.storeMemcachedChunksConfig, + + storeMemcachedChunksConfig: if $._config.memcached_chunks_enabled then + { + 'memcached.hostname': 'memcached.%s.svc.cluster.local' % $._config.namespace, + 'memcached.service': 'memcached-client', + 'memcached.timeout': '3s', + 'memcached.batchsize': 1024, + 'memcached.consistent-hash': true, + } + else {}, + + storageConfig: + $._config.client_configs.aws + + $._config.client_configs.cassandra + + $._config.client_configs.gcp + + $._config.storageTSDBConfig + + { 'config-yaml': '/etc/cortex/schema/config.yaml' }, + + // TSDB blocks storage configuration, used only when 'tsdb' storage + // engine is 
explicitly enabled. + storageTSDBConfig: if $._config.storage_engine == 'tsdb' then { + 'store.engine': 'tsdb', + 'experimental.tsdb.dir': '/tmp/tsdb', + 'experimental.tsdb.sync-dir': '/tmp/tsdb', + 'experimental.tsdb.block-ranges-period': '2h', + 'experimental.tsdb.retention-period': '1h', + 'experimental.tsdb.ship-interval': '1m', + 'experimental.tsdb.backend': 'gcs', + 'experimental.tsdb.gcs.bucket-name': $._config.storage_tsdb_bucket_name, + } else {}, + + // Shared between the Ruler and Querier + queryConfig: { + // Use iterators to merge chunks, to reduce memory usage. + 'querier.ingester-streaming': $._config.querier_ingester_streaming_enabled, + 'querier.batch-iterators': true, + + // Don't query the chunk store for data younger than max_chunk_idle. + 'store.min-chunk-age': $._config.max_chunk_idle, + + // Don't query ingesters for older queries. + // Chunks are 6hrs right now. Add some slack for safety. + 'querier.query-ingesters-within': '12h', + + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + + // Limit the size of the rows we read from the index. + 'store.cardinality-limit': 1e6, + + // Don't allow individual queries of longer than 31days. Due to day query + // splitting in the frontend, the reality is this only limits rate(foo[31d]) + // type queries. + 'store.max-query-length': '744h', + } + ( + if $._config.memcached_index_queries_enabled then + { + // Setting for index cache. + 'store.index-cache-validity': '14m', // ingester.retain-period=15m, 1m less for safety. 
+ 'store.index-cache-read.cache.enable-fifocache': true, + 'store.index-cache-read.fifocache.size': 102400, + 'store.index-cache-read.memcached.hostname': 'memcached-index-queries.%(namespace)s.svc.cluster.local' % $._config, + 'store.index-cache-read.memcached.service': 'memcached-client', + 'store.index-cache-read.memcached.timeout': '500ms', + 'store.index-cache-read.memcached.consistent-hash': true, + 'store.cache-lookups-older-than': '36h', + } + else {} + ), + + ringConfig: { + 'consul.hostname': 'consul.%s.svc.cluster.local:8500' % $._config.namespace, + 'consul.consistent-reads': false, + 'ring.prefix': '', + }, + + // Some distributor config is shared with the querier. + distributorConfig: { + 'distributor.replication-factor': $._config.replication_factor, + 'distributor.shard-by-all-labels': true, + 'distributor.health-check-ingesters': true, + 'ring.heartbeat-timeout': '10m', + 'consul.consistent-reads': false, + }, + + overrides: { + // === Per-tenant usage limits. === + // These are the defaults. These are not global limits but per instance limits. 
+ // + // small_user: { + // ingestion_rate: 10,000 + // ingestion_burst_size: 20,000 + // + // max_series_per_user: 250,000 + // max_series_per_metric: 10,000 + // + // max_series_per_query: 10,000 + // max_samples_per_query: 100,000 + // }, + + medium_user:: { + ingestion_rate: 25000, + ingestion_burst_size: 50000, + + max_series_per_metric: 100000, + max_series_per_user: 500000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + + big_user:: { + ingestion_rate: 50000, + ingestion_burst_size: 70000, + + max_series_per_metric: 100000, + max_series_per_user: 1000000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + + super_user:: { + ingestion_rate: 200000, + ingestion_burst_size: 240000, + + max_series_per_metric: 200000, + max_series_per_user: 2000000, + + max_series_per_query: 100000, + max_samples_per_query: 1000000, + }, + }, + + schemaID: std.md5(std.toString($._config.schema)), + + enable_pod_priorities: true, + }, + + local configMap = $.core.v1.configMap, + + overrides_config: + configMap.new('overrides') + + configMap.withData({ + 'overrides.yaml': $.util.manifestYaml({ + overrides: $._config.overrides, + }), + }), + + storage_config: + configMap.new('schema-' + $._config.schemaID) + + configMap.withData({ + 'config.yaml': $.util.manifestYaml({ + configs: $._config.schema, + }), + }), + + local deployment = $.apps.v1beta1.deployment, + storage_config_mixin:: + deployment.mixin.spec.template.metadata.withAnnotationsMixin({ schemaID: $._config.schemaID },) + + $.util.configVolumeMount('schema-' + $._config.schemaID, '/etc/cortex/schema'), + + // This removed the CPU limit from the config. NB won't show up in subset + // diffs, but ks apply will do the right thing. + removeCPULimitsMixin:: { + resources+: { + // Can't use super.memory in limits, as we want to + // override the whole limits struct. 
+ local memoryLimit = super.limits.memory, + + limits: { + memory: memoryLimit, + }, + }, + }, +} diff --git a/cortex/consul.libsonnet b/cortex/consul.libsonnet new file mode 100644 index 00000000..9ece317d --- /dev/null +++ b/cortex/consul.libsonnet @@ -0,0 +1,59 @@ +local consul = import 'consul/consul.libsonnet'; + +{ + _config+:: { + consul_replicas: 1, + other_namespaces+: [], + }, + + consul: consul { + _config+:: { + consul_replicas: $._config.consul_replicas, + namespace: $._config.namespace, + }, + + // Snapshot the raft.db very frequently, to stop it getting too big. + consul_config+:: { + raft_snapshot_threshold: 128, + raft_trailing_logs: 10e3, + }, + + local container = $.core.v1.container, + + consul_container+:: + container.withArgsMixin([ + '-ui-content-path=/%s/consul/' % $._config.namespace, + ]) + + $.util.resourcesRequests('4', '4Gi'), + + local deployment = $.apps.v1beta1.deployment, + local podAntiAffinity = deployment.mixin.spec.template.spec.affinity.podAntiAffinity, + local volume = $.core.v1.volume, + consul_deployment+: + + // Keep the consul state on a ramdisk, as they are ephemeral to us. + $.util.emptyVolumeMount( + 'data', + '/consul/data/', + volumeMixin=volume.mixin.emptyDir.withMedium('Memory'), + ) + + + // Ensure Consul is not scheduled on the same host as an ingester + // (in any namespace - hence other_namespaces). 
+ podAntiAffinity.withRequiredDuringSchedulingIgnoredDuringExecutionMixin([ + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.new() + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.mixin.labelSelector.withMatchLabels({ name: 'ingester' }) + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.withNamespaces([$._config.namespace] + $._config.other_namespaces) + + podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecutionType.withTopologyKey('kubernetes.io/hostname'), + ]) + + + $.util.podPriority('high'), + + // Don't healthcheck services, adds load to consul. + consul_exporter+:: + container.withArgsMixin([ + '--no-consul.health-summary', + '--consul.allow_stale', + ]), + }, +} diff --git a/cortex/cortex-manifests.jsonnet.example b/cortex/cortex-manifests.jsonnet.example new file mode 100644 index 00000000..7edc14cd --- /dev/null +++ b/cortex/cortex-manifests.jsonnet.example @@ -0,0 +1,26 @@ +local cortex = import "cortex/cortex.libsonnet"; + +cortex { + _config+:: { + namespace: "default", + schema: [{ + from: '2019-11-15', + store: 'bigtable-hashed', + object_store: 'gcs', + schema: 'v10', + index: { + prefix: 'dev_index_', + period: '168h', + }, + chunks: { + prefix: 'dev_chunks_', + period: '168h', + }, + }], + + storage_backend: 'gcp', + bigtable_instance: 'example-instance-prod', + bigtable_project: 'example-project1-cortex', + }, +} + diff --git a/cortex/cortex.libsonnet b/cortex/cortex.libsonnet new file mode 100644 index 00000000..430c1d43 --- /dev/null +++ b/cortex/cortex.libsonnet @@ -0,0 +1,18 @@ +(import 'ksonnet-util/kausal.libsonnet') + +(import 'ksonnet-util/jaeger.libsonnet') + +(import 'images.libsonnet') + +(import 'common.libsonnet') + +(import 'config.libsonnet') + +(import 'consul.libsonnet') + + +// Cortex services +(import 'distributor.libsonnet') + +(import 'ingester.libsonnet') + +(import 'querier.libsonnet') + +(import 'query-frontend.libsonnet') + +(import 
'table-manager.libsonnet') + + +// Supporting services +(import 'etcd.libsonnet') + +(import 'memcached.libsonnet') + +(import 'test-exporter.libsonnet') diff --git a/cortex/distributor.libsonnet b/cortex/distributor.libsonnet new file mode 100644 index 00000000..d5b147d8 --- /dev/null +++ b/cortex/distributor.libsonnet @@ -0,0 +1,52 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + distributor_args:: + $._config.ringConfig + + $._config.distributorConfig + + { + target: 'distributor', + + 'distributor.ingestion-rate-limit': 10000, + 'distributor.ingestion-burst-size': 20000, + 'validation.reject-old-samples': true, + 'validation.reject-old-samples.max-age': '12h', + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + 'distributor.remote-timeout': '20s', + + 'distributor.ha-tracker.enable': true, + 'distributor.ha-tracker.enable-for-all-users': true, + 'distributor.ha-tracker.store': 'etcd', + 'distributor.ha-tracker.etcd.endpoints': 'etcd-client.%s.svc.cluster.local.:2379' % $._config.namespace, + 'distributor.ha-tracker.prefix': 'prom_ha/', + + // The memory requests are 2G, and we barely use 100M. + // By adding a ballast of 1G, we can drastically reduce GC, but also keep the usage at + // around 1.25G, reducing the 99%ile. 
+ 'mem-ballast-size-bytes': 1 << 30, // 1GB + }, + + distributor_container:: + container.new('distributor', $._images.distributor) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.distributor_args)) + + $.util.resourcesRequests('2', '2Gi') + + $.util.resourcesLimits('6', '4Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + distributor_deployment: + deployment.new('distributor', 3, [ + $.distributor_container, + ]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex'), + + local service = $.core.v1.service, + + distributor_service: + $.util.serviceFor($.distributor_deployment) + + service.mixin.spec.withClusterIp('None'), +} diff --git a/cortex/etcd.libsonnet b/cortex/etcd.libsonnet new file mode 100644 index 00000000..41981db9 --- /dev/null +++ b/cortex/etcd.libsonnet @@ -0,0 +1,9 @@ +local etcd_cluster = import 'etcd-operator/etcd-cluster.libsonnet'; + +etcd_cluster { + etcd: + $.etcd_cluster('etcd', env=[{ + name: 'ETCD_AUTO_COMPACTION_RETENTION', + value: '1h', + }]), +} diff --git a/cortex/images.libsonnet b/cortex/images.libsonnet new file mode 100644 index 00000000..8d5ccce4 --- /dev/null +++ b/cortex/images.libsonnet @@ -0,0 +1,21 @@ +{ + _images+:: { + // Various third-party images. + memcached: 'memcached:1.5.17-alpine', + memcachedExporter: 'prom/memcached-exporter:v0.6.0', + postgresql: 'postgres:9.6.11-alpine', + + // Our services. + cortex: 'cortexproject/cortex:master-37c1f178', + + distributor: self.cortex, + ingester: self.cortex, + querier: self.cortex, + query_frontend: self.cortex, + tableManager: self.cortex, + // TODO(gouthamve/jtlisi): Upstream the ruler and AM configs. 
+ ruler: 'jtlisi/cortex:20190806_prommanager_ruler_with_api-50343f8d', + alertmanager: 'jtlisi/cortex:20190819_alertmanager_update-165b393a', + testExporter: 'cortexproject/test-exporter:master-ef99cdaf', + }, +} diff --git a/cortex/ingester.libsonnet b/cortex/ingester.libsonnet new file mode 100644 index 00000000..0e08ba15 --- /dev/null +++ b/cortex/ingester.libsonnet @@ -0,0 +1,69 @@ +{ + local container = $.core.v1.container, + + ingester_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + { + target: 'ingester', + + // Ring config. + 'ingester.num-tokens': 512, + 'ingester.join-after': '30s', + 'ingester.max-transfer-retries': 60, // Each retry is backed off by 5s, so 5mins for new ingester to come up. + 'ingester.claim-on-rollout': true, + 'ingester.heartbeat-period': '15s', + + // Chunk building/flushing config. + 'ingester.chunk-encoding': 3, // Bigchunk encoding + 'ingester.retain-period': '15m', + 'ingester.max-chunk-age': '6h', + 'ingester.spread-flushes': true, + + // Limits config. + 'ingester.max-chunk-idle': $._config.max_chunk_idle, + 'ingester.max-series-per-user': $._config.max_series_per_user, + 'ingester.max-series-per-metric': $._config.max_series_per_metric, + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + 'server.grpc-max-concurrent-streams': 100000, + } + ( + if $._config.memcached_index_writes_enabled then + { + // Setup index write deduping. 
+ 'store.index-cache-write.memcached.hostname': 'memcached-index-writes.%(namespace)s.svc.cluster.local' % $._config, + 'store.index-cache-write.memcached.service': 'memcached-client', + 'store.index-cache-write.memcached.consistent-hash': true, + } + else {} + ), + + ingester_container:: + container.new('ingester', $._images.ingester) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.ingester_args)) + + container.mixin.readinessProbe.httpGet.withPath('/ready') + + container.mixin.readinessProbe.httpGet.withPort(80) + + container.mixin.readinessProbe.withInitialDelaySeconds(15) + + container.mixin.readinessProbe.withTimeoutSeconds(1) + + + $.util.resourcesRequests('4', '15Gi') + + $.util.resourcesLimits(null, '25Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + ingester_deployment: + deployment.new('ingester', 3, [$.ingester_container]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + deployment.mixin.spec.withMinReadySeconds(60) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) + + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(4800) + + $.storage_config_mixin + + $.util.podPriority('high'), + + ingester_service: + $.util.serviceFor($.ingester_deployment), +} diff --git a/cortex/jsonnetfile.json b/cortex/jsonnetfile.json new file mode 100644 index 00000000..375b9813 --- /dev/null +++ b/cortex/jsonnetfile.json @@ -0,0 +1,44 @@ +{ + "dependencies": [ + { + "name": "consul", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "consul" + } + }, + "version": "master" + }, + { + "name": "etcd-operator", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "etcd-operator" + } + }, + "version": "master" + }, + { + "name": "ksonnet-util", + "source": { + "git": { + "remote": 
"https://github.com/grafana/jsonnet-libs", + "subdir": "ksonnet-util" + } + }, + "version": "master" + }, + { + "name": "memcached", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "memcached" + } + }, + "version": "master" + } + ] +} diff --git a/cortex/jsonnetfile.lock.json b/cortex/jsonnetfile.lock.json new file mode 100644 index 00000000..e4f26b0b --- /dev/null +++ b/cortex/jsonnetfile.lock.json @@ -0,0 +1,48 @@ +{ + "dependencies": [ + { + "name": "consul", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "consul" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "S3cLCI5OLpSdwqsAWkNtdGXTlFTpuVGB29m6CXw8xHI=" + }, + { + "name": "etcd-operator", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "etcd-operator" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "KUklp389C8zcSrYjRkIy00w81gP1HGU3eDmxghqtmBs=" + }, + { + "name": "ksonnet-util", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "ksonnet-util" + } + }, + "version": "250bf5499d81e5e77e1e5ed2242c89ad27485aec", + "sum": "8gmmSMANOAs4dfP5a09Y+nE9pd8E4TMpk3YPKxT4ys0=" + }, + { + "name": "memcached", + "source": { + "git": { + "remote": "https://github.com/grafana/jsonnet-libs", + "subdir": "memcached" + } + }, + "version": "8f9d72b2e35b5f3cc1b7c2a8af9bbae7658804e2", + "sum": "hroD9u119YWI5g2SnspmSgMDJUMyXFZDnjymhUS6Pjs=" + } + ] +} diff --git a/cortex/memcached.libsonnet b/cortex/memcached.libsonnet new file mode 100644 index 00000000..bd00189f --- /dev/null +++ b/cortex/memcached.libsonnet @@ -0,0 +1,63 @@ +local memcached = import 'memcached/memcached.libsonnet'; + +memcached { + memcached+:: { + cpu_limits:: null, + + deployment: {}, + + local statefulSet = $.apps.v1beta1.statefulSet, + + statefulSet: + statefulSet.new(self.name, 3, [ + self.memcached_container, + 
self.memcached_exporter, + ], []) + + statefulSet.mixin.spec.withServiceName(self.name) + + $.util.antiAffinity, + + local service = $.core.v1.service, + + service: + $.util.serviceFor(self.statefulSet) + + service.mixin.spec.withClusterIp('None'), + }, + + // Dedicated memcached instance used to cache query results. + memcached_frontend: $.memcached { + name: 'memcached-frontend', + max_item_size: '5m', + }, + + // Dedicated memcached instance used to temporarily cache index lookups. + memcached_index_queries: if $._config.memcached_index_queries_enabled then + $.memcached { + name: 'memcached-index-queries', + max_item_size: '5m', + } + else {}, + + // Dedicated memcached instance used to dedupe writes to the index. + memcached_index_writes: if $._config.memcached_index_writes_enabled then + $.memcached { + name: 'memcached-index-writes', + } + else {}, + + // Memcached instance used to cache chunks. + memcached_chunks: if $._config.memcached_chunks_enabled then + $.memcached { + name: 'memcached', + + // Save memory by more tightly provisioning memcached chunks. + memory_limit_mb: 6 * 1024, + overprovision_factor: 1.05, + + local container = $.core.v1.container, + + // Raise connection limits now our clusters are bigger. 
+ memcached_container+:: + container.withArgsMixin(['-c 4096']), + } + else {}, +} diff --git a/cortex/postgresql.libsonnet b/cortex/postgresql.libsonnet new file mode 100644 index 00000000..d63eb8c6 --- /dev/null +++ b/cortex/postgresql.libsonnet @@ -0,0 +1,29 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + _config+: { + pgUser: 'cortex', + pgPassword: '1234', + }, + + postgresql_container:: + container.new('postgres', $._images.postgresql) + + container.withPorts([ + containerPort.newNamed('postgresql', 5432), + ]) + + container.withEnvMap({ + POSTGRES_USER: $._config.pgUser, + POSTGRES_DB: 'configs', + }) + + $.util.resourcesRequests('2', '1Gi') + + $.util.resourcesLimits('4', '2Gi'), + + local deployment = $.apps.v1beta1.deployment, + postgresql_deployment: + deployment.new('postgresql', 1, [$.postgresql_container]), + + local service = $.core.v1.service, + postgresql_service: + $.util.serviceFor($.postgresql_deployment), +} diff --git a/cortex/querier.libsonnet b/cortex/querier.libsonnet new file mode 100644 index 00000000..fda924c7 --- /dev/null +++ b/cortex/querier.libsonnet @@ -0,0 +1,52 @@ +{ + local container = $.core.v1.container, + + querier_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + $._config.queryConfig + + $._config.distributorConfig + + { + target: 'querier', + + // Increase HTTP server response write timeout, as we were seeing some + // queries that return a lot of data timeing out. + 'server.http-write-timeout': '1m', + + // Limit query concurrency to prevent multi large queries causing an OOM. + 'querier.max-concurrent': $._config.querierConcurrency, + + // Limit to N/2 worker threads per frontend, as we have two frontends. 
+ 'querier.worker-parallelism': $._config.querierConcurrency / 2, + 'querier.frontend-address': 'query-frontend.%(namespace)s.svc.cluster.local:9095' % $._config, + 'querier.frontend-client.grpc-max-send-msg-size': 100 << 20, + + 'log.level': 'debug', + }, + + querier_container:: + container.new('querier', $._images.querier) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.querier_args)) + + $.util.resourcesRequests('1', '12Gi') + + $.util.resourcesLimits(null, '24Gi') + + $.jaeger_mixin + + container.withEnvMap({ + JAEGER_REPORTER_MAX_QUEUE_SIZE: '1024', // Default is 100. + }), + + local deployment = $.apps.v1beta1.deployment, + + querier_deployment: + deployment.new('querier', 3, [$.querier_container]) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.storage_config_mixin, + + local service = $.core.v1.service, + + querier_service: + $.util.serviceFor($.querier_deployment) + + service.mixin.spec.withSelector({ name: 'query-frontend' }), +} diff --git a/cortex/query-frontend.libsonnet b/cortex/query-frontend.libsonnet new file mode 100644 index 00000000..a9e64abe --- /dev/null +++ b/cortex/query-frontend.libsonnet @@ -0,0 +1,60 @@ +{ + local container = $.core.v1.container, + + query_frontend_args:: { + target: 'query-frontend', + + // Need log.level=debug so all queries are logged, needed for analyse.py. + 'log.level': 'debug', + + // Increase HTTP server response write timeout, as we were seeing some + // queries that return a lot of data timeing out. + 'server.http-write-timeout': '1m', + + // Split long queries up into multiple day-long queries. + 'querier.split-queries-by-day': true, + + // Cache query results. 
+ 'querier.align-querier-with-step': true, + 'querier.cache-results': true, + 'frontend.memcached.hostname': 'memcached-frontend.%s.svc.cluster.local' % $._config.namespace, + 'frontend.memcached.service': 'memcached-client', + 'frontend.memcached.timeout': '500ms', + 'frontend.memcached.consistent-hash': true, + + // So that exporters like cloudwatch can still send in data and be un-cached. + 'frontend.max-cache-freshness': '10m', + + // Compress HTTP responses; improves latency for very big results and slow + // connections. + 'querier.compress-http-responses': true, + + // So it can recieve big responses from the querier. + 'server.grpc-max-recv-msg-size-bytes': 100 << 20, + + // Limit queries to 500 days, allow this to be override per-user. + 'store.max-query-length': '12000h', // 500 Days + 'limits.per-user-override-config': '/etc/cortex/overrides.yaml', + }, + + query_frontend_container:: + container.new('query-frontend', $._images.query_frontend) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.query_frontend_args)) + + $.util.resourcesRequests('2', '600Mi') + + $.util.resourcesLimits(null, '1200Mi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + query_frontend_deployment: + deployment.new('query-frontend', 2, [$.query_frontend_container]) + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.util.antiAffinity, + + local service = $.core.v1.service, + + query_frontend_service: + $.util.serviceFor($.query_frontend_deployment) + + service.mixin.spec.withClusterIp('None'), +} diff --git a/cortex/ruler.libsonnet b/cortex/ruler.libsonnet new file mode 100644 index 00000000..749c8f84 --- /dev/null +++ b/cortex/ruler.libsonnet @@ -0,0 +1,53 @@ +{ + local container = $.core.v1.container, + + ruler_args:: + $._config.ringConfig + + $._config.storeConfig + + $._config.storageConfig + + $._config.queryConfig + + $._config.distributorConfig + + { + target: 'ruler', + // Alertmanager configs 
+ 'ruler.alertmanager-url': 'http://alertmanager.%s.svc.cluster.local/alertmanager' % $._config.namespace, + + // Ring Configs + 'ruler.enable-sharding': true, + 'ruler.consul.hostname': 'consul.%s.svc.cluster.local:8500' % $._config.namespace, + 'ruler.consul.consistent-reads': false, + 'ruler.prefix': 'rulers/', + 'ruler.distributor.replication-factor': 1, + 'ruler.claim-on-rollout': true, + 'ruler.join-after': '15s', + 'ruler.ring.heartbeat-timeout': '10m', + 'ruler.heartbeat-period': '1m', + 'ruler.search-pending-for': '1m', + + // Rule Storage Configs + 'ruler.storage.type': 'gcs', + 'rules.gcs.bucketname': '%(cluster)s-cortex-configdb-%(namespace)s' % $._config, + }, + + ruler_container:: + container.new('ruler', $._images.ruler) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.ruler_args)) + + $.util.resourcesRequests('1', '6Gi') + + $.util.resourcesLimits('16', '16Gi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + ruler_deployment: + deployment.new('ruler', 2, [$.ruler_container]) + + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(600) + + $.util.antiAffinity + + $.util.configVolumeMount('overrides', '/etc/cortex') + + $.storage_config_mixin, + + local service = $.core.v1.service, + + ruler_service: + $.util.serviceFor($.ruler_deployment), +} diff --git a/cortex/table-manager.libsonnet b/cortex/table-manager.libsonnet new file mode 100644 index 00000000..b392cd93 --- /dev/null +++ b/cortex/table-manager.libsonnet @@ -0,0 +1,53 @@ +{ + local container = $.core.v1.container, + + table_manager_args:: + $._config.storageConfig + { + target: 'table-manager', + + // Cassandra / BigTable doesn't use these fields, so set them to zero + 'dynamodb.chunk-table.inactive-read-throughput': 0, + 'dynamodb.chunk-table.inactive-write-throughput': 0, + 'dynamodb.chunk-table.read-throughput': 0, + 'dynamodb.chunk-table.write-throughput': 0, + 
'dynamodb.periodic-table.inactive-read-throughput': 0, + 'dynamodb.periodic-table.inactive-write-throughput': 0, + 'dynamodb.periodic-table.read-throughput': 0, + 'dynamodb.periodic-table.write-throughput': 0, + + // Rate limit Bigtable Admin calls. Google seem to limit to ~100QPS, + // and given 2yrs worth of tables (~100) a sync will table 20s. This + // allows you to run upto 20 independant Cortex clusters on the same + // Google project before running into issues. + 'dynamodb.poll-interval': '10m', + 'dynamodb.periodic-table.grace-period': '3h', + 'bigtable.grpc-client-rate-limit': 5.0, + 'bigtable.grpc-client-rate-limit-burst': 5, + 'bigtable.backoff-on-ratelimits': true, + 'bigtable.table-cache.enabled': true, + }, + + table_manager_container:: + if $._config.table_manager_enabled then + container.new('table-manager', $._images.tableManager) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.table_manager_args)) + + $.util.resourcesRequests('100m', '100Mi') + + $.util.resourcesLimits('200m', '200Mi') + + $.jaeger_mixin + else {}, + + local deployment = $.apps.v1beta1.deployment, + + table_manager_deployment: + if $._config.table_manager_enabled then + deployment.new('table-manager', 1, [$.table_manager_container]) + + $.storage_config_mixin + else {}, + + table_manager_service: + if $._config.table_manager_enabled then + $.util.serviceFor($.table_manager_deployment) + else {}, +} diff --git a/cortex/test-exporter.libsonnet b/cortex/test-exporter.libsonnet new file mode 100644 index 00000000..31d7a2c9 --- /dev/null +++ b/cortex/test-exporter.libsonnet @@ -0,0 +1,40 @@ +{ + local container = $.core.v1.container, + local containerPort = $.core.v1.containerPort, + + test_exporter_args:: { + 'user-id': $._config.test_exporter_user_id, + 'prometheus-address': 'http://query-frontend.%(namespace)s.svc.cluster.local/api/prom' % $._config, + 'test-query-start': $._config.test_exporter_start_time, + 'extra-selectors': 
'job="%(namespace)s/test-exporter"' % $._config, + 'test-query-min-size': '1m', + 'test-epsilion': '0.05', // There is enough jitter in our system for scrapes to be off by 5%. + }, + + test_exporter_container:: + if !($._config.test_exporter_enabled) + then {} + else + container.new('test-exporter', $._images.testExporter) + + container.withPorts($.util.defaultPorts) + + container.withArgsMixin($.util.mapToFlags($.test_exporter_args)) + + $.util.resourcesRequests('100m', '100Mi') + + $.util.resourcesLimits('100m', '100Mi') + + $.jaeger_mixin, + + local deployment = $.apps.v1beta1.deployment, + + test_exporter_deployment: + if !($._config.test_exporter_enabled) + then {} + else + deployment.new('test-exporter', 1, [ + $.test_exporter_container, + ]), + + test_exporter_service: + if !($._config.test_exporter_enabled) + then {} + else + $.util.serviceFor($.test_exporter_deployment), +}