From e7759ad42e444055f7c977be91dfc7ecd4f784b8 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 22 Nov 2021 11:03:39 +0100 Subject: [PATCH 1/2] Upstream some config improvements Signed-off-by: Marco Pracucci --- operations/mimir/distributor.libsonnet | 4 +++- operations/mimir/memcached.libsonnet | 2 +- operations/mimir/querier.libsonnet | 2 ++ operations/mimir/query-frontend.libsonnet | 6 ++++-- operations/mimir/query-scheduler.libsonnet | 5 ++++- operations/mimir/tsdb.libsonnet | 5 +++-- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/operations/mimir/distributor.libsonnet b/operations/mimir/distributor.libsonnet index ae1c9ffd900..ea22523e6fb 100644 --- a/operations/mimir/distributor.libsonnet +++ b/operations/mimir/distributor.libsonnet @@ -57,7 +57,9 @@ distributor_deployment: deployment.new('distributor', 3, [$.distributor_container], $.distributor_deployment_labels) + (if $._config.cortex_distributor_allow_multiple_replicas_on_same_node then {} else $.util.antiAffinity) + - $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex'), + $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), local service = $.core.v1.service, diff --git a/operations/mimir/memcached.libsonnet b/operations/mimir/memcached.libsonnet index e303f51f9b0..6fecb9ffbd9 100644 --- a/operations/mimir/memcached.libsonnet +++ b/operations/mimir/memcached.libsonnet @@ -65,7 +65,7 @@ memcached { $.memcached { name: 'memcached-metadata', max_item_size: '%dm' % [$._config.memcached_metadata_max_item_size_mb], - connection_limit: 4096, + connection_limit: 16384, // Metadata cache doesn't need much memory. memory_limit_mb: 512, diff --git a/operations/mimir/querier.libsonnet b/operations/mimir/querier.libsonnet index 0256a91f193..eb807ee28be 100644 --- a/operations/mimir/querier.libsonnet +++ b/operations/mimir/querier.libsonnet @@ -58,6 +58,8 @@ deployment.new(name, $._config.querier.replicas, [container], $.querier_deployment_labels) + (if $._config.cortex_querier_allow_multiple_replicas_on_same_node then {} else $.util.antiAffinity) + $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) + $.storage_config_mixin, querier_deployment: diff --git a/operations/mimir/query-frontend.libsonnet b/operations/mimir/query-frontend.libsonnet index 7cc5a8cfc3c..80f36d04736 100644 --- a/operations/mimir/query-frontend.libsonnet +++ b/operations/mimir/query-frontend.libsonnet @@ -17,7 +17,7 @@ 'querier.split-queries-by-interval': '24h', // Cache query results. - 'querier.align-querier-with-step': true, + 'querier.align-querier-with-step': false, 'querier.cache-results': true, 'frontend.memcached.hostname': 'memcached-frontend.%s.svc.cluster.local' % $._config.namespace, 'frontend.memcached.service': 'memcached-client', @@ -52,7 +52,9 @@ newQueryFrontendDeployment(name, container):: deployment.new(name, $._config.queryFrontend.replicas, [container]) + $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') + - (if $._config.cortex_query_frontend_allow_multiple_replicas_on_same_node then {} else $.util.antiAffinity), + (if $._config.cortex_query_frontend_allow_multiple_replicas_on_same_node then {} else $.util.antiAffinity) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), query_frontend_deployment: self.newQueryFrontendDeployment('query-frontend', $.query_frontend_container), diff --git a/operations/mimir/query-scheduler.libsonnet b/operations/mimir/query-scheduler.libsonnet index 130325e2d44..604d258a6c5 100644 --- a/operations/mimir/query-scheduler.libsonnet +++ b/operations/mimir/query-scheduler.libsonnet @@ -25,7 +25,10 @@ newQuerySchedulerDeployment(name, container):: deployment.new(name, 2, [container]) + $.util.configVolumeMount('overrides', '/etc/cortex') + - $.util.antiAffinity, + $.util.antiAffinity + + // Do not run more query-schedulers than expected. + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), query_scheduler_deployment: if !$._config.query_scheduler_enabled then {} else self.newQuerySchedulerDeployment('query-scheduler', $.query_scheduler_container), diff --git a/operations/mimir/tsdb.libsonnet b/operations/mimir/tsdb.libsonnet index 491adb02c5c..aa47082959e 100644 --- a/operations/mimir/tsdb.libsonnet +++ b/operations/mimir/tsdb.libsonnet @@ -172,8 +172,9 @@ container.withPorts($.compactor_ports) + container.withArgsMixin($.util.mapToFlags($.compactor_args)) + container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) + - $.util.resourcesRequests('1', '6Gi') + - $.util.resourcesLimits($._config.cortex_compactor_max_concurrency, '6Gi') + + // Do not limit compactor CPU and request enough cores to honor configured max concurrency. + $.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '6Gi') + + $.util.resourcesLimits(null, '6Gi') + $.util.readinessProbe + $.jaeger_mixin, From 2bbb7ded09b86985db288ccfbc76d2dd55e0ccb5 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 22 Nov 2021 11:12:13 +0100 Subject: [PATCH 2/2] Increased max connections for memcached chunks and index-queries too Signed-off-by: Marco Pracucci --- operations/mimir/memcached.libsonnet | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/operations/mimir/memcached.libsonnet b/operations/mimir/memcached.libsonnet index 6fecb9ffbd9..011328c33b0 100644 --- a/operations/mimir/memcached.libsonnet +++ b/operations/mimir/memcached.libsonnet @@ -34,6 +34,7 @@ memcached { $.memcached { name: 'memcached-index-queries', max_item_size: '%dm' % [$._config.memcached_index_queries_max_item_size_mb], + connection_limit: 16384, } else {}, @@ -54,7 +55,7 @@ memcached { // Save memory by more tightly provisioning memcached chunks. memory_limit_mb: 6 * 1024, overprovision_factor: 1.05, - connection_limit: 4096, + connection_limit: 16384, local container = $.core.v1.container, }