From 48c93863e454d8361d03414133e6a98063a6e95c Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Thu, 2 Mar 2023 15:23:01 +0100 Subject: [PATCH 01/19] Helm: nginx HPA and tests kubeversion fixes (#4299) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Helm: fix Kubernetes override for nginx HPA The template did not take into account the override "kubeVersionOverride". Fix by using the mimir template implemented for this reason. Signed-off-by: György Krajcsovits * Helm: fix missing Kubernetes version overrides in tests The golden record tests need a fixed version because helm uses the version of the default context and can produce different results between contributor's machine and the CI environment. Add logic to test build to inject the minimal version if not found in the values file. Mainly because we cannot have a version override in the small and large values files. Signed-off-by: György Krajcsovits Co-authored-by: Jon Kartago Lamida --- .../helm/charts/mimir-distributed/CHANGELOG.md | 3 ++- .../ci/offline/gateway-enterprise-values.yaml | 3 +++ .../ci/offline/gateway-nginx-values.yaml | 3 +++ .../mimir-distributed/templates/nginx/_helpers.tpl | 2 +- operations/helm/scripts/build.sh | 14 +++++++++++++- ...ateway-v2-hpa.yaml => gateway-v2beta1-hpa.yaml} | 12 ++++-------- .../templates/ingester/ingester-pdb.yaml | 2 +- .../templates/store-gateway/store-gateway-pdb.yaml | 2 +- ...ateway-v2-hpa.yaml => gateway-v2beta1-hpa.yaml} | 12 ++++-------- .../templates/ingester/ingester-pdb.yaml | 2 +- .../templates/store-gateway/store-gateway-pdb.yaml | 2 +- .../templates/ingester/ingester-pdb.yaml | 2 +- .../templates/store-gateway/store-gateway-pdb.yaml | 2 +- .../templates/ingester/ingester-pdb.yaml | 2 +- .../templates/store-gateway/store-gateway-pdb.yaml | 2 +- 15 files changed, 38 insertions(+), 27 deletions(-) rename operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/{gateway-v2-hpa.yaml => gateway-v2beta1-hpa.yaml} (69%) rename operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/{gateway-v2-hpa.yaml => gateway-v2beta1-hpa.yaml} (69%) diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index 3fdda70dafa..e498fa00ab1 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -28,7 +28,8 @@ Entries should include a reference to the Pull Request that introduced the chang ## main / unreleased -* [ENHANCEMENT] Support autoscaling/v2 HorizontalPodAutoscaler for nginx autoscaling starting with Kubernetes 1.23. +* [ENHANCEMENT] Support autoscaling/v2 HorizontalPodAutoscaler for nginx autoscaling starting with Kubernetes 1.23. #4285 +* [BUGFIX] Allow override of Kubernetes version for nginx HPA. #4299 ## 4.2.0 diff --git a/operations/helm/charts/mimir-distributed/ci/offline/gateway-enterprise-values.yaml b/operations/helm/charts/mimir-distributed/ci/offline/gateway-enterprise-values.yaml index 2d283dc38b3..ed20d462c04 100644 --- a/operations/helm/charts/mimir-distributed/ci/offline/gateway-enterprise-values.yaml +++ b/operations/helm/charts/mimir-distributed/ci/offline/gateway-enterprise-values.yaml @@ -1,3 +1,6 @@ +# Pin kube version so results are the same for running in CI and locally where the installed kube version may be different. +kubeVersionOverride: "1.20" + enterprise: enabled: true gateway: diff --git a/operations/helm/charts/mimir-distributed/ci/offline/gateway-nginx-values.yaml b/operations/helm/charts/mimir-distributed/ci/offline/gateway-nginx-values.yaml index fc12edfc3be..8cf822a28b2 100644 --- a/operations/helm/charts/mimir-distributed/ci/offline/gateway-nginx-values.yaml +++ b/operations/helm/charts/mimir-distributed/ci/offline/gateway-nginx-values.yaml @@ -1,3 +1,6 @@ +# Pin kube version so results are the same for running in CI and locally where the installed kube version may be different. +kubeVersionOverride: "1.20" + nginx: enabled: false gateway: diff --git a/operations/helm/charts/mimir-distributed/templates/nginx/_helpers.tpl b/operations/helm/charts/mimir-distributed/templates/nginx/_helpers.tpl index c72f2577741..414887a01ee 100644 --- a/operations/helm/charts/mimir-distributed/templates/nginx/_helpers.tpl +++ b/operations/helm/charts/mimir-distributed/templates/nginx/_helpers.tpl @@ -9,7 +9,7 @@ nginx auth secret name Returns the HorizontalPodAutoscaler API version for this verison of kubernetes. */}} {{- define "mimir.hpa.version" -}} -{{- if semverCompare ">= 1.23-0" .Capabilities.KubeVersion.Version -}} +{{- if semverCompare ">= 1.23-0" (include "mimir.kubeVersion" .) -}} autoscaling/v2 {{- else -}} autoscaling/v2beta1 diff --git a/operations/helm/scripts/build.sh b/operations/helm/scripts/build.sh index d5dbd676a09..f8aac6d2a3f 100755 --- a/operations/helm/scripts/build.sh +++ b/operations/helm/scripts/build.sh @@ -6,6 +6,9 @@ set -euo pipefail # use a normal sed on macOS if available SED=$(which gsed || which sed) +# DEFAULT_KUBE_VERSION is used if the input values do not contain "kubeVersionOverride" +DEFAULT_KUBE_VERSION="1.20" + CHART_PATH="operations/helm/charts/mimir-distributed" INTERMEDIATE_PATH="" OUTPUT_PATH="" @@ -59,9 +62,18 @@ for FILEPATH in $TESTS; do INTERMEDIATE_OUTPUT_DIR="${INTERMEDIATE_PATH}/${TEST_NAME}-generated" OUTPUT_DIR="${OUTPUT_PATH}/${TEST_NAME}-generated" + echo "" echo "Templating $TEST_NAME" + ARGS=("${TEST_NAME}" "${CHART_PATH}" "-f" "${FILEPATH}" "--output-dir" "${INTERMEDIATE_OUTPUT_DIR}" "--namespace" "citestns") + + echo "Checking for kubeVersionOverride inside tests' values.yaml ..." + if ! grep "^kubeVersionOverride:" "${FILEPATH}" ; then + echo "Warning: injecting Kubernetes version override: kubeVersionOverride=${DEFAULT_KUBE_VERSION}" + ARGS+=("--set-string" "kubeVersionOverride=${DEFAULT_KUBE_VERSION}") + fi + set -x - helm template "${TEST_NAME}" "${CHART_PATH}" -f "${FILEPATH}" --output-dir "${INTERMEDIATE_OUTPUT_DIR}" --namespace citestns + helm template "${ARGS[@]}" set +x echo "Removing mutable config checksum, helm chart, application, image tag version for clarity" diff --git a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml similarity index 69% rename from operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml rename to operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml index 23ee0b857de..eb4f1c6c0a8 100644 --- a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml +++ b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml @@ -1,6 +1,6 @@ --- -# Source: mimir-distributed/templates/gateway/gateway-v2-hpa.yaml -apiVersion: autoscaling/v2 +# Source: mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml +apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: gateway-enterprise-values-mimir-gateway @@ -21,12 +21,8 @@ spec: - type: Resource resource: name: memory - target: - type: AverageValue - averageUtilization: 70 + targetAverageUtilization: 70 - type: Resource resource: name: cpu - target: - type: AverageValue - averageUtilization: 70 + targetAverageUtilization: 70 diff --git a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml index 1af5cf62223..9534f185d4e 100644 --- a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml +++ b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/ingester/ingester-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway-enterprise-values-mimir-ingester diff --git a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml index 25fe6c6c600..aa8e5711774 100644 --- a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml +++ b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway-enterprise-values-mimir-store-gateway diff --git a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml similarity index 69% rename from operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml rename to operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml index 73e40e3677e..443d1d68e97 100644 --- a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2-hpa.yaml +++ b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml @@ -1,6 +1,6 @@ --- -# Source: mimir-distributed/templates/gateway/gateway-v2-hpa.yaml -apiVersion: autoscaling/v2 +# Source: mimir-distributed/templates/gateway/gateway-v2beta1-hpa.yaml +apiVersion: autoscaling/v2beta1 kind: HorizontalPodAutoscaler metadata: name: gateway-nginx-values-mimir-gateway @@ -21,12 +21,8 @@ spec: - type: Resource resource: name: memory - target: - type: AverageValue - averageUtilization: 70 + targetAverageUtilization: 70 - type: Resource resource: name: cpu - target: - type: AverageValue - averageUtilization: 70 + targetAverageUtilization: 70 diff --git a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml index 670a86b4414..7853c9732c1 100644 --- a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml +++ b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/ingester/ingester-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway-nginx-values-mimir-ingester diff --git a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml index 10c514a393d..4fcb41e376f 100644 --- a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml +++ b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: gateway-nginx-values-mimir-store-gateway diff --git a/operations/helm/tests/large-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml b/operations/helm/tests/large-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml index 87b2f21dc33..8036d8341b2 100644 --- a/operations/helm/tests/large-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml +++ b/operations/helm/tests/large-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/ingester/ingester-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: large-values-mimir-ingester diff --git a/operations/helm/tests/large-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml b/operations/helm/tests/large-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml index 56803b73e9d..de0b642b270 100644 --- a/operations/helm/tests/large-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml +++ b/operations/helm/tests/large-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: large-values-mimir-store-gateway diff --git a/operations/helm/tests/small-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml b/operations/helm/tests/small-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml index dea0944a456..f75c0049d29 100644 --- a/operations/helm/tests/small-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml +++ b/operations/helm/tests/small-values-generated/mimir-distributed/templates/ingester/ingester-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/ingester/ingester-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: small-values-mimir-ingester diff --git a/operations/helm/tests/small-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml b/operations/helm/tests/small-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml index 8150bcd7b99..a8c76a99e75 100644 --- a/operations/helm/tests/small-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml +++ b/operations/helm/tests/small-values-generated/mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml @@ -1,6 +1,6 @@ --- # Source: mimir-distributed/templates/store-gateway/store-gateway-pdb.yaml -apiVersion: policy/v1 +apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: small-values-mimir-store-gateway From 2aeb8fbb19cd851f6fbcc7b4007970f5354bd119 Mon Sep 17 00:00:00 2001 From: ying-jeanne <74549700+ying-jeanne@users.noreply.github.com> Date: Thu, 2 Mar 2023 16:58:48 +0100 Subject: [PATCH 02/19] Ruler: load more tenants in parallel during startup (#4258) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Ruler: load more tenants in parallel during startup * add more tests * fix lint * Apply suggestions from code review Co-authored-by: Marco Pracucci * Ingester: fix OOO blocks labelling (#4297) * Ingester: fix OOO blocks labelling This fixes a bug where the OutOfOrderExternalLabel was being added to all blocks instead of the ones coming from OOO data, when the feature flag was enabled. * Changelog * PR number to changelog * Update previous changelog entry instead * Ruler: load more tenants in parallel during startup * fix context * improve unittest --------- Co-authored-by: Marco Pracucci Co-authored-by: Nicolás Pazos <32206519+npazosmendez@users.noreply.github.com> --- CHANGELOG.md | 1 + pkg/ruler/manager.go | 21 +++++++++-- pkg/ruler/manager_test.go | 76 +++++++++++++++++++++++++++++++-------- 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 33b78eb8ddf..815229e2b2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [FEATURE] Query-frontend: Introduce experimental `-query-frontend.query-sharding-target-series-per-shard` to allow query sharding to take into account cardinality of similar requests executed previously. This feature uses the same cache that's used for results caching. #4121 #4177 #4188 #4254 * [ENHANCEMENT] Go: update go to 1.20.1. #4266 * [ENHANCEMENT] Ingester: added `out_of_order_blocks_external_label_enabled` shipper option to label out-of-order blocks before shipping them to cloud storage. #4182 #4297 +* [ENHANCEMENT] Ruler: introduced concurrency when loading per-tenant rules configuration. This improvement is expected to speed up the ruler start up time in a Mimir cluster with a large number of tenants. #4258 * [ENHANCEMENT] Compactor: Add `reason` label to `cortex_compactor_runs_failed_total`. The value can be `shutdown` or `error`. #4012 * [ENHANCEMENT] Store-gateway: enforce `max_fetched_series_per_query`. #4056 * [ENHANCEMENT] Docs: use long flag names in runbook commands. #4088 diff --git a/pkg/ruler/manager.go b/pkg/ruler/manager.go index 75095fe0743..fd82482f27c 100644 --- a/pkg/ruler/manager.go +++ b/pkg/ruler/manager.go @@ -14,6 +14,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/cache" + "github.com/grafana/dskit/concurrency" ot "github.com/opentracing/opentracing-go" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -105,8 +106,24 @@ func (r *DefaultMultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGrou RemoveFederatedRuleGroups(ruleGroups) } - for userID, ruleGroup := range ruleGroups { - r.syncRulesToManager(ctx, userID, ruleGroup) + // Sync the rules to disk and then update the user's Prometheus Rules Manager. + // Since users are different, we can sync rules in parallel. + users := make([]string, 0, len(ruleGroups)) + for userID := range ruleGroups { + users = append(users, userID) + } + + // concurrenty.ForEachJob is a helper function that runs a function for each job in parallel. + // It cancel context of jobFunc once iteration is done. + // That is why the context passed to syncRulesToManager should be the global context not the context of jobFunc. + err := concurrency.ForEachJob(ctx, len(users), 10, func(_ context.Context, idx int) error { + userID := users[idx] + r.syncRulesToManager(ctx, userID, ruleGroups[userID]) + return nil + }) + if err != nil { + // The only error we could get here is a context canceled. + return } r.userManagerMtx.Lock() diff --git a/pkg/ruler/manager_test.go b/pkg/ruler/manager_test.go index 5fcd986787a..50a630c431b 100644 --- a/pkg/ruler/manager_test.go +++ b/pkg/ruler/manager_test.go @@ -7,6 +7,7 @@ package ruler import ( "context" + "path/filepath" "testing" "time" @@ -29,41 +30,88 @@ func TestSyncRuleGroups(t *testing.T) { m, err := NewDefaultMultiTenantManager(Config{RulePath: dir}, factory, nil, log.NewNopLogger(), nil) require.NoError(t, err) - const user = "testUser" + const ( + user1 = "testUser1" + user2 = "testUser2" + namespace1 = "ns1" + namespace2 = "ns2" + ) userRules := map[string]rulespb.RuleGroupList{ - user: { + user1: { &rulespb.RuleGroupDesc{ Name: "group1", - Namespace: "ns", + Namespace: namespace1, + Interval: 30 * time.Second, + User: user1, + }, + }, + user2: { + &rulespb.RuleGroupDesc{ + Name: "group2", + Namespace: namespace2, Interval: 1 * time.Minute, - User: user, + User: user2, }, }, } m.SyncRuleGroups(context.Background(), userRules) + mgr1 := getManager(m, user1) + require.NotNil(t, mgr1) + test.Poll(t, 1*time.Second, true, func() interface{} { + return mgr1.(*mockRulesManager).running.Load() + }) - mgr := getManager(m, user) - require.NotNil(t, mgr) - + mgr2 := getManager(m, user2) + require.NotNil(t, mgr2) test.Poll(t, 1*time.Second, true, func() interface{} { - return mgr.(*mockRulesManager).running.Load() + return mgr2.(*mockRulesManager).running.Load() }) // Verify that user rule groups are now cached locally. { users, err := m.mapper.users() require.NoError(t, err) - require.Equal(t, []string{user}, users) + require.Contains(t, users, user2) + require.Contains(t, users, user1) + } + + // Verify that rule groups are now written to disk. + { + rulegroup1 := filepath.Join(m.mapper.Path, user1, namespace1) + f, error := m.mapper.FS.Open(rulegroup1) + require.NoError(t, error) + defer f.Close() + + bt := make([]byte, 100) + n, error := f.Read(bt) + require.NoError(t, error) + require.Equal(t, string("groups:\n - name: group1\n interval: 30s\n rules: []\n"), string(bt[:n])) + } + + { + rulegroup2 := filepath.Join(m.mapper.Path, user2, namespace2) + f, error := m.mapper.FS.Open(rulegroup2) + require.NoError(t, error) + defer f.Close() + + bt := make([]byte, 100) + n, error := f.Read(bt) + require.NoError(t, error) + require.Equal(t, string("groups:\n - name: group2\n interval: 1m\n rules: []\n"), string(bt[:n])) } // Passing empty map / nil stops all managers. m.SyncRuleGroups(context.Background(), nil) - require.Nil(t, getManager(m, user)) + require.Nil(t, getManager(m, user1)) // Make sure old manager was stopped. test.Poll(t, 1*time.Second, false, func() interface{} { - return mgr.(*mockRulesManager).running.Load() + return mgr1.(*mockRulesManager).running.Load() + }) + + test.Poll(t, 1*time.Second, false, func() interface{} { + return mgr2.(*mockRulesManager).running.Load() }) // Verify that local rule groups were removed. @@ -76,9 +124,9 @@ func TestSyncRuleGroups(t *testing.T) { // Resync same rules as before. Previously this didn't restart the manager. m.SyncRuleGroups(context.Background(), userRules) - newMgr := getManager(m, user) + newMgr := getManager(m, user1) require.NotNil(t, newMgr) - require.True(t, mgr != newMgr) + require.True(t, mgr1 != newMgr) test.Poll(t, 1*time.Second, true, func() interface{} { return newMgr.(*mockRulesManager).running.Load() @@ -88,7 +136,7 @@ func TestSyncRuleGroups(t *testing.T) { { users, err := m.mapper.users() require.NoError(t, err) - require.Equal(t, []string{user}, users) + require.Contains(t, users, user1) } m.Stop() From 68be264bcd2fa1e2233b9ef5ff760470f38aed5b Mon Sep 17 00:00:00 2001 From: Ursula Kallio Date: Thu, 2 Mar 2023 17:39:09 +0100 Subject: [PATCH 03/19] Change language to match the math. (#4356) --- .../configure/configure-zone-aware-replication.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md b/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md index 9f48d871bcd..621890b6bfa 100644 --- a/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md +++ b/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md @@ -70,7 +70,7 @@ With a replication factor of 3, which is the default, deploy the Grafana Mimir c Deploying Grafana Mimir clusters to more zones than the configured replication factor does not have a negative impact. Deploying Grafana Mimir clusters to fewer zones than the configured replication factor can cause writes to the replica to be missed, or can cause writes to fail completely. -If there are no more than `floor(replication factor / 2)` zones with failing replicas, reads and writes can withstand zone failures. +If there are fewer than `floor(replication factor / 2)` zones with failing replicas, reads and writes can withstand zone failures. ## Unbalanced zones From b5b7c3b87f27f82f6581135a0ce0a6234cf46825 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Thu, 2 Mar 2023 17:44:50 +0100 Subject: [PATCH 04/19] Upgrade mimir-prometheus to get a fast regexp path optimization (#4357) * Upgrade mimir-prometheus to get a fast regexp path optimization Signed-off-by: Marco Pracucci * Added CHANGELOG entry Signed-off-by: Marco Pracucci --------- Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + go.mod | 2 +- go.sum | 4 +- .../prometheus/model/labels/regexp.go | 121 +++++++++++++++++- vendor/modules.txt | 4 +- 5 files changed, 125 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 815229e2b2a..ddf295c731b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [ENHANCEMENT] Store-gateway: add a `stage` label to the metrics `cortex_bucket_store_series_data_fetched`, `cortex_bucket_store_series_data_size_fetched_bytes`, `cortex_bucket_store_series_data_touched`, `cortex_bucket_store_series_data_size_touched_bytes`. This label only applies to `data_type="chunks"`. For `fetched` metrics with `data_type="chunks"` the `stage` label has 2 values: `fetched` - the chunks or bytes that were fetched from the cache or the object store, `refetched` - the chunks or bytes that had to be refetched from the cache or the object store because their size was underestimated during the first fetch. For `touched` metrics with `data_type="chunks"` the `stage` label has 2 values: `processed` - the chunks or bytes that were read from the fetched chunks or bytes and were processed in memory, `returned` - the chunks or bytes that were selected from the processed bytes to satisfy the query. #4227 #4316 * [ENHANCEMENT] Compactor: improve the partial block check related to `compactor.partial-block-deletion-delay` to potentially issue less requests to object storage. #4246 * [ENHANCEMENT] Memcached: added `-*.memcached.min-idle-connections-headroom-percentage` support to configure the minimum number of idle connections to keep open as a percentage (0-100) of the number of recently used idle connections. This feature is disabled when set to a negative value (default), which means idle connections are kept open indefinitely. #4249 +* [ENHANCEMENT] Querier and store-gateway: optimized regular expression label matchers with case insensitive alternate operator. #4340 #4357 * [BUGFIX] Ingester: remove series from ephemeral storage even if there are no persistent series. #4052 * [BUGFIX] Store-gateway: return `Canceled` rather than `Aborted` or `Internal` error when the calling querier cancels a label names or values request, and return `Internal` if processing the request fails for another reason. #4061 * [BUGFIX] Ingester: reuse memory when ingesting ephemeral series. #4072 diff --git a/go.mod b/go.mod index 428ebc0c127..32a52195eff 100644 --- a/go.mod +++ b/go.mod @@ -231,7 +231,7 @@ require ( replace github.com/bradfitz/gomemcache => github.com/grafana/gomemcache v0.0.0-20220812141943-44b6cde200bb // Using a fork of Prometheus with Mimir-specific changes. -replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230301145004-383ea59ce1c1 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230302162052-1e7ad0ec11fe // Pin hashicorp depencencies since the Prometheus fork, go mod tries to update them. replace github.com/hashicorp/go-immutable-radix => github.com/hashicorp/go-immutable-radix v1.2.0 diff --git a/go.sum b/go.sum index d14e7597fae..7562ddcf572 100644 --- a/go.sum +++ b/go.sum @@ -503,8 +503,8 @@ github.com/grafana/gomemcache v0.0.0-20230221082510-6cde04bf2270 h1:cj3uiNKskh+/ github.com/grafana/gomemcache v0.0.0-20230221082510-6cde04bf2270/go.mod h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20230301145004-383ea59ce1c1 h1:Qh5Dx0arKuKLSVLn4H54S5wNmmTA0TyQE6IJex1nEg8= -github.com/grafana/mimir-prometheus v0.0.0-20230301145004-383ea59ce1c1/go.mod h1:XHIzUaYXL352XOhSF/R0eZ+/k2x6u5de/d/X9VWwVnI= +github.com/grafana/mimir-prometheus v0.0.0-20230302162052-1e7ad0ec11fe h1:WYj9zGt/ZVg2aBijj+QTSfJGieg3uV/H+1Isv4rNao8= +github.com/grafana/mimir-prometheus v0.0.0-20230302162052-1e7ad0ec11fe/go.mod h1:XHIzUaYXL352XOhSF/R0eZ+/k2x6u5de/d/X9VWwVnI= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 h1:A3dhViTeFDSQcGOXuUi6ukCQSMyDtDISBp2z6OOo2YM= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= diff --git a/vendor/github.com/prometheus/prometheus/model/labels/regexp.go b/vendor/github.com/prometheus/prometheus/model/labels/regexp.go index d2c4906fc06..3fba3643005 100644 --- a/vendor/github.com/prometheus/prometheus/model/labels/regexp.go +++ b/vendor/github.com/prometheus/prometheus/model/labels/regexp.go @@ -20,7 +20,14 @@ import ( "github.com/grafana/regexp/syntax" ) -const maxSetMatches = 256 +const ( + maxSetMatches = 256 + + // The minimum number of alternate values a regex should have to trigger + // the optimization done by optimizeEqualStringMatchers(). This value has + // been computed running BenchmarkOptimizeEqualStringMatchers. + optimizeEqualStringMatchersThreshold = 16 +) type FastRegexMatcher struct { re *regexp.Regexp @@ -326,7 +333,10 @@ type StringMatcher interface { func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher { clearBeginEndText(re) - return stringMatcherFromRegexpInternal(re) + m := stringMatcherFromRegexpInternal(re) + m = optimizeEqualStringMatchers(m, optimizeEqualStringMatchersThreshold) + + return m } func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher { @@ -503,6 +513,24 @@ func (m *equalStringMatcher) Matches(s string) bool { return strings.EqualFold(m.s, s) } +// equalMultiStringMatcher matches a string exactly against a set of valid values. +type equalMultiStringMatcher struct { + // values to match a string against. If the matching is case insensitive, + // the values here must be lowercase. + values map[string]struct{} + + caseSensitive bool +} + +func (m *equalMultiStringMatcher) Matches(s string) bool { + if !m.caseSensitive { + s = strings.ToLower(s) + } + + _, ok := m.values[s] + return ok +} + // anyStringMatcher is a matcher that matches any string. // It is used for the + and * operator. matchNL tells if it should matches newlines or not. type anyStringMatcher struct { @@ -519,3 +547,92 @@ func (m *anyStringMatcher) Matches(s string) bool { } return true } + +// optimizeEqualStringMatchers optimize a specific case where all matchers are made by an +// alternation (orStringMatcher) of strings checked for equality (equalStringMatcher). In +// this specific case, when we have many strings to match against we can use a map instead +// of iterating over the list of strings. +func optimizeEqualStringMatchers(input StringMatcher, threshold int) StringMatcher { + var ( + caseSensitive bool + caseSensitiveSet bool + numValues int + ) + + // Analyse the input StringMatcher to count the number of occurrences + // and ensure all of them have the same case sensitivity. + analyseCallback := func(matcher *equalStringMatcher) bool { + // Ensure we don't have mixed case sensitivity. + if caseSensitiveSet && caseSensitive != matcher.caseSensitive { + return false + } else if !caseSensitiveSet { + caseSensitive = matcher.caseSensitive + caseSensitiveSet = true + } + + numValues++ + return true + } + + if !findEqualStringMatchers(input, analyseCallback) { + return input + } + + // If the number of values found is less than the threshold, then we should skip the optimization. + if numValues < threshold { + return input + } + + // Parse again the input StringMatcher to extract all values and storing them. + // We can skip the case sensitivity check because we've already checked it and + // if the code reach this point then it means all matchers have the same case sensitivity. + values := make(map[string]struct{}, numValues) + + // Ignore the return value because we already iterated over the input StringMatcher + // and it was all good. + findEqualStringMatchers(input, func(matcher *equalStringMatcher) bool { + if caseSensitive { + values[matcher.s] = struct{}{} + } else { + values[strings.ToLower(matcher.s)] = struct{}{} + } + + return true + }) + + return &equalMultiStringMatcher{ + values: values, + caseSensitive: caseSensitive, + } +} + +// findEqualStringMatchers analyze the input StringMatcher and calls the callback for each +// equalStringMatcher found. Returns true if and only if the input StringMatcher is *only* +// composed by an alternation of equalStringMatcher. +func findEqualStringMatchers(input StringMatcher, callback func(matcher *equalStringMatcher) bool) bool { + orInput, ok := input.(orStringMatcher) + if !ok { + return false + } + + for _, m := range orInput { + switch casted := m.(type) { + case orStringMatcher: + if !findEqualStringMatchers(m, callback) { + return false + } + + case *equalStringMatcher: + if !callback(casted) { + return false + } + + default: + // It's not an equal string matcher, so we have to stop searching + // cause this optimization can't be applied. + return false + } + } + + return true +} diff --git a/vendor/modules.txt b/vendor/modules.txt index fc4adb58f8d..0e1faf67852 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -790,7 +790,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.8.2-0.20220620125440-d7e7b8e04b5e => github.com/grafana/mimir-prometheus v0.0.0-20230301145004-383ea59ce1c1 +# github.com/prometheus/prometheus v1.8.2-0.20220620125440-d7e7b8e04b5e => github.com/grafana/mimir-prometheus v0.0.0-20230302162052-1e7ad0ec11fe ## explicit; go 1.18 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1378,7 +1378,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk ## explicit; go 1.12 sigs.k8s.io/yaml # github.com/bradfitz/gomemcache => github.com/grafana/gomemcache v0.0.0-20220812141943-44b6cde200bb -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230301145004-383ea59ce1c1 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20230302162052-1e7ad0ec11fe # github.com/hashicorp/go-immutable-radix => github.com/hashicorp/go-immutable-radix v1.2.0 # github.com/hashicorp/go-hclog => github.com/hashicorp/go-hclog v0.12.2 # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe From 6b3e5b82f34e9a7fba254e85bcf3651363d7f694 Mon Sep 17 00:00:00 2001 From: l3ioo <122443155+l3ioo@users.noreply.github.com> Date: Thu, 2 Mar 2023 19:15:24 +0100 Subject: [PATCH 05/19] Fix typo in the docs URL for migrating from Cortex (#4358) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 976309a9361..463bd2ef0fd 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Grafana Mimir is an open source software project that provides a scalable long-t If you're migrating to Grafana Mimir, refer to the following documents: - [Migrating from Thanos or Prometheus to Grafana Mimir](https://grafana.com/docs/mimir/latest/migration-guide/migrating-from-thanos-or-prometheus/). -- [Migrating from Cortex to Grafana Mimir](https://grafana.com/docs/mimir/latest/migration-guide/migrating-from-cortex/) +- [Migrating from Cortex to Grafana Mimir](https://grafana.com/docs/mimir/latest/migration-guide/migrate-from-cortex/) ## Deploying Grafana Mimir From caba95c9d9ebb3cee3ad84eb4bbfcbc50044374a Mon Sep 17 00:00:00 2001 From: Ursula Kallio Date: Fri, 3 Mar 2023 08:58:58 +0100 Subject: [PATCH 06/19] Remove forced paragraph break. (#4359) --- docs/sources/mimir/operators-guide/get-started/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sources/mimir/operators-guide/get-started/_index.md b/docs/sources/mimir/operators-guide/get-started/_index.md index e02fde76525..9e0b68ba8ba 100644 --- a/docs/sources/mimir/operators-guide/get-started/_index.md +++ b/docs/sources/mimir/operators-guide/get-started/_index.md @@ -11,7 +11,7 @@ weight: 10 You can get started with Grafana Mimir _imperatively_ or _declaratively_: -- **Imperatively**: The written instructions that follow contain commands to help you start a single Mimir process. You would need to perform the commands again to start another Mimir process.

+- **Imperatively**: The written instructions that follow contain commands to help you start a single Mimir process. You would need to perform the commands again to start another Mimir process. - **Declaratively**: The following video tutorial uses `docker-compose` to deploy multiple Mimir processes. Therefore, if you want to deploy multiple Mimir processes later, the majority of the configuration work will have already been done. {{< vimeo 691947043 >}} From ffac57415b33a7ccb82764c9bb85096f6edb20f5 Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 3 Mar 2023 19:00:32 +1100 Subject: [PATCH 07/19] Bump actions/setup-go to v3 to resolve Node.js 12 deprecation warning. (#4361) --- .github/workflows/test-build-deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-build-deploy.yml b/.github/workflows/test-build-deploy.yml index a2a5d09ecfb..56ddee99a5d 100644 --- a/.github/workflows/test-build-deploy.yml +++ b/.github/workflows/test-build-deploy.yml @@ -196,7 +196,7 @@ jobs: test_group_total: [4] steps: - name: Upgrade golang - uses: actions/setup-go@v2 + uses: actions/setup-go@v3 with: go-version: 1.20.1 - name: Check out repository From 52b1776db1b1ceb02eb6e7b27982be4ef84de84c Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Fri, 3 Mar 2023 20:09:14 +1100 Subject: [PATCH 08/19] Improve flaky `TestIngesterWithShippingDisabledDeletesBlocksOnlyAfterRetentionExpires` (#4362) * Use more specific assertion to include more information in test failures. See #4198. * Reduce flakiness of test by extending retention period. This gives the rest of the test more time to retrieve `oldBlocks` before any of the blocks is removed. --- pkg/ingester/ingester_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index f284a7b4b85..6735f9665f9 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -4515,7 +4515,7 @@ func TestIngesterWithShippingDisabledDeletesBlocksOnlyAfterRetentionExpires(t *t cfg := defaultIngesterTestConfig(t) cfg.BlocksStorageConfig.TSDB.BlockRanges = []time.Duration{chunkRange} cfg.BlocksStorageConfig.TSDB.ShipInterval = 0 // Disabled shipping - cfg.BlocksStorageConfig.TSDB.Retention = 1 * time.Second // With shipping disabled this means will only expire 1 hour after the block creation time. + cfg.BlocksStorageConfig.TSDB.Retention = 5 * time.Second // With shipping disabled this means will only expire 1 hour after the block creation time. // Create ingester reg := prometheus.NewPedanticRegistry() @@ -4551,10 +4551,10 @@ func TestIngesterWithShippingDisabledDeletesBlocksOnlyAfterRetentionExpires(t *t require.Nil(t, db.Compact()) oldBlocks := db.Blocks() - require.Equal(t, 3, len(oldBlocks)) + require.Len(t, oldBlocks, 3) // Yes, we're sleeping in this test to let the retention of the newly compacted blocks expire - time.Sleep(1 * time.Second) + time.Sleep(cfg.BlocksStorageConfig.TSDB.Retention) // Add more samples that could trigger another compaction and hence reload of blocks. req, _, _, _ := mockWriteRequest(t, labels.FromStrings(labels.MetricName, "test"), 0, 5*chunkRangeMilliSec) @@ -4572,7 +4572,6 @@ func TestIngesterWithShippingDisabledDeletesBlocksOnlyAfterRetentionExpires(t *t newBlocks := db.Blocks() require.Equal(t, 1, len(newBlocks)) require.NotContains(t, []ulid.ULID{oldBlocks[0].Meta().ULID, oldBlocks[1].Meta().ULID, oldBlocks[2].Meta().ULID}, newBlocks[0].Meta().ULID) - } func TestIngesterPushErrorDuringForcedCompaction(t *testing.T) { From 921eb300802ec6592fa52ac80cc63bb2739b6271 Mon Sep 17 00:00:00 2001 From: Vernon Miller <96601789+aldernero@users.noreply.github.com> Date: Fri, 3 Mar 2023 02:22:27 -0700 Subject: [PATCH 09/19] Add asynchronous validation scaffolding for block upload (#3411) * Add asynchronous validation scaffolding for block upload * addressed lint errors * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * Update pkg/compactor/block_upload.go Co-authored-by: Arve Knudsen * enable block upload for dev testing * fixed validation errors, added debug log messages * fixed cancelled context issue * changed name of flag to disable complete block upload * addressed reviewer feedback * addressed reviewer feedback * Address some review comments, WIP * Small spacing cleanup * Transition to in-memory bucket for block finish test * Async validation test coordination, adding configuration flags * Small comment and flag fix * Swap config strategy, test still needs separation * Docs + lint * Review comments, begin separating tests * Finish validateAndComplete test * Update docs * Remove docker compose arguments * Regenerate rather than modify * Review, add test for periodicValidationUpdater * Make validateAndComplete test clearer, add upload meta check * Set missing cancelContext * Add sleep as suggested * Configure data directory * fixed compactor data dir in e2e test * Add changelog entry * Split into two entries * Missing entry number * Update CHANGELOG.md --------- Co-authored-by: Arve Knudsen Co-authored-by: Andy Asp Co-authored-by: Andy Asp <90626759+andyasp@users.noreply.github.com> --- CHANGELOG.md | 2 + cmd/mimir/config-descriptor.json | 21 + cmd/mimir/help-all.txt.tmpl | 2 + .../configuration-parameters/index.md | 5 + integration/backfill_test.go | 6 +- pkg/compactor/block_upload.go | 235 +++++++++-- pkg/compactor/block_upload_test.go | 385 ++++++++++++++---- pkg/compactor/compactor.go | 3 + .../bucket/error_injected_bucket_client.go | 86 ++++ 9 files changed, 636 insertions(+), 109 deletions(-) create mode 100644 pkg/storage/bucket/error_injected_bucket_client.go diff --git a/CHANGELOG.md b/CHANGELOG.md index ddf295c731b..a36dbe47d6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [ENHANCEMENT] Compactor: improve the partial block check related to `compactor.partial-block-deletion-delay` to potentially issue less requests to object storage. #4246 * [ENHANCEMENT] Memcached: added `-*.memcached.min-idle-connections-headroom-percentage` support to configure the minimum number of idle connections to keep open as a percentage (0-100) of the number of recently used idle connections. This feature is disabled when set to a negative value (default), which means idle connections are kept open indefinitely. #4249 * [ENHANCEMENT] Querier and store-gateway: optimized regular expression label matchers with case insensitive alternate operator. #4340 #4357 +* [ENHANCEMENT] Compactor: added the experimental flag `-compactor.block-upload.block-validation-enabled` with the default `true` to configure whether block validation occurs on backfilled blocks. #3411 * [BUGFIX] Ingester: remove series from ephemeral storage even if there are no persistent series. #4052 * [BUGFIX] Store-gateway: return `Canceled` rather than `Aborted` or `Internal` error when the calling querier cancels a label names or values request, and return `Internal` if processing the request fails for another reason. #4061 * [BUGFIX] Ingester: reuse memory when ingesting ephemeral series. #4072 @@ -86,6 +87,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. ### Mimirtool +* [CHANGE] Blocks uploaded with the `mimirtool backfill` command are now by default verified by a compactor before the upload is finalized. #3411 * [FEATURE] Added `keep_firing_for` support to rules configuration. #4099 * [ENHANCEMENT] Add `-tls-insecure-skip-verify` to rules, alertmanager and backfill commands. #4162 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 3742aa9949d..630fe762cc6 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -6703,6 +6703,27 @@ "fieldFlag": "compactor.compaction-jobs-order", "fieldType": "string", "fieldCategory": "advanced" + }, + { + "kind": "block", + "name": "block_upload", + "required": false, + "desc": "", + "blockEntries": [ + { + "kind": "field", + "name": "block_validation_enabled", + "required": false, + "desc": "Validate blocks before finalizing a block upload", + "fieldValue": null, + "fieldDefaultValue": true, + "fieldFlag": "compactor.block-upload.block-validation-enabled", + "fieldType": "boolean", + "fieldCategory": "experimental" + } + ], + "fieldValue": null, + "fieldDefaultValue": null } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 0d353affd4b..a4086fb0a64 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -629,6 +629,8 @@ Usage of ./cmd/mimir/mimir: Number of Go routines to use when downloading blocks for compaction and uploading resulting blocks. (default 8) -compactor.block-upload-enabled Enable block upload API for the tenant. + -compactor.block-upload.block-validation-enabled + [experimental] Validate blocks before finalizing a block upload (default true) -compactor.blocks-retention-period duration Delete blocks containing samples older than the specified retention period. Also used by query-frontend to avoid querying beyond the retention period. 0 to disable. -compactor.cleanup-concurrency int diff --git a/docs/sources/mimir/references/configuration-parameters/index.md b/docs/sources/mimir/references/configuration-parameters/index.md index c603aaf328e..405a0422fa9 100644 --- a/docs/sources/mimir/references/configuration-parameters/index.md +++ b/docs/sources/mimir/references/configuration-parameters/index.md @@ -3464,6 +3464,11 @@ sharding_ring: # smallest-range-oldest-blocks-first, newest-blocks-first. # CLI flag: -compactor.compaction-jobs-order [compaction_jobs_order: | default = "smallest-range-oldest-blocks-first"] + +block_upload: + # (experimental) Validate blocks before finalizing a block upload + # CLI flag: -compactor.block-upload.block-validation-enabled + [block_validation_enabled: | default = true] ``` ### store_gateway diff --git a/integration/backfill_test.go b/integration/backfill_test.go index 4224b8f517b..0d183a196d5 100644 --- a/integration/backfill_test.go +++ b/integration/backfill_test.go @@ -71,9 +71,13 @@ func TestMimirtoolBackfill(t *testing.T) { consul := e2edb.NewConsul() minio := e2edb.NewMinio(9000, mimirBucketName) require.NoError(t, s.StartAndWaitReady(consul, minio)) + dataDir := filepath.Join(s.SharedDir(), "data") + err = os.Mkdir(dataDir, os.ModePerm) + require.NoError(t, err) - // Configure the compactor with runtime config (for overrides) + // Configure the compactor with a data directory and runtime config (for overrides) flags := mergeFlags(CommonStorageBackendFlags(), BlocksStorageFlags(), map[string]string{ + "-compactor.data-dir": filepath.Join(e2e.ContainerSharedDir, "data"), "-runtime-config.reload-period": "100ms", "-runtime-config.file": filepath.Join(e2e.ContainerSharedDir, overridesFile), }) diff --git a/pkg/compactor/block_upload.go b/pkg/compactor/block_upload.go index 9975a5fbfed..8a4bd0ee6d6 100644 --- a/pkg/compactor/block_upload.go +++ b/pkg/compactor/block_upload.go @@ -6,9 +6,13 @@ import ( "bytes" "context" "encoding/json" + "flag" "fmt" "net/http" + "os" "path" + "path/filepath" + "sync" "time" "github.com/go-kit/log" @@ -30,12 +34,21 @@ import ( util_log "github.com/grafana/mimir/pkg/util/log" ) -// Name of file where we store a block's meta file while it's being uploaded. const ( - uploadingMetaFilename = "uploading-meta.json" - validationFilename = "validation.json" + uploadingMetaFilename = "uploading-meta.json" // Name of the file that stores a block's meta file while it's being uploaded + validationFilename = "validation.json" // Name of the file that stores a heartbeat time and possibly an error message + validationHeartbeatInterval = 1 * time.Minute // Duration of time between heartbeats of an in-progress block upload validation + validationHeartbeatTimeout = 5 * time.Minute // Maximum duration of time to wait until a validation is able to be restarted ) +type BlockUploadConfig struct { + BlockValidationEnabled bool `yaml:"block_validation_enabled" category:"experimental"` +} + +func (cfg *BlockUploadConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet, logger log.Logger) { + f.BoolVar(&cfg.BlockValidationEnabled, prefix+".block-validation-enabled", true, "Validate blocks before finalizing a block upload") +} + var rePath = regexp.MustCompile(`^(index|chunks/\d{6})$`) // StartBlockUpload handles request for starting block upload. @@ -99,9 +112,21 @@ func (c *MultitenantCompactor) FinishBlockUpload(w http.ResponseWriter, r *http. return } - if err := c.completeBlockUpload(ctx, logger, userBkt, blockID, *m); err != nil { - writeBlockUploadError(err, op, "", logger, w) - return + if c.compactorCfg.BlockUpload.BlockValidationEnabled { + // create validation file to signal that block validation has started + if err := c.uploadValidation(ctx, blockID, userBkt); err != nil { + writeBlockUploadError(err, op, "while creating validation file", logger, w) + return + } + go c.validateAndCompleteBlockUpload(logger, userBkt, blockID, m, func(ctx context.Context) error { + return c.validateBlock(ctx, blockID, userBkt, m) + }) + } else { + if err := c.markBlockComplete(ctx, logger, userBkt, blockID, m); err != nil { + writeBlockUploadError(err, op, "uploading meta file", logger, w) + return + } + level.Debug(logger).Log("msg", "successfully completed block upload") } w.WriteHeader(http.StatusOK) @@ -175,11 +200,10 @@ func (c *MultitenantCompactor) createBlockUpload(ctx context.Context, r *http.Re } } - return c.uploadMeta(ctx, logger, meta, blockID, uploadingMetaFilename, userBkt) + return c.uploadMeta(ctx, logger, &meta, blockID, uploadingMetaFilename, userBkt) } // UploadBlockFile handles requests for uploading block files. -// // It takes the mandatory query parameter "path", specifying the file's destination path. func (c *MultitenantCompactor) UploadBlockFile(w http.ResponseWriter, r *http.Request) { blockID, tenantID, err := c.parseBlockUploadParameters(r) @@ -262,21 +286,64 @@ func (c *MultitenantCompactor) UploadBlockFile(w http.ResponseWriter, r *http.Re w.WriteHeader(http.StatusOK) } -func (c *MultitenantCompactor) completeBlockUpload(ctx context.Context, logger log.Logger, userBkt objstore.Bucket, blockID ulid.ULID, meta metadata.Meta) error { +func (c *MultitenantCompactor) validateAndCompleteBlockUpload(logger log.Logger, userBkt objstore.Bucket, blockID ulid.ULID, meta *metadata.Meta, validation func(context.Context) error) { level.Debug(logger).Log("msg", "completing block upload", "files", len(meta.Thanos.Files)) - // Upload meta file so block is considered complete + { + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + + // start a go routine that updates the validation file's timestamp every heartbeat interval + wg.Add(1) + go func() { + defer wg.Done() + c.periodicValidationUpdater(ctx, logger, blockID, userBkt, cancel, validationHeartbeatInterval) + }() + + if err := validation(ctx); err != nil { + level.Error(logger).Log("msg", "error while validating block", "err", err) + cancel() + wg.Wait() + err := c.uploadValidationWithError(context.Background(), blockID, userBkt, err.Error()) + if err != nil { + level.Error(logger).Log("msg", "error updating validation file after failed block validation", "err", err) + } + return + } + + cancel() + wg.Wait() // use waitgroup to ensure validation ts update is complete + } + + ctx := context.Background() + + if err := c.markBlockComplete(ctx, logger, userBkt, blockID, meta); err != nil { + if err := c.uploadValidationWithError(ctx, blockID, userBkt, err.Error()); err != nil { + level.Error(logger).Log("msg", "error updating validation file after upload of metadata file failed", "err", err) + } + return + } + + if err := userBkt.Delete(ctx, path.Join(blockID.String(), validationFilename)); err != nil { + level.Warn(logger).Log("msg", fmt.Sprintf( + "failed to delete %s from block in object storage", validationFilename), "err", err) + return + } + + level.Debug(logger).Log("msg", "successfully completed block upload") +} + +func (c *MultitenantCompactor) markBlockComplete(ctx context.Context, logger log.Logger, userBkt objstore.Bucket, blockID ulid.ULID, meta *metadata.Meta) error { if err := c.uploadMeta(ctx, logger, meta, blockID, block.MetaFilename, userBkt); err != nil { + level.Error(logger).Log("msg", "error uploading block metadata file", "err", err) return err } if err := userBkt.Delete(ctx, path.Join(blockID.String(), uploadingMetaFilename)); err != nil { - level.Warn(logger).Log("msg", fmt.Sprintf( - "failed to delete %s from block in object storage", uploadingMetaFilename), "err", err) - return nil + // Not returning an error since the temporary meta file persisting is a harmless side effect + level.Warn(logger).Log("msg", fmt.Sprintf("failed to delete %s from block in object storage", uploadingMetaFilename), "err", err) } - level.Debug(logger).Log("msg", "successfully completed block upload") return nil } @@ -284,7 +351,6 @@ func (c *MultitenantCompactor) completeBlockUpload(ctx context.Context, logger l // message gets returned, otherwise an empty string. func (c *MultitenantCompactor) sanitizeMeta(logger log.Logger, blockID ulid.ULID, meta *metadata.Meta) string { meta.ULID = blockID - for l, v := range meta.Thanos.Labels { switch l { // Preserve this label @@ -350,8 +416,10 @@ func (c *MultitenantCompactor) sanitizeMeta(logger log.Logger, blockID ulid.ULID return "" } -func (c *MultitenantCompactor) uploadMeta(ctx context.Context, logger log.Logger, meta metadata.Meta, - blockID ulid.ULID, name string, userBkt objstore.Bucket) error { +func (c *MultitenantCompactor) uploadMeta(ctx context.Context, logger log.Logger, meta *metadata.Meta, blockID ulid.ULID, name string, userBkt objstore.Bucket) error { + if meta == nil { + return errors.New("missing block metadata") + } dst := path.Join(blockID.String(), name) level.Debug(logger).Log("msg", fmt.Sprintf("uploading %s to bucket", name), "dst", dst) buf := bytes.NewBuffer(nil) @@ -365,6 +433,80 @@ func (c *MultitenantCompactor) uploadMeta(ctx context.Context, logger log.Logger return nil } +func (c *MultitenantCompactor) createTemporaryBlockDirectory() (dir string, err error) { + blockDir, err := os.MkdirTemp(c.compactorCfg.DataDir, "upload") + if err != nil { + level.Error(c.logger).Log("msg", "failed to create temporary block directory", "err", err) + return "", errors.New("failed to create temporary block directory") + } + + level.Debug(c.logger).Log("msg", "created temporary block directory", "dir", blockDir) + return blockDir, nil +} + +func (c *MultitenantCompactor) removeTemporaryBlockDirectory(blockDir string) { + level.Debug(c.logger).Log("msg", "removing temporary block directory", "dir", blockDir) + if err := os.RemoveAll(blockDir); err != nil { + level.Warn(c.logger).Log("msg", "failed to remove temporary block directory", "path", blockDir, "err", err) + } +} + +func (c *MultitenantCompactor) prepareBlockForValidation(ctx context.Context, userBkt objstore.Bucket, blockID ulid.ULID) (string, error) { + blockDir, err := c.createTemporaryBlockDirectory() + if err != nil { + return "", err + } + + // download the block to local storage + level.Debug(c.logger).Log("msg", "downloading block from bucket", "block", blockID.String()) + err = objstore.DownloadDir(ctx, c.logger, userBkt, blockID.String(), blockID.String(), blockDir) + if err != nil { + c.removeTemporaryBlockDirectory(blockDir) + return "", errors.Wrap(err, "failed to download block") + } + + // rename the temporary meta file name to the expected one locally so that the block can be inspected + err = os.Rename(filepath.Join(blockDir, uploadingMetaFilename), filepath.Join(blockDir, block.MetaFilename)) + if err != nil { + level.Warn(c.logger).Log("msg", "could not rename temporary metadata file", "block", blockID.String(), "err", err) + c.removeTemporaryBlockDirectory(blockDir) + return "", errors.New("failed renaming while preparing block for validation") + } + + return blockDir, nil +} + +func (c *MultitenantCompactor) validateBlock(ctx context.Context, blockID ulid.ULID, userBkt objstore.Bucket, meta *metadata.Meta) error { + blockDir, err := c.prepareBlockForValidation(ctx, userBkt, blockID) + if err != nil { + return err + } + defer c.removeTemporaryBlockDirectory(blockDir) + + blockMetadata, err := metadata.ReadFromDir(blockDir) + if err != nil { + return errors.Wrap(err, "error reading block metadata file") + } + + // check that all files listed in the metadata are present and the correct size + for _, f := range blockMetadata.Thanos.Files { + fi, err := os.Stat(filepath.Join(blockDir, filepath.FromSlash(f.RelPath))) + if err != nil { + return errors.Wrapf(err, "failed to stat %s", f.RelPath) + } + + if !fi.Mode().IsRegular() { + return errors.Errorf("not a file: %s", f.RelPath) + } + + if f.RelPath != block.MetaFilename && fi.Size() != f.SizeBytes { + return errors.Errorf("file size mismatch for %s", f.RelPath) + } + } + + return nil +} + type httpError struct { message string statusCode int @@ -397,7 +539,10 @@ type validationFile struct { Error string // Error message if validation failed. } -const validationFileStaleTimeout = 5 * time.Minute +type blockUploadStateResult struct { + State string `json:"result"` + Error string `json:"error,omitempty"` +} type blockUploadState int @@ -430,12 +575,7 @@ func (c *MultitenantCompactor) GetBlockUploadStateHandler(w http.ResponseWriter, return } - type result struct { - State string `json:"result"` - Error string `json:"error,omitempty"` - } - - res := result{} + res := blockUploadStateResult{} switch s { case blockIsComplete: @@ -516,7 +656,7 @@ func (c *MultitenantCompactor) getBlockUploadState(ctx context.Context, userBkt if v.Error != "" { return blockValidationFailed, meta, v, err } - if time.Since(time.UnixMilli(v.LastUpdate)) < validationFileStaleTimeout { + if time.Since(time.UnixMilli(v.LastUpdate)) < validationHeartbeatTimeout { return blockValidationInProgress, meta, v, nil } return blockValidationStale, meta, v, nil @@ -559,3 +699,48 @@ func (c *MultitenantCompactor) loadValidation(ctx context.Context, userBkt objst return v, nil } + +func (c *MultitenantCompactor) uploadValidationWithError(ctx context.Context, blockID ulid.ULID, + userBkt objstore.Bucket, errorStr string) error { + val := validationFile{ + LastUpdate: time.Now().UnixMilli(), + Error: errorStr, + } + dst := path.Join(blockID.String(), validationFilename) + if err := marshalAndUploadToBucket(ctx, userBkt, dst, val); err != nil { + return errors.Wrapf(err, "failed uploading %s to bucket", validationFilename) + } + return nil +} + +func (c *MultitenantCompactor) uploadValidation(ctx context.Context, blockID ulid.ULID, userBkt objstore.Bucket) error { + return c.uploadValidationWithError(ctx, blockID, userBkt, "") +} + +func (c *MultitenantCompactor) periodicValidationUpdater(ctx context.Context, logger log.Logger, blockID ulid.ULID, userBkt objstore.Bucket, cancelFn func(), interval time.Duration) { + ticker := time.NewTicker(interval) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + if err := c.uploadValidation(ctx, blockID, userBkt); err != nil { + level.Warn(logger).Log("msg", "error during periodic update of validation file", "err", err) + cancelFn() + return + } + } + } +} + +func marshalAndUploadToBucket(ctx context.Context, bkt objstore.Bucket, pth string, val interface{}) error { + buf, err := json.Marshal(val) + if err != nil { + return err + } + if err := bkt.Upload(ctx, pth, bytes.NewReader(buf)); err != nil { + return err + } + return nil +} diff --git a/pkg/compactor/block_upload_test.go b/pkg/compactor/block_upload_test.go index adaada2f883..75d6d704ff4 100644 --- a/pkg/compactor/block_upload_test.go +++ b/pkg/compactor/block_upload_test.go @@ -13,12 +13,15 @@ import ( "net/url" "path" "strings" + "sync" "testing" "time" "github.com/go-kit/log" "github.com/gorilla/mux" + "github.com/grafana/dskit/test" "github.com/oklog/ulid" + "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -47,8 +50,8 @@ func verifyUploadedMeta(t *testing.T, bkt *bucket.ClientMock, expMeta metadata.M assert.Equal(t, expMeta, gotMeta) } -// Test MultitenantCompactor.HandleBlockUpload with uploadComplete=false (the default). -func TestMultitenantCompactor_HandleBlockUpload_Create(t *testing.T) { +// Test MultitenantCompactor.StartBlockUpload +func TestMultitenantCompactor_StartBlockUpload(t *testing.T) { const tenantID = "test" const blockID = "01G3FZ0JWJYJC0ZM6Y9778P6KD" bULID := ulid.MustParse(blockID) @@ -680,7 +683,7 @@ func TestMultitenantCompactor_HandleBlockUpload_Create(t *testing.T) { } } -// Test MultitenantCompactor.UploadBlockFile. +// Test MultitenantCompactor.UploadBlockFile func TestMultitenantCompactor_UploadBlockFile(t *testing.T) { const tenantID = "test" const blockID = "01G3FZ0JWJYJC0ZM6Y9778P6KD" @@ -1056,16 +1059,17 @@ func setUpGet(bkt *bucket.ClientMock, pth string, content []byte, err error) { }) } -// Test MultitenantCompactor.HandleBlockUpload with uploadComplete=true. -func TestMultitenantCompactor_HandleBlockUpload_Complete(t *testing.T) { +// Test MultitenantCompactor.FinishBlockUpload +func TestMultitenantCompactor_FinishBlockUpload(t *testing.T) { const tenantID = "test" const blockID = "01G3FZ0JWJYJC0ZM6Y9778P6KD" uploadingMetaPath := path.Join(tenantID, blockID, uploadingMetaFilename) - validationPath := path.Join(tenantID, blockID, validationFilename) metaPath := path.Join(tenantID, blockID, block.MetaFilename) + injectedError := fmt.Errorf("injected error") validMeta := metadata.Meta{ BlockMeta: tsdb.BlockMeta{ - ULID: ulid.MustParse(blockID), + Version: metadata.TSDBVersion1, + ULID: ulid.MustParse(blockID), }, Thanos: metadata.Thanos{ Labels: map[string]string{ @@ -1078,33 +1082,33 @@ func TestMultitenantCompactor_HandleBlockUpload_Complete(t *testing.T) { }, { RelPath: "chunks/000001", - SizeBytes: 1024, + SizeBytes: 2, }, }, }, } - setUpSuccessfulComplete := func(bkt *bucket.ClientMock) { - metaJSON, err := json.Marshal(validMeta) + validSetup := func(t *testing.T, bkt objstore.Bucket) { + err := marshalAndUploadToBucket(context.Background(), bkt, uploadingMetaPath, validMeta) require.NoError(t, err) - bkt.MockExists(metaPath, false, nil) - setUpGet(bkt, uploadingMetaPath, metaJSON, nil) - setUpGet(bkt, validationPath, nil, bucket.ErrObjectDoesNotExist) - bkt.MockUpload(metaPath, nil) - bkt.MockDelete(uploadingMetaPath, nil) + for _, file := range validMeta.Thanos.Files { + content := bytes.NewReader(make([]byte, file.SizeBytes)) + err = bkt.Upload(context.Background(), path.Join(tenantID, blockID, file.RelPath), content) + require.NoError(t, err) + } } + testCases := []struct { name string tenantID string blockID string + setUpBucket func(*testing.T, objstore.Bucket) + errorInjector func(op bucket.Operation, name string) error disableBlockUpload bool - expMeta metadata.Meta expBadRequest string expConflict string expNotFound string expInternalServerError bool - setUpBucketMock func(bkt *bucket.ClientMock) - verifyUpload func(*testing.T, *bucket.ClientMock, metadata.Meta) }{ { name: "without tenant ID", @@ -1133,102 +1137,79 @@ func TestMultitenantCompactor_HandleBlockUpload_Complete(t *testing.T) { name: "complete block already exists", tenantID: tenantID, blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, true, nil) + setUpBucket: func(t *testing.T, bkt objstore.Bucket) { + err := marshalAndUploadToBucket(context.Background(), bkt, metaPath, validMeta) + require.NoError(t, err) }, expConflict: "block already exists", }, { - name: "checking for complete block fails", - tenantID: tenantID, - blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, fmt.Errorf("test")) - }, + name: "checking for complete block fails", + tenantID: tenantID, + blockID: blockID, + errorInjector: bucket.InjectErrorOn(bucket.OpExists, metaPath, injectedError), expInternalServerError: true, }, { - name: "missing in-flight meta file", - tenantID: tenantID, - blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, nil) - setUpGet(bkt, uploadingMetaPath, nil, bucket.ErrObjectDoesNotExist) - }, + name: "missing in-flight meta file", + tenantID: tenantID, + blockID: blockID, expNotFound: "block upload not started", }, { name: "downloading in-flight meta file fails", tenantID: tenantID, blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, nil) - setUpGet(bkt, uploadingMetaPath, nil, fmt.Errorf("test")) + setUpBucket: func(t *testing.T, bkt objstore.Bucket) { + err := marshalAndUploadToBucket(context.Background(), bkt, uploadingMetaPath, validMeta) + require.NoError(t, err) }, + errorInjector: bucket.InjectErrorOn(bucket.OpGet, uploadingMetaPath, injectedError), expInternalServerError: true, }, { name: "corrupt in-flight meta file", tenantID: tenantID, blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, nil) - setUpGet(bkt, uploadingMetaPath, []byte("{"), nil) - }, - expInternalServerError: true, - }, - { - name: "uploading meta file fails", - tenantID: tenantID, - blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, nil) - metaJSON, err := json.Marshal(validMeta) + setUpBucket: func(t *testing.T, bkt objstore.Bucket) { + err := bkt.Upload(context.Background(), uploadingMetaPath, bytes.NewReader([]byte("{"))) require.NoError(t, err) - setUpGet(bkt, uploadingMetaPath, metaJSON, nil) - setUpGet(bkt, validationPath, nil, bucket.ErrObjectDoesNotExist) - bkt.MockUpload(metaPath, fmt.Errorf("test")) }, expInternalServerError: true, }, { - name: "removing in-flight meta file fails", - tenantID: tenantID, - blockID: blockID, - setUpBucketMock: func(bkt *bucket.ClientMock) { - bkt.MockExists(metaPath, false, nil) - metaJSON, err := json.Marshal(validMeta) - require.NoError(t, err) - setUpGet(bkt, uploadingMetaPath, metaJSON, nil) - setUpGet(bkt, validationPath, nil, bucket.ErrObjectDoesNotExist) - bkt.MockUpload(metaPath, nil) - bkt.MockDelete(uploadingMetaPath, fmt.Errorf("test")) - }, - expMeta: validMeta, - verifyUpload: verifyUploadedMeta, - }, - { - name: "valid request", - tenantID: tenantID, - blockID: blockID, - setUpBucketMock: setUpSuccessfulComplete, - expMeta: validMeta, - verifyUpload: verifyUploadedMeta, + name: "uploading meta file fails", + tenantID: tenantID, + blockID: blockID, + setUpBucket: validSetup, + errorInjector: bucket.InjectErrorOn(bucket.OpUpload, metaPath, injectedError), + expInternalServerError: true, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - var bkt bucket.ClientMock - if tc.setUpBucketMock != nil { - tc.setUpBucketMock(&bkt) + bkt := objstore.NewInMemBucket() + injectedBkt := bucket.ErrorInjectedBucketClient{ + Bucket: bkt, + Injector: tc.errorInjector, } + if tc.setUpBucket != nil { + tc.setUpBucket(t, bkt) + } + cfgProvider := newMockConfigProvider() cfgProvider.blockUploadEnabled[tc.tenantID] = !tc.disableBlockUpload c := &MultitenantCompactor{ logger: log.NewNopLogger(), - bucketClient: &bkt, + bucketClient: &injectedBkt, cfgProvider: cfgProvider, } + + c.compactorCfg.BlockUpload = BlockUploadConfig{ + BlockValidationEnabled: false, + } + c.compactorCfg.DataDir = t.TempDir() + r := httptest.NewRequest(http.MethodPost, fmt.Sprintf("/api/v1/upload/block/%s/finish", tc.blockID), nil) if tc.tenantID != "" { r = r.WithContext(user.InjectOrgID(r.Context(), tenantID)) @@ -1259,13 +1240,252 @@ func TestMultitenantCompactor_HandleBlockUpload_Complete(t *testing.T) { default: assert.Equal(t, http.StatusOK, resp.StatusCode) assert.Empty(t, string(body)) + exists, err := bkt.Exists(context.Background(), path.Join(tc.blockID, block.MetaFilename)) + require.NoError(t, err) + require.True(t, exists) } + }) + } +} - bkt.AssertExpectations(t) +func TestMultitenantCompactor_ValidateAndComplete(t *testing.T) { + const tenantID = "test" + const blockID = "01G3FZ0JWJYJC0ZM6Y9778P6KD" + injectedError := fmt.Errorf("injected error") - if tc.verifyUpload != nil { - tc.verifyUpload(t, &bkt, tc.expMeta) + uploadingMetaPath := path.Join(tenantID, blockID, uploadingMetaFilename) + validationPath := path.Join(tenantID, blockID, validationFilename) + metaPath := path.Join(tenantID, blockID, block.MetaFilename) + + validationSucceeds := func(_ context.Context) error { return nil } + + testCases := []struct { + name string + errorInjector func(op bucket.Operation, name string) error + validation func(context.Context) error + expectValidationFile bool + expectErrorInValidationFile bool + expectTempUploadingMeta bool + expectMeta bool + }{ + { + name: "validation fails", + validation: func(_ context.Context) error { return injectedError }, + expectValidationFile: true, + expectErrorInValidationFile: true, + expectTempUploadingMeta: true, + expectMeta: false, + }, + { + name: "validation fails, uploading error fails", + errorInjector: bucket.InjectErrorOn(bucket.OpUpload, validationPath, injectedError), + validation: func(_ context.Context) error { return injectedError }, + expectValidationFile: true, + expectErrorInValidationFile: false, + expectTempUploadingMeta: true, + expectMeta: false, + }, + { + name: "uploading meta file fails", + errorInjector: bucket.InjectErrorOn(bucket.OpUpload, metaPath, injectedError), + validation: validationSucceeds, + expectValidationFile: true, + expectErrorInValidationFile: true, + expectTempUploadingMeta: true, + expectMeta: false, + }, + { + name: "uploading meta file fails, uploading error fails", + errorInjector: func(op bucket.Operation, target string) error { + if op == bucket.OpUpload && (target == metaPath || target == validationPath) { + return injectedError + } + return nil + }, + validation: validationSucceeds, + expectValidationFile: true, + expectErrorInValidationFile: false, + expectTempUploadingMeta: true, + expectMeta: false, + }, + { + name: "removing in-flight meta file fails", + errorInjector: bucket.InjectErrorOn(bucket.OpDelete, uploadingMetaPath, injectedError), + validation: validationSucceeds, + expectValidationFile: false, + expectTempUploadingMeta: true, + expectMeta: true, + }, + { + name: "removing validation file fails", + errorInjector: bucket.InjectErrorOn(bucket.OpDelete, validationPath, injectedError), + validation: validationSucceeds, + expectValidationFile: true, + expectErrorInValidationFile: false, + expectTempUploadingMeta: false, + expectMeta: true, + }, + { + name: "valid request", + validation: validationSucceeds, + expectValidationFile: false, + expectTempUploadingMeta: false, + expectMeta: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + bkt := objstore.NewInMemBucket() + var injectedBkt objstore.Bucket = bkt + if tc.errorInjector != nil { + injectedBkt = &bucket.ErrorInjectedBucketClient{ + Bucket: bkt, + Injector: tc.errorInjector, + } + } + cfgProvider := newMockConfigProvider() + c := &MultitenantCompactor{ + logger: log.NewNopLogger(), + bucketClient: injectedBkt, + cfgProvider: cfgProvider, } + userBkt := bucket.NewUserBucketClient(tenantID, injectedBkt, cfgProvider) + + meta := metadata.Meta{} + marshalAndUploadJSON(t, bkt, uploadingMetaPath, meta) + v := validationFile{} + marshalAndUploadJSON(t, bkt, validationPath, v) + + c.validateAndCompleteBlockUpload(log.NewNopLogger(), userBkt, ulid.MustParse(blockID), &meta, tc.validation) + + tempUploadingMetaExists, err := bkt.Exists(context.Background(), uploadingMetaPath) + require.NoError(t, err) + require.Equal(t, tempUploadingMetaExists, tc.expectTempUploadingMeta) + + metaExists, err := bkt.Exists(context.Background(), metaPath) + require.NoError(t, err) + require.Equal(t, metaExists, tc.expectMeta) + + if !tc.expectValidationFile { + exists, err := bkt.Exists(context.Background(), validationPath) + require.NoError(t, err) + require.False(t, exists) + return + } + + r, err := bkt.Get(context.Background(), validationPath) + require.NoError(t, err) + decoder := json.NewDecoder(r) + err = decoder.Decode(&v) + require.NoError(t, err) + + if tc.expectErrorInValidationFile { + require.NotEmpty(t, v.Error) + } else { + require.Empty(t, v.Error) + } + }) + } +} + +func TestMultitenantCompactor_PeriodicValidationUpdater(t *testing.T) { + const tenantID = "test" + const blockID = "01G3FZ0JWJYJC0ZM6Y9778P6KD" + injectedError := fmt.Errorf("injected error") + validationPath := path.Join(tenantID, blockID, validationFilename) + + heartbeatInterval := 50 * time.Millisecond + + validationExists := func(t *testing.T, bkt objstore.Bucket) bool { + exists, err := bkt.Exists(context.Background(), validationPath) + require.NoError(t, err) + return exists + } + + testCases := []struct { + name string + errorInjector func(op bucket.Operation, name string) error + cancelContext bool + assertions func(t *testing.T, ctx context.Context, bkt objstore.Bucket) + }{ + { + name: "updating validation file fails", + errorInjector: bucket.InjectErrorOn(bucket.OpUpload, validationPath, injectedError), + assertions: func(t *testing.T, ctx context.Context, bkt objstore.Bucket) { + <-ctx.Done() + require.True(t, errors.Is(context.Canceled, ctx.Err())) + require.False(t, validationExists(t, bkt)) + }, + }, + { + name: "updating validation file succeeds", + assertions: func(t *testing.T, ctx context.Context, bkt objstore.Bucket) { + test.Poll(t, heartbeatInterval*2, true, func() interface{} { + return validationExists(t, bkt) + }) + + v := validationFile{} + r, err := bkt.Get(context.Background(), validationPath) + require.NoError(t, err) + decoder := json.NewDecoder(r) + err = decoder.Decode(&v) + require.NoError(t, err) + require.NotEqual(t, 0, v.LastUpdate) + require.Empty(t, v.Error) + }, + }, + { + name: "context cancelled before update", + cancelContext: true, + assertions: func(t *testing.T, ctx context.Context, bkt objstore.Bucket) { + require.False(t, validationExists(t, bkt)) + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + + bkt := objstore.NewInMemBucket() + var injectedBkt objstore.Bucket = bkt + if tc.errorInjector != nil { + injectedBkt = &bucket.ErrorInjectedBucketClient{ + Bucket: bkt, + Injector: tc.errorInjector, + } + } + + cfgProvider := newMockConfigProvider() + c := &MultitenantCompactor{ + logger: log.NewNopLogger(), + bucketClient: injectedBkt, + cfgProvider: cfgProvider, + } + userBkt := bucket.NewUserBucketClient(tenantID, injectedBkt, cfgProvider) + ctx, cancel := context.WithCancel(context.Background()) + + heartbeatInterval := heartbeatInterval + if tc.cancelContext { + cancel() + heartbeatInterval = 1 * time.Hour // to avoid racing a heartbeat + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + c.periodicValidationUpdater(ctx, log.NewNopLogger(), ulid.MustParse(blockID), userBkt, cancel, heartbeatInterval) + }() + + if !tc.cancelContext { + time.Sleep(heartbeatInterval) + } + + tc.assertions(t, ctx, bkt) + + cancel() + wg.Wait() }) } } @@ -1273,9 +1493,8 @@ func TestMultitenantCompactor_HandleBlockUpload_Complete(t *testing.T) { // marshalAndUploadJSON is a test helper for uploading a meta file to a certain path in a bucket. func marshalAndUploadJSON(t *testing.T, bkt objstore.Bucket, pth string, val interface{}) { t.Helper() - buf, err := json.Marshal(val) + err := marshalAndUploadToBucket(context.Background(), bkt, pth, val) require.NoError(t, err) - require.NoError(t, bkt.Upload(context.Background(), pth, bytes.NewReader(buf))) } func TestMultitenantCompactor_GetBlockUploadStateHandler(t *testing.T) { diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 1774f398a07..b68d913d448 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -116,11 +116,14 @@ type Config struct { // Allow downstream projects to customise the blocks compactor. BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` + + BlockUpload BlockUploadConfig `yaml:"block_upload"` } // RegisterFlags registers the MultitenantCompactor flags. func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { cfg.ShardingRing.RegisterFlags(f, logger) + cfg.BlockUpload.RegisterFlagsWithPrefix("compactor.block-upload", f, logger) cfg.BlockRanges = mimir_tsdb.DurationList{2 * time.Hour, 12 * time.Hour, 24 * time.Hour} cfg.retryMinBackoff = 10 * time.Second diff --git a/pkg/storage/bucket/error_injected_bucket_client.go b/pkg/storage/bucket/error_injected_bucket_client.go new file mode 100644 index 00000000000..b1373282b55 --- /dev/null +++ b/pkg/storage/bucket/error_injected_bucket_client.go @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package bucket + +import ( + "context" + "io" + + "github.com/thanos-io/objstore" +) + +type Operation uint8 + +const ( + OpGet = 1 << iota + OpExists + OpUpload + OpDelete + OpIter + OpAttributes +) + +// InjectErrorOn injects the provided error if the operation matches the provided mask and target +func InjectErrorOn(mask Operation, target string, err error) func(Operation, string) error { + return func(op Operation, tar string) error { + if (op&mask) == op && tar == target { + return err + } + return nil + } +} + +// ErrorInjectedBucketClient facilitates injecting errors into a bucket client +type ErrorInjectedBucketClient struct { + objstore.Bucket + Injector func(Operation, string) error +} + +func (b *ErrorInjectedBucketClient) injectError(op Operation, name string) error { + if b.Injector == nil { + return nil + } + return b.Injector(op, name) +} + +func (b *ErrorInjectedBucketClient) Get(ctx context.Context, name string) (io.ReadCloser, error) { + if err := b.injectError(OpGet, name); err != nil { + return nil, err + } + return b.Bucket.Get(ctx, name) +} + +func (b *ErrorInjectedBucketClient) Exists(ctx context.Context, name string) (bool, error) { + if err := b.injectError(OpExists, name); err != nil { + return false, err + } + return b.Bucket.Exists(ctx, name) +} + +func (b *ErrorInjectedBucketClient) Upload(ctx context.Context, name string, r io.Reader) error { + if err := b.injectError(OpUpload, name); err != nil { + return err + } + return b.Bucket.Upload(ctx, name, r) +} + +func (b *ErrorInjectedBucketClient) Delete(ctx context.Context, name string) error { + if err := b.injectError(OpDelete, name); err != nil { + return err + } + return b.Bucket.Delete(ctx, name) +} + +func (b *ErrorInjectedBucketClient) Iter(ctx context.Context, dir string, f func(string) error, options ...objstore.IterOption) error { + if err := b.injectError(OpIter, dir); err != nil { + return err + } + return b.Bucket.Iter(ctx, dir, f, options...) +} + +func (b *ErrorInjectedBucketClient) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) { + if err := b.injectError(OpAttributes, name); err != nil { + return objstore.ObjectAttributes{}, err + } + return b.Bucket.Attributes(ctx, name) +} From a98e77c9751e934467de80a990cd81936e74f29a Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 3 Mar 2023 10:26:27 +0100 Subject: [PATCH 10/19] Jsonnet: honor the minimum shard size configured (#4363) Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + operations/mimir/shuffle-sharding.libsonnet | 48 ++++++++++----------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a36dbe47d6d..4734c3816a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [ENHANCEMENT] Add support for ruler auto-scaling. #4046 * [ENHANCEMENT] Add optional `weight` param to `newQuerierScaledObject` and `newRulerQuerierScaledObject` to allow running multiple querier deployments on different node types. #4141 * [ENHANCEMENT] Add support for query-frontend and ruler-query-frontend auto-scaling. #4199 +* [BUGFIX] Shuffle sharding: when applying user class limits, honor the minimum shard size configured in `$._config.shuffle_sharding.*`. #4363 ### Mimirtool diff --git a/operations/mimir/shuffle-sharding.libsonnet b/operations/mimir/shuffle-sharding.libsonnet index 2ce7e004866..ba05d82274a 100644 --- a/operations/mimir/shuffle-sharding.libsonnet +++ b/operations/mimir/shuffle-sharding.libsonnet @@ -37,58 +37,58 @@ // Target 300K active series. medium_small_user+:: { - ingestion_tenant_shard_size: 6, - store_gateway_tenant_shard_size: 3, - ruler_tenant_shard_size: 2, + ingestion_tenant_shard_size: std.max(6, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(3, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(2, $._config.shuffle_sharding.ruler_shard_size), }, // Target 1M active series. small_user+:: { - ingestion_tenant_shard_size: 15, - store_gateway_tenant_shard_size: 6, - ruler_tenant_shard_size: 2, + ingestion_tenant_shard_size: std.max(15, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(6, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(2, $._config.shuffle_sharding.ruler_shard_size), }, // Target 3M active series. medium_user+:: { - ingestion_tenant_shard_size: 45, - store_gateway_tenant_shard_size: 9, - ruler_tenant_shard_size: 2, + ingestion_tenant_shard_size: std.max(45, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(9, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(2, $._config.shuffle_sharding.ruler_shard_size), }, // Target 6M active series. big_user+:: { - ingestion_tenant_shard_size: 90, - store_gateway_tenant_shard_size: 12, - ruler_tenant_shard_size: 3, + ingestion_tenant_shard_size: std.max(90, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(12, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(3, $._config.shuffle_sharding.ruler_shard_size), }, // Target 12M active series. super_user+:: { - ingestion_tenant_shard_size: 180, - store_gateway_tenant_shard_size: 18, - ruler_tenant_shard_size: 6, + ingestion_tenant_shard_size: std.max(180, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(18, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(6, $._config.shuffle_sharding.ruler_shard_size), }, // Target 16M active series. mega_user+:: { - ingestion_tenant_shard_size: 240, - store_gateway_tenant_shard_size: 24, - ruler_tenant_shard_size: 8, + ingestion_tenant_shard_size: std.max(240, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(24, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(8, $._config.shuffle_sharding.ruler_shard_size), }, // Target 24M active series. user_24M+:: { - ingestion_tenant_shard_size: 360, - store_gateway_tenant_shard_size: 30, - ruler_tenant_shard_size: 8, + ingestion_tenant_shard_size: std.max(360, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(30, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(8, $._config.shuffle_sharding.ruler_shard_size), }, // Target 32M active series. user_32M+:: { - ingestion_tenant_shard_size: 480, - store_gateway_tenant_shard_size: 42, - ruler_tenant_shard_size: 12, + ingestion_tenant_shard_size: std.max(480, $._config.shuffle_sharding.ingester_shard_size), + store_gateway_tenant_shard_size: std.max(42, $._config.shuffle_sharding.store_gateway_shard_size), + ruler_tenant_shard_size: std.max(12, $._config.shuffle_sharding.ruler_shard_size), }, }, }, From 0100cc958307837f44d45e75b48eda326040723d Mon Sep 17 00:00:00 2001 From: ying-jeanne <74549700+ying-jeanne@users.noreply.github.com> Date: Fri, 3 Mar 2023 11:22:47 +0100 Subject: [PATCH 11/19] [CHANGE] Ruler: set default `evaluation-delay-duration` to 1m (#4250) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * change the default evaluation delay of ruler to 1m * revert the changes in ruler test * change integration test to set ruler default value * fix integration tests * Update integration/configs.go Co-authored-by: Marco Pracucci * Update CHANGELOG.md Co-authored-by: Peter Štibraný --------- Co-authored-by: Marco Pracucci Co-authored-by: Peter Štibraný --- CHANGELOG.md | 1 + cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- cmd/mimir/help.txt.tmpl | 2 +- .../sources/mimir/references/configuration-parameters/index.md | 2 +- integration/configs.go | 3 ++- pkg/util/validation/limits.go | 1 + 7 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4734c3816a8..6deb26d4e1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Grafana Mimir +* [CHANGE] Ruler: changed default value of `-ruler.evaluation-delay-duration` option from 0 to 1m. #4250 * [CHANGE] Querier: Errors with status code `422` coming from the store-gateway are propagated and not converted to the consistency check error anymore. #4100 * [CHANGE] Store-gateway: When a query hits `max_fetched_chunks_per_query` and `max_fetched_series_per_query` limits, an error with the status code `422` is created and returned. #4056 * [CHANGE] Packaging: Migrate FPM packaging solution to NFPM. Rationalize packages dependencies and add package for all binaries. #3911 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 630fe762cc6..81a8bb20068 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -3275,7 +3275,7 @@ "required": false, "desc": "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed.", "fieldValue": null, - "fieldDefaultValue": 0, + "fieldDefaultValue": 60000000000, "fieldFlag": "ruler.evaluation-delay-duration", "fieldType": "duration" }, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index a4086fb0a64..82aa08a90cd 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1756,7 +1756,7 @@ Usage of ./cmd/mimir/mimir: -ruler.enabled-tenants comma-separated-list-of-strings Comma separated list of tenants whose rules this ruler can evaluate. If specified, only these tenants will be handled by ruler, otherwise this ruler can process rules from all tenants. Subject to sharding. -ruler.evaluation-delay-duration duration - Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed. + Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed. (default 1m) -ruler.evaluation-interval duration How frequently to evaluate rules (default 1m0s) -ruler.external.url string diff --git a/cmd/mimir/help.txt.tmpl b/cmd/mimir/help.txt.tmpl index 0d2aa6071c9..0d179a45692 100644 --- a/cmd/mimir/help.txt.tmpl +++ b/cmd/mimir/help.txt.tmpl @@ -528,7 +528,7 @@ Usage of ./cmd/mimir/mimir: -ruler.enable-api Enable the ruler config API. (default true) -ruler.evaluation-delay-duration duration - Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed. + Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed. (default 1m) -ruler.external.url string URL of alerts return path. -ruler.max-rule-groups-per-tenant int diff --git a/docs/sources/mimir/references/configuration-parameters/index.md b/docs/sources/mimir/references/configuration-parameters/index.md index 405a0422fa9..63843a798c1 100644 --- a/docs/sources/mimir/references/configuration-parameters/index.md +++ b/docs/sources/mimir/references/configuration-parameters/index.md @@ -2710,7 +2710,7 @@ The `limits` block configures default and per-tenant limits imposed by component # Duration to delay the evaluation of rules to ensure the underlying metrics # have been pushed. # CLI flag: -ruler.evaluation-delay-duration -[ruler_evaluation_delay_duration: | default = 0s] +[ruler_evaluation_delay_duration: | default = 1m] # The tenant's shard size when sharding is used by ruler. Value of 0 disables # shuffle sharding for the tenant, and tenant rules will be sharded across all diff --git a/integration/configs.go b/integration/configs.go index 4411c783402..a362927685e 100644 --- a/integration/configs.go +++ b/integration/configs.go @@ -116,7 +116,8 @@ var ( RulerFlags = func() map[string]string { return map[string]string{ - "-ruler.poll-interval": "2s", + "-ruler.poll-interval": "2s", + "-ruler.evaluation-delay-duration": "0", } } diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 0a4272c0ac1..42d8d0ef9bb 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -226,6 +226,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.QueryShardingMaxShardedQueries, "query-frontend.query-sharding-max-sharded-queries", 128, "The max number of sharded queries that can be run for a given received query. 0 to disable limit.") f.Var(&l.SplitInstantQueriesByInterval, "query-frontend.split-instant-queries-by-interval", "Split instant queries by an interval and execute in parallel. 0 to disable it.") + _ = l.RulerEvaluationDelay.Set("1m") f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed.") f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The tenant's shard size when sharding is used by ruler. Value of 0 disables shuffle sharding for the tenant, and tenant rules will be sharded across all ruler replicas.") f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 20, "Maximum number of rules per rule group per-tenant. 0 to disable.") From ef7c1b3fad84715309d21d6665bc4528c363a02f Mon Sep 17 00:00:00 2001 From: ying-jeanne <74549700+ying-jeanne@users.noreply.github.com> Date: Fri, 3 Mar 2023 12:39:28 +0100 Subject: [PATCH 12/19] [Chore] Update jsonnet manifest create query frontend discovery only when it is necessary (#4353) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [Chore] update jsonnet manifest, avoid setting querier.frontend-address or create query-frontend-discovery when deployement mode is microserivces or query-scheduler is enabled * linter and changelog * Helm: fix parity with jsonnet on query frontend headless service Do not generate query-frontend-headless service if query scheduler is enabled Signed-off-by: György Krajcsovits * Apply suggestions from code review Co-authored-by: Marco Pracucci * correct changelog * regenerate helm golden files * Update CHANGELOG.md --------- Signed-off-by: György Krajcsovits Co-authored-by: György Krajcsovits Co-authored-by: Marco Pracucci --- CHANGELOG.md | 1 + .../charts/mimir-distributed/CHANGELOG.md | 1 + .../query-frontend-svc-headless.yaml | 2 ++ .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 30 ----------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../query-frontend-svc-headless.yaml | 32 ------------------- .../test-autoscaling-generated.yaml | 20 ------------ .../mimir-tests/test-consul-generated.yaml | 20 ------------ .../test-consul-multi-zone-generated.yaml | 20 ------------ .../test-consul-ruler-disabled-generated.yaml | 20 ------------ .../mimir-tests/test-defaults-generated.yaml | 20 ------------ ...t-deployment-mode-migration-generated.yaml | 23 ------------- ...est-disable-chunk-streaming-generated.yaml | 20 ------------ .../test-extra-runtime-config-generated.yaml | 20 ------------ .../test-helm-parity-generated.yaml | 20 ------------ ...bel-migration-step-0-before-generated.yaml | 20 ------------ ...ster-label-migration-step-1-generated.yaml | 20 ------------ ...ster-label-migration-step-2-generated.yaml | 20 ------------ ...ster-label-migration-step-3-generated.yaml | 20 ------------ ...ist-migration-step-0-before-generated.yaml | 20 ------------ ...memberlist-migration-step-1-generated.yaml | 20 ------------ ...memberlist-migration-step-2-generated.yaml | 20 ------------ ...memberlist-migration-step-3-generated.yaml | 20 ------------ ...memberlist-migration-step-4-generated.yaml | 20 ------------ ...memberlist-migration-step-5-generated.yaml | 20 ------------ ...list-migration-step-6-final-generated.yaml | 20 ------------ .../test-multi-zone-generated.yaml | 20 ------------ ...zone-with-ongoing-migration-generated.yaml | 20 ------------ ...query-scheduler-consul-ring-generated.yaml | 20 ------------ ...and-ruler-remote-evaluation-generated.yaml | 23 ------------- ...y-scheduler-memberlist-ring-generated.yaml | 23 ------------- ...ist-ring-read-path-disabled-generated.yaml | 20 ------------ .../test-query-sharding-generated.yaml | 20 ------------ ...est-ruler-remote-evaluation-generated.yaml | 20 ------------ ...remote-evaluation-migration-generated.yaml | 20 ------------ .../test-shuffle-sharding-generated.yaml | 20 ------------ ...sharding-read-path-disabled-generated.yaml | 20 ------------ .../test-storage-azure-generated.yaml | 20 ------------ .../test-storage-gcs-generated.yaml | 20 ------------ .../test-storage-s3-generated.yaml | 20 ------------ operations/mimir/querier.libsonnet | 3 +- operations/mimir/query-frontend.libsonnet | 2 +- 54 files changed, 7 insertions(+), 1169 deletions(-) delete mode 100644 operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/large-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/small-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml delete mode 100644 operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6deb26d4e1e..7f501e04f28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,6 +81,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. ### Jsonnet +* [CHANGE] Create the `query-frontend-discovery` service only when Mimir is deployed in microservice mode without query-scheduler. #4353 * [CHANGE] Add results cache backend config to `ruler-query-frontend` configuration to allow cache reuse for cardinality-estimation based sharding. #4257 * [ENHANCEMENT] Add support for ruler auto-scaling. #4046 * [ENHANCEMENT] Add optional `weight` param to `newQuerierScaledObject` and `newRulerQuerierScaledObject` to allow running multiple querier deployments on different node types. #4141 diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index e498fa00ab1..2fbcf33d4b5 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -30,6 +30,7 @@ Entries should include a reference to the Pull Request that introduced the chang * [ENHANCEMENT] Support autoscaling/v2 HorizontalPodAutoscaler for nginx autoscaling starting with Kubernetes 1.23. #4285 * [BUGFIX] Allow override of Kubernetes version for nginx HPA. #4299 +* [BUGFIX] Do not generate query-frontend-headless service if query scheduler is enabled. Fixes parity with jsonnet. #4353 ## 4.2.0 diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml index 595669a208a..b30e7f86630 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.query_scheduler.enabled }} apiVersion: v1 kind: Service metadata: @@ -26,3 +27,4 @@ spec: targetPort: grpc selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "query-frontend") | nindent 4 }} +{{- end }} diff --git a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 562378751bb..00000000000 --- a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: gateway-enterprise-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: gateway-enterprise-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: gateway-enterprise-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 8a5c4b08db7..00000000000 --- a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: gateway-nginx-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: gateway-nginx-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: gateway-nginx-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 2886b5c987e..00000000000 --- a/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: graphite-enabled-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: graphite-enabled-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: graphite-enabled-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/large-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/large-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index a70d8c0ce8d..00000000000 --- a/operations/helm/tests/large-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: large-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: large-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: large-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 68835cc57c2..00000000000 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: metamonitoring-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: metamonitoring-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: metamonitoring-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 54698700e0c..00000000000 --- a/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: scheduler-name-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: scheduler-name-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: scheduler-name-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/small-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/small-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index e0d3ce44863..00000000000 --- a/operations/helm/tests/small-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: small-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: small-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: small-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index a31e0549177..00000000000 --- a/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-enterprise-configmap-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-configmap-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-configmap-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 56d989cc4d3..00000000000 --- a/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-enterprise-k8s-1.25-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-k8s-1.25-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-k8s-1.25-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 6ec59325a95..00000000000 --- a/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,30 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-enterprise-legacy-label-values-enterprise-metrics-query-frontend-headless - labels: - app: enterprise-metrics-query-frontend - heritage: Helm - release: test-enterprise-legacy-label-values - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app: enterprise-metrics-query-frontend - release: test-enterprise-legacy-label-values diff --git a/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 82ac962494a..00000000000 --- a/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-enterprise-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-enterprise-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 50dfd758ceb..00000000000 --- a/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-oss-k8s-1.25-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-k8s-1.25-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-k8s-1.25-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index 84a37d59cfd..00000000000 --- a/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-oss-logical-multizone-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-logical-multizone-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-logical-multizone-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index ab6ff812709..00000000000 --- a/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-oss-multizone-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-multizone-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-multizone-values - app.kubernetes.io/component: query-frontend diff --git a/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml deleted file mode 100644 index a8411c9f1e6..00000000000 --- a/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ /dev/null @@ -1,32 +0,0 @@ ---- -# Source: mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml -apiVersion: v1 -kind: Service -metadata: - name: test-oss-values-mimir-query-frontend-headless - labels: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-values - app.kubernetes.io/component: query-frontend - app.kubernetes.io/managed-by: Helm - prometheus.io/service-monitor: "false" - annotations: - {} - namespace: "citestns" -spec: - type: ClusterIP - clusterIP: None - publishNotReadyAddresses: true - ports: - - port: 8080 - protocol: TCP - name: http-metrics - targetPort: http-metrics - - port: 9095 - protocol: TCP - name: grpc - targetPort: grpc - selector: - app.kubernetes.io/name: mimir - app.kubernetes.io/instance: test-oss-values - app.kubernetes.io/component: query-frontend diff --git a/operations/mimir-tests/test-autoscaling-generated.yaml b/operations/mimir-tests/test-autoscaling-generated.yaml index fe54cae9766..3682d944c50 100644 --- a/operations/mimir-tests/test-autoscaling-generated.yaml +++ b/operations/mimir-tests/test-autoscaling-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-consul-generated.yaml b/operations/mimir-tests/test-consul-generated.yaml index 7306308972c..beeab61848f 100644 --- a/operations/mimir-tests/test-consul-generated.yaml +++ b/operations/mimir-tests/test-consul-generated.yaml @@ -521,26 +521,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-consul-multi-zone-generated.yaml b/operations/mimir-tests/test-consul-multi-zone-generated.yaml index a114bb0cf09..74b9cbcd06b 100644 --- a/operations/mimir-tests/test-consul-multi-zone-generated.yaml +++ b/operations/mimir-tests/test-consul-multi-zone-generated.yaml @@ -613,26 +613,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-consul-ruler-disabled-generated.yaml b/operations/mimir-tests/test-consul-ruler-disabled-generated.yaml index efa0cedf5c4..efa2821558e 100644 --- a/operations/mimir-tests/test-consul-ruler-disabled-generated.yaml +++ b/operations/mimir-tests/test-consul-ruler-disabled-generated.yaml @@ -521,26 +521,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-defaults-generated.yaml b/operations/mimir-tests/test-defaults-generated.yaml index 684645ffeac..d09b3ac0638 100644 --- a/operations/mimir-tests/test-defaults-generated.yaml +++ b/operations/mimir-tests/test-defaults-generated.yaml @@ -235,26 +235,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-deployment-mode-migration-generated.yaml b/operations/mimir-tests/test-deployment-mode-migration-generated.yaml index d04e23f26a7..8476ba23779 100644 --- a/operations/mimir-tests/test-deployment-mode-migration-generated.yaml +++ b/operations/mimir-tests/test-deployment-mode-migration-generated.yaml @@ -621,29 +621,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - - name: query-frontend-gossip-ring - port: 7946 - targetPort: 7946 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml index 008d866942a..9e1d3d215b7 100644 --- a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml +++ b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml @@ -271,26 +271,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-extra-runtime-config-generated.yaml b/operations/mimir-tests/test-extra-runtime-config-generated.yaml index 0887280508d..aa697f4ae7f 100644 --- a/operations/mimir-tests/test-extra-runtime-config-generated.yaml +++ b/operations/mimir-tests/test-extra-runtime-config-generated.yaml @@ -278,26 +278,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-helm-parity-generated.yaml b/operations/mimir-tests/test-helm-parity-generated.yaml index 5187030d289..39c1ad29389 100644 --- a/operations/mimir-tests/test-helm-parity-generated.yaml +++ b/operations/mimir-tests/test-helm-parity-generated.yaml @@ -297,26 +297,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml index 97c9510a10c..7e94e8f994c 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml index 92cb92f5e4b..43a89680448 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml index 1cd3a476c15..5d4407c5fca 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml index ba586c1af3e..7fafdf4f204 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml index 7306308972c..beeab61848f 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml @@ -521,26 +521,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml index 01a8c8f8989..661cc9144d8 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml @@ -554,26 +554,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml index 8fc4f2f9776..515e9aaf89f 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml @@ -554,26 +554,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml index 5ee51f90179..10d2bb7085c 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml @@ -554,26 +554,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml index f47bd78d1a1..c1c0d81376c 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml @@ -554,26 +554,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml index dfe8361ae31..2005a0d1e54 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml @@ -273,26 +273,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml index 97c9510a10c..7e94e8f994c 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-multi-zone-generated.yaml b/operations/mimir-tests/test-multi-zone-generated.yaml index 962a5070cec..98750fd1f55 100644 --- a/operations/mimir-tests/test-multi-zone-generated.yaml +++ b/operations/mimir-tests/test-multi-zone-generated.yaml @@ -368,26 +368,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml index 20ba839e40b..0098beee343 100644 --- a/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml +++ b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml @@ -415,26 +415,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml b/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml index 26726ffb7c7..fe54a4aacc0 100644 --- a/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml @@ -521,26 +521,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml index b050be97a43..d92fd26458b 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml @@ -273,29 +273,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - - name: query-frontend-gossip-ring - port: 7946 - targetPort: 7946 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml index 3c467e00cd9..0170db3b774 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml @@ -273,29 +273,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - - name: query-frontend-gossip-ring - port: 7946 - targetPort: 7946 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml index ca7be253e7c..81a5326865a 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-query-sharding-generated.yaml b/operations/mimir-tests/test-query-sharding-generated.yaml index 5fe1ffb0660..2769568edf4 100644 --- a/operations/mimir-tests/test-query-sharding-generated.yaml +++ b/operations/mimir-tests/test-query-sharding-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml b/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml index 958b34855df..9659e154791 100644 --- a/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml +++ b/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml b/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml index 0dd64b2821e..e26758a4617 100644 --- a/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml +++ b/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-shuffle-sharding-generated.yaml b/operations/mimir-tests/test-shuffle-sharding-generated.yaml index f602b4a0fed..2bc5befaf3e 100644 --- a/operations/mimir-tests/test-shuffle-sharding-generated.yaml +++ b/operations/mimir-tests/test-shuffle-sharding-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml b/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml index 63da625340e..a892cada1fd 100644 --- a/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml +++ b/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-storage-azure-generated.yaml b/operations/mimir-tests/test-storage-azure-generated.yaml index 8b189a7e39f..d56f975fce9 100644 --- a/operations/mimir-tests/test-storage-azure-generated.yaml +++ b/operations/mimir-tests/test-storage-azure-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-storage-gcs-generated.yaml b/operations/mimir-tests/test-storage-gcs-generated.yaml index 97c9510a10c..7e94e8f994c 100644 --- a/operations/mimir-tests/test-storage-gcs-generated.yaml +++ b/operations/mimir-tests/test-storage-gcs-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir-tests/test-storage-s3-generated.yaml b/operations/mimir-tests/test-storage-s3-generated.yaml index dd321f0fa3e..2c6440a6cd3 100644 --- a/operations/mimir-tests/test-storage-s3-generated.yaml +++ b/operations/mimir-tests/test-storage-s3-generated.yaml @@ -270,26 +270,6 @@ spec: --- apiVersion: v1 kind: Service -metadata: - labels: - name: query-frontend - name: query-frontend-discovery - namespace: default -spec: - clusterIP: None - ports: - - name: query-frontend-http-metrics - port: 8080 - targetPort: 8080 - - name: query-frontend-grpc - port: 9095 - targetPort: 9095 - publishNotReadyAddresses: true - selector: - name: query-frontend ---- -apiVersion: v1 -kind: Service metadata: labels: name: query-scheduler diff --git a/operations/mimir/querier.libsonnet b/operations/mimir/querier.libsonnet index 80981078b94..eba308d31a2 100644 --- a/operations/mimir/querier.libsonnet +++ b/operations/mimir/querier.libsonnet @@ -21,7 +21,8 @@ // Limit query concurrency to prevent multi large queries causing an OOM. 'querier.max-concurrent': $._config.querier.concurrency, - 'querier.frontend-address': 'query-frontend-discovery.%(namespace)s.svc.cluster.local:9095' % $._config, + 'querier.frontend-address': if !$._config.is_microservices_deployment_mode || $._config.query_scheduler_enabled then null else + 'query-frontend-discovery.%(namespace)s.svc.cluster.local:9095' % $._config, 'querier.frontend-client.grpc-max-send-msg-size': 100 << 20, // We request high memory but the Go heap is typically very low (< 100MB) and this causes diff --git a/operations/mimir/query-frontend.libsonnet b/operations/mimir/query-frontend.libsonnet index e39a76ec299..1ae6f6240d6 100644 --- a/operations/mimir/query-frontend.libsonnet +++ b/operations/mimir/query-frontend.libsonnet @@ -54,7 +54,7 @@ query_frontend_service: if !$._config.is_microservices_deployment_mode then null else $.util.serviceFor($.query_frontend_deployment, $._config.service_ignored_labels), - query_frontend_discovery_service: if !$._config.is_microservices_deployment_mode then null else + query_frontend_discovery_service: if !$._config.is_microservices_deployment_mode || $._config.query_scheduler_enabled then null else // Make sure that query frontend worker, running in the querier, do resolve // each query-frontend pod IP and NOT the service IP. To make it, we do NOT // use the service cluster IP so that when the service DNS is resolved it From d98bc789dbe6ccdd203a94d61d6516944f6e871d Mon Sep 17 00:00:00 2001 From: Andy Asp <90626759+andyasp@users.noreply.github.com> Date: Fri, 3 Mar 2023 09:39:44 -0500 Subject: [PATCH 13/19] Remove block validation mimirtool changelog entry (#4369) --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f501e04f28..9cc57a39deb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,7 +90,6 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. ### Mimirtool -* [CHANGE] Blocks uploaded with the `mimirtool backfill` command are now by default verified by a compactor before the upload is finalized. #3411 * [FEATURE] Added `keep_firing_for` support to rules configuration. #4099 * [ENHANCEMENT] Add `-tls-insecure-skip-verify` to rules, alertmanager and backfill commands. #4162 From 6030ca6446e0782d1e9e2adf4cb39a3cb9b26418 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 3 Mar 2023 15:55:08 +0100 Subject: [PATCH 14/19] Spread TSDB head compaction over the configured interval (#4364) * Spread TSDB head compaction over the configured interval Signed-off-by: Marco Pracucci * Fixed unit test Signed-off-by: Marco Pracucci * Apply suggestion from code review Signed-off-by: Marco Pracucci * Fix typo in CHANGELOG entry Signed-off-by: Marco Pracucci * Fix typo in CHANGELOG entry Signed-off-by: Marco Pracucci --------- Signed-off-by: Marco Pracucci --- CHANGELOG.md | 2 ++ cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- .../references/configuration-parameters/index.md | 8 +++++--- pkg/ingester/ingester.go | 13 ++++++++++++- pkg/storage/tsdb/config.go | 4 ++-- pkg/storage/tsdb/config_test.go | 2 +- pkg/util/time.go | 13 +++++++++++++ pkg/util/time_test.go | 14 ++++++++++++++ 9 files changed, 51 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cc57a39deb..adfa020dc6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,8 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [ENHANCEMENT] Memcached: added `-*.memcached.min-idle-connections-headroom-percentage` support to configure the minimum number of idle connections to keep open as a percentage (0-100) of the number of recently used idle connections. This feature is disabled when set to a negative value (default), which means idle connections are kept open indefinitely. #4249 * [ENHANCEMENT] Querier and store-gateway: optimized regular expression label matchers with case insensitive alternate operator. #4340 #4357 * [ENHANCEMENT] Compactor: added the experimental flag `-compactor.block-upload.block-validation-enabled` with the default `true` to configure whether block validation occurs on backfilled blocks. #3411 +* [ENHANCEMENT] Ingester: apply a jitter to the first TSDB head compaction interval configured via `-blocks-storage.tsdb.head-compaction-interval`. Subsequent checks will happen at the configured interval. This should help to spread the TSDB head compaction among different ingesters over the configured interval. #4364 +* [ENHANCEMENT] Ingester: the maximum accepted value for `-blocks-storage.tsdb.head-compaction-interval` has been increased from 5m to 15m. #4364 * [BUGFIX] Ingester: remove series from ephemeral storage even if there are no persistent series. #4052 * [BUGFIX] Store-gateway: return `Canceled` rather than `Aborted` or `Internal` error when the calling querier cancels a label names or values request, and return `Internal` if processing the request fails for another reason. #4061 * [BUGFIX] Ingester: reuse memory when ingesting ephemeral series. #4072 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 81a8bb20068..65fc0290c63 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -5859,7 +5859,7 @@ "kind": "field", "name": "head_compaction_interval", "required": false, - "desc": "How frequently ingesters try to compact TSDB head. Block is only created if data covers smallest block range. Must be greater than 0 and max 5 minutes.", + "desc": "How frequently the ingester checks whether the TSDB head should be compacted and, if so, triggers the compaction. Mimir applies a jitter to the first check, while subsequent checks will happen at the configured interval. Block is only created if data covers smallest block range. The configured interval must be between 0 and 15 minutes.", "fieldValue": null, "fieldDefaultValue": 60000000000, "fieldFlag": "blocks-storage.tsdb.head-compaction-interval", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 82aa08a90cd..931ee196266 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -504,7 +504,7 @@ Usage of ./cmd/mimir/mimir: -blocks-storage.tsdb.head-compaction-idle-timeout duration If TSDB head is idle for this duration, it is compacted. Note that up to 25% jitter is added to the value to avoid ingesters compacting concurrently. 0 means disabled. (default 1h0m0s) -blocks-storage.tsdb.head-compaction-interval duration - How frequently ingesters try to compact TSDB head. Block is only created if data covers smallest block range. Must be greater than 0 and max 5 minutes. (default 1m0s) + How frequently the ingester checks whether the TSDB head should be compacted and, if so, triggers the compaction. Mimir applies a jitter to the first check, while subsequent checks will happen at the configured interval. Block is only created if data covers smallest block range. The configured interval must be between 0 and 15 minutes. (default 1m0s) -blocks-storage.tsdb.head-postings-for-matchers-cache-force [experimental] Force the cache to be used for postings for matchers in the Head and OOOHead, even if it's not a concurrent (query-sharding) call. -blocks-storage.tsdb.head-postings-for-matchers-cache-size int diff --git a/docs/sources/mimir/references/configuration-parameters/index.md b/docs/sources/mimir/references/configuration-parameters/index.md index 63843a798c1..668dc7536b2 100644 --- a/docs/sources/mimir/references/configuration-parameters/index.md +++ b/docs/sources/mimir/references/configuration-parameters/index.md @@ -3181,9 +3181,11 @@ tsdb: # CLI flag: -blocks-storage.tsdb.ship-concurrency [ship_concurrency: | default = 10] - # (advanced) How frequently ingesters try to compact TSDB head. Block is only - # created if data covers smallest block range. Must be greater than 0 and max - # 5 minutes. + # (advanced) How frequently the ingester checks whether the TSDB head should + # be compacted and, if so, triggers the compaction. Mimir applies a jitter to + # the first check, while subsequent checks will happen at the configured + # interval. Block is only created if data covers smallest block range. The + # configured interval must be between 0 and 15 minutes. # CLI flag: -blocks-storage.tsdb.head-compaction-interval [head_compaction_interval: | default = 1m] diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index b2145d031d9..59a15e0b41e 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -2040,14 +2040,25 @@ func (i *Ingester) shipBlocks(ctx context.Context, allowed *util.AllowedTenants) } func (i *Ingester) compactionLoop(ctx context.Context) error { - ticker := time.NewTicker(i.cfg.BlocksStorageConfig.TSDB.HeadCompactionInterval) + // At ingester startup, spread the first compaction over the configured compaction + // interval. Then, the next compactions will happen at a regular interval. This logic + // helps to have different ingesters running the compaction at a different time, + // effectively spreading the compactions over the configured interval. + ticker := time.NewTicker(util.DurationWithNegativeJitter(i.cfg.BlocksStorageConfig.TSDB.HeadCompactionInterval, 1)) defer ticker.Stop() + tickerRunOnce := false for ctx.Err() == nil { select { case <-ticker.C: i.compactBlocks(ctx, false, nil) + // Run it at a regular (configured) interval after the fist compaction. + if !tickerRunOnce { + ticker.Reset(i.cfg.BlocksStorageConfig.TSDB.HeadCompactionInterval) + tickerRunOnce = true + } + case req := <-i.forceCompactTrigger: i.compactBlocks(ctx, true, req.users) close(req.callback) // Notify back. diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go index d7c9e795a38..0144745030d 100644 --- a/pkg/storage/tsdb/config.go +++ b/pkg/storage/tsdb/config.go @@ -218,7 +218,7 @@ func (cfg *TSDBConfig) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.ShipConcurrency, "blocks-storage.tsdb.ship-concurrency", 10, "Maximum number of tenants concurrently shipping blocks to the storage.") f.Uint64Var(&cfg.SeriesHashCacheMaxBytes, "blocks-storage.tsdb.series-hash-cache-max-size-bytes", uint64(1*units.Gibibyte), "Max size - in bytes - of the in-memory series hash cache. The cache is shared across all tenants and it's used only when query sharding is enabled.") f.IntVar(&cfg.MaxTSDBOpeningConcurrencyOnStartup, "blocks-storage.tsdb.max-tsdb-opening-concurrency-on-startup", 10, "limit the number of concurrently opening TSDB's on startup") - f.DurationVar(&cfg.HeadCompactionInterval, "blocks-storage.tsdb.head-compaction-interval", 1*time.Minute, "How frequently ingesters try to compact TSDB head. Block is only created if data covers smallest block range. Must be greater than 0 and max 5 minutes.") + f.DurationVar(&cfg.HeadCompactionInterval, "blocks-storage.tsdb.head-compaction-interval", 1*time.Minute, "How frequently the ingester checks whether the TSDB head should be compacted and, if so, triggers the compaction. Mimir applies a jitter to the first check, while subsequent checks will happen at the configured interval. Block is only created if data covers smallest block range. The configured interval must be between 0 and 15 minutes.") f.IntVar(&cfg.HeadCompactionConcurrency, "blocks-storage.tsdb.head-compaction-concurrency", 1, "Maximum number of tenants concurrently compacting TSDB head into a new block") f.DurationVar(&cfg.HeadCompactionIdleTimeout, "blocks-storage.tsdb.head-compaction-idle-timeout", 1*time.Hour, "If TSDB head is idle for this duration, it is compacted. Note that up to 25% jitter is added to the value to avoid ingesters compacting concurrently. 0 means disabled.") f.IntVar(&cfg.HeadChunksWriteBufferSize, "blocks-storage.tsdb.head-chunks-write-buffer-size-bytes", chunks.DefaultWriteBufferSize, headChunkWriterBufferSizeHelp) @@ -246,7 +246,7 @@ func (cfg *TSDBConfig) Validate() error { return errInvalidOpeningConcurrency } - if cfg.HeadCompactionInterval <= 0 || cfg.HeadCompactionInterval > 5*time.Minute { + if cfg.HeadCompactionInterval <= 0 || cfg.HeadCompactionInterval > 15*time.Minute { return errInvalidCompactionInterval } diff --git a/pkg/storage/tsdb/config_test.go b/pkg/storage/tsdb/config_test.go index 97e893f48a7..ba12e673c44 100644 --- a/pkg/storage/tsdb/config_test.go +++ b/pkg/storage/tsdb/config_test.go @@ -69,7 +69,7 @@ func TestConfig_Validate(t *testing.T) { }, "should fail on too high compaction interval": { setup: func(cfg *BlocksStorageConfig) { - cfg.TSDB.HeadCompactionInterval = 10 * time.Minute + cfg.TSDB.HeadCompactionInterval = 20 * time.Minute }, expectedErr: errInvalidCompactionInterval, }, diff --git a/pkg/util/time.go b/pkg/util/time.go index a4657837b81..a0f27b14f2d 100644 --- a/pkg/util/time.go +++ b/pkg/util/time.go @@ -75,6 +75,19 @@ func DurationWithPositiveJitter(input time.Duration, variancePerc float64) time. return input + time.Duration(jitter) } +// DurationWithNegativeJitter returns random duration from "input - input*variance" to "input" interval. +func DurationWithNegativeJitter(input time.Duration, variancePerc float64) time.Duration { + // No duration? No jitter. + if input == 0 { + return 0 + } + + variance := int64(float64(input) * variancePerc) + jitter := rand.Int63n(variance) + + return input - time.Duration(jitter) +} + // NewDisableableTicker essentially wraps NewTicker but allows the ticker to be disabled by passing // zero duration as the interval. Returns a function for stopping the ticker, and the ticker channel. func NewDisableableTicker(interval time.Duration) (func(), <-chan time.Time) { diff --git a/pkg/util/time_test.go b/pkg/util/time_test.go index efd710c390e..acfe60bda2a 100644 --- a/pkg/util/time_test.go +++ b/pkg/util/time_test.go @@ -83,6 +83,20 @@ func TestDurationWithPositiveJitter_ZeroInputDuration(t *testing.T) { assert.Equal(t, time.Duration(0), DurationWithPositiveJitter(time.Duration(0), 0.5)) } +func TestDurationWithNegativeJitter(t *testing.T) { + const numRuns = 1000 + + for i := 0; i < numRuns; i++ { + actual := DurationWithNegativeJitter(time.Minute, 0.5) + assert.GreaterOrEqual(t, int64(actual), int64(30*time.Second)) + assert.LessOrEqual(t, int64(actual), int64(60*time.Second)) + } +} + +func TestDurationWithNegativeJitter_ZeroInputDuration(t *testing.T) { + assert.Equal(t, time.Duration(0), DurationWithNegativeJitter(time.Duration(0), 0.5)) +} + func TestParseTime(t *testing.T) { var tests = []struct { input string From 651a8edda93f1532ed8997d89d9c930b9fd4f3fc Mon Sep 17 00:00:00 2001 From: Ursula Kallio Date: Fri, 3 Mar 2023 16:01:49 +0100 Subject: [PATCH 15/19] Fix port number values. (#4368) --- docs/sources/mimir/operators-guide/get-started/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sources/mimir/operators-guide/get-started/_index.md b/docs/sources/mimir/operators-guide/get-started/_index.md index 9e0b68ba8ba..90e121f0471 100644 --- a/docs/sources/mimir/operators-guide/get-started/_index.md +++ b/docs/sources/mimir/operators-guide/get-started/_index.md @@ -178,7 +178,7 @@ metrics: In a new terminal, run a local Grafana server using Docker: ```bash -docker run --rm --name=grafana --network=host grafana/grafana +docker run --rm --name=grafana -p 3000:3000 grafana/grafana ``` ### Add Grafana Mimir as a Prometheus data source From fa098e9f93dbccc0fa894507fc2f23634ab9b7bc Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 6 Mar 2023 09:42:18 +0100 Subject: [PATCH 16/19] Ruler: change deployment max surge and max unavailable to reduce ownership spillover (#4381) * Ruler: change deployment max surge and max unavailable to reduce ownership spillover Signed-off-by: Marco Pracucci * Apply suggestions from code review Co-authored-by: Dimitar Dimitrov --------- Signed-off-by: Marco Pracucci Co-authored-by: Dimitar Dimitrov --- CHANGELOG.md | 1 + operations/helm/charts/mimir-distributed/CHANGELOG.md | 1 + operations/helm/charts/mimir-distributed/values.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- .../mimir-distributed/templates/ruler/ruler-dep.yaml | 4 ++-- operations/mimir-tests/test-autoscaling-generated.yaml | 4 ++-- operations/mimir-tests/test-consul-generated.yaml | 4 ++-- operations/mimir-tests/test-consul-multi-zone-generated.yaml | 4 ++-- .../mimir-tests/test-deployment-mode-migration-generated.yaml | 4 ++-- .../mimir-tests/test-disable-chunk-streaming-generated.yaml | 4 ++-- .../mimir-tests/test-extra-runtime-config-generated.yaml | 4 ++-- operations/mimir-tests/test-helm-parity-generated.yaml | 4 ++-- ...rlist-cluster-label-migration-step-0-before-generated.yaml | 4 ++-- ...t-memberlist-cluster-label-migration-step-1-generated.yaml | 4 ++-- ...t-memberlist-cluster-label-migration-step-2-generated.yaml | 4 ++-- ...t-memberlist-cluster-label-migration-step-3-generated.yaml | 4 ++-- .../test-memberlist-migration-step-0-before-generated.yaml | 4 ++-- .../test-memberlist-migration-step-1-generated.yaml | 4 ++-- .../test-memberlist-migration-step-2-generated.yaml | 4 ++-- .../test-memberlist-migration-step-3-generated.yaml | 4 ++-- .../test-memberlist-migration-step-4-generated.yaml | 4 ++-- .../test-memberlist-migration-step-5-generated.yaml | 4 ++-- .../test-memberlist-migration-step-6-final-generated.yaml | 4 ++-- operations/mimir-tests/test-multi-zone-generated.yaml | 4 ++-- .../test-multi-zone-with-ongoing-migration-generated.yaml | 4 ++-- .../test-query-scheduler-consul-ring-generated.yaml | 4 ++-- ...memberlist-ring-and-ruler-remote-evaluation-generated.yaml | 4 ++-- .../test-query-scheduler-memberlist-ring-generated.yaml | 4 ++-- ...cheduler-memberlist-ring-read-path-disabled-generated.yaml | 4 ++-- operations/mimir-tests/test-query-sharding-generated.yaml | 4 ++-- .../mimir-tests/test-ruler-remote-evaluation-generated.yaml | 4 ++-- .../test-ruler-remote-evaluation-migration-generated.yaml | 4 ++-- operations/mimir-tests/test-shuffle-sharding-generated.yaml | 4 ++-- .../test-shuffle-sharding-read-path-disabled-generated.yaml | 4 ++-- operations/mimir-tests/test-storage-azure-generated.yaml | 4 ++-- operations/mimir-tests/test-storage-gcs-generated.yaml | 4 ++-- operations/mimir-tests/test-storage-s3-generated.yaml | 4 ++-- operations/mimir/ruler.libsonnet | 4 ++-- 51 files changed, 100 insertions(+), 98 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index adfa020dc6f..3b773162fce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,7 @@ Querying with using `{__mimir_storage__="ephemeral"}` selector no longer works. * [CHANGE] Create the `query-frontend-discovery` service only when Mimir is deployed in microservice mode without query-scheduler. #4353 * [CHANGE] Add results cache backend config to `ruler-query-frontend` configuration to allow cache reuse for cardinality-estimation based sharding. #4257 +* [CHANGE] Ruler: changed ruler deployment max surge from `0` to `50%`, and max unavailable from `1` to `0`. #4381 * [ENHANCEMENT] Add support for ruler auto-scaling. #4046 * [ENHANCEMENT] Add optional `weight` param to `newQuerierScaledObject` and `newRulerQuerierScaledObject` to allow running multiple querier deployments on different node types. #4141 * [ENHANCEMENT] Add support for query-frontend and ruler-query-frontend auto-scaling. #4199 diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index 2fbcf33d4b5..e5f7849f92e 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -28,6 +28,7 @@ Entries should include a reference to the Pull Request that introduced the chang ## main / unreleased +* [CHANGE] Ruler: changed ruler deployment max surge from `0` to `50%`, and max unavailable from `1` to `0`. #4381 * [ENHANCEMENT] Support autoscaling/v2 HorizontalPodAutoscaler for nginx autoscaling starting with Kubernetes 1.23. #4285 * [BUGFIX] Allow override of Kubernetes version for nginx HPA. #4299 * [BUGFIX] Do not generate query-frontend-headless service if query scheduler is enabled. Fixes parity with jsonnet. #4353 diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index 7e061a1790b..41e92041a78 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -1015,8 +1015,8 @@ ruler: strategy: type: RollingUpdate rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 terminationGracePeriodSeconds: 180 diff --git a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 63cf6616689..8d6b7cfa5fa 100644 --- a/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/gateway-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 20e581e3911..c4e57fd497e 100644 --- a/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/gateway-nginx-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index b5c63927c41..e2b7590490a 100644 --- a/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/graphite-enabled-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/large-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/large-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index f3e3e8c1eee..ce13736a980 100644 --- a/operations/helm/tests/large-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/large-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index d4a2dda1350..b9982a984c1 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 40d316cef27..f4b4742716b 100644 --- a/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/scheduler-name-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/small-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/small-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 350c3cab40d..f0756f4ff0b 100644 --- a/operations/helm/tests/small-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/small-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 092ea229a8e..7d641c9d16e 100644 --- a/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-enterprise-configmap-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 8f43a0c45bc..03b41d38042 100644 --- a/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-enterprise-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 3b8df782fad..011bbaf841d 100644 --- a/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-enterprise-legacy-label-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -19,8 +19,8 @@ spec: release: test-enterprise-legacy-label-values strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index eb6abda8efb..070a44e60ce 100644 --- a/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-enterprise-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 897bffb3224..011ca0a0262 100644 --- a/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-oss-k8s-1.25-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 3c67a3306be..55013d39b2c 100644 --- a/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-oss-logical-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index ae47edd9992..540bef0dcb4 100644 --- a/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-oss-multizone-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml b/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml index 9b0b396e93a..5d00abfb599 100644 --- a/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml +++ b/operations/helm/tests/test-oss-values-generated/mimir-distributed/templates/ruler/ruler-dep.yaml @@ -22,8 +22,8 @@ spec: app.kubernetes.io/component: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 type: RollingUpdate template: metadata: diff --git a/operations/mimir-tests/test-autoscaling-generated.yaml b/operations/mimir-tests/test-autoscaling-generated.yaml index 3682d944c50..0e8be418a11 100644 --- a/operations/mimir-tests/test-autoscaling-generated.yaml +++ b/operations/mimir-tests/test-autoscaling-generated.yaml @@ -750,8 +750,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-consul-generated.yaml b/operations/mimir-tests/test-consul-generated.yaml index beeab61848f..70802a75810 100644 --- a/operations/mimir-tests/test-consul-generated.yaml +++ b/operations/mimir-tests/test-consul-generated.yaml @@ -1042,8 +1042,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-consul-multi-zone-generated.yaml b/operations/mimir-tests/test-consul-multi-zone-generated.yaml index 74b9cbcd06b..47026f021a2 100644 --- a/operations/mimir-tests/test-consul-multi-zone-generated.yaml +++ b/operations/mimir-tests/test-consul-multi-zone-generated.yaml @@ -1240,8 +1240,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-deployment-mode-migration-generated.yaml b/operations/mimir-tests/test-deployment-mode-migration-generated.yaml index 8476ba23779..4544897da4c 100644 --- a/operations/mimir-tests/test-deployment-mode-migration-generated.yaml +++ b/operations/mimir-tests/test-deployment-mode-migration-generated.yaml @@ -1269,8 +1269,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml index 9e1d3d215b7..9f17d7a86bb 100644 --- a/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml +++ b/operations/mimir-tests/test-disable-chunk-streaming-generated.yaml @@ -676,8 +676,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-extra-runtime-config-generated.yaml b/operations/mimir-tests/test-extra-runtime-config-generated.yaml index aa697f4ae7f..71b0ce036e3 100644 --- a/operations/mimir-tests/test-extra-runtime-config-generated.yaml +++ b/operations/mimir-tests/test-extra-runtime-config-generated.yaml @@ -703,8 +703,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-helm-parity-generated.yaml b/operations/mimir-tests/test-helm-parity-generated.yaml index 39c1ad29389..c5c5318b84c 100644 --- a/operations/mimir-tests/test-helm-parity-generated.yaml +++ b/operations/mimir-tests/test-helm-parity-generated.yaml @@ -760,8 +760,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml index 7e94e8f994c..3255a3f7e14 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-0-before-generated.yaml @@ -675,8 +675,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml index 43a89680448..a991b5ebe9d 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-1-generated.yaml @@ -677,8 +677,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml index 5d4407c5fca..896817a1257 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-2-generated.yaml @@ -679,8 +679,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml index 7fafdf4f204..7872fa29399 100644 --- a/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml +++ b/operations/mimir-tests/test-memberlist-cluster-label-migration-step-3-generated.yaml @@ -677,8 +677,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml index beeab61848f..70802a75810 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-0-before-generated.yaml @@ -1042,8 +1042,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml index 661cc9144d8..b2ff0f95b8a 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-1-generated.yaml @@ -1096,8 +1096,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml index 515e9aaf89f..c24b328938a 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-2-generated.yaml @@ -1096,8 +1096,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml index 10d2bb7085c..363e90dbdbf 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-3-generated.yaml @@ -1096,8 +1096,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml index c1c0d81376c..e0c8adba427 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-4-generated.yaml @@ -1096,8 +1096,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml index 2005a0d1e54..5f3e1b19e20 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-5-generated.yaml @@ -678,8 +678,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml b/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml index 7e94e8f994c..3255a3f7e14 100644 --- a/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml +++ b/operations/mimir-tests/test-memberlist-migration-step-6-final-generated.yaml @@ -675,8 +675,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-multi-zone-generated.yaml b/operations/mimir-tests/test-multi-zone-generated.yaml index 98750fd1f55..6b54638d787 100644 --- a/operations/mimir-tests/test-multi-zone-generated.yaml +++ b/operations/mimir-tests/test-multi-zone-generated.yaml @@ -885,8 +885,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml index 0098beee343..0ede092f09c 100644 --- a/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml +++ b/operations/mimir-tests/test-multi-zone-with-ongoing-migration-generated.yaml @@ -953,8 +953,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml b/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml index fe54a4aacc0..812a8fcc917 100644 --- a/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-consul-ring-generated.yaml @@ -1052,8 +1052,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml index d92fd26458b..ac08da5af5c 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-and-ruler-remote-evaluation-generated.yaml @@ -789,8 +789,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml index 0170db3b774..68c78a4c68b 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-generated.yaml @@ -702,8 +702,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml b/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml index 81a5326865a..214288dbc3d 100644 --- a/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml +++ b/operations/mimir-tests/test-query-scheduler-memberlist-ring-read-path-disabled-generated.yaml @@ -690,8 +690,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-query-sharding-generated.yaml b/operations/mimir-tests/test-query-sharding-generated.yaml index 2769568edf4..d502188965f 100644 --- a/operations/mimir-tests/test-query-sharding-generated.yaml +++ b/operations/mimir-tests/test-query-sharding-generated.yaml @@ -680,8 +680,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml b/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml index 9659e154791..4c0df50d519 100644 --- a/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml +++ b/operations/mimir-tests/test-ruler-remote-evaluation-generated.yaml @@ -753,8 +753,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml b/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml index e26758a4617..78c98463829 100644 --- a/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml +++ b/operations/mimir-tests/test-ruler-remote-evaluation-migration-generated.yaml @@ -753,8 +753,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-shuffle-sharding-generated.yaml b/operations/mimir-tests/test-shuffle-sharding-generated.yaml index 2bc5befaf3e..2459dca82c0 100644 --- a/operations/mimir-tests/test-shuffle-sharding-generated.yaml +++ b/operations/mimir-tests/test-shuffle-sharding-generated.yaml @@ -680,8 +680,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml b/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml index a892cada1fd..54a04335253 100644 --- a/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml +++ b/operations/mimir-tests/test-shuffle-sharding-read-path-disabled-generated.yaml @@ -680,8 +680,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-storage-azure-generated.yaml b/operations/mimir-tests/test-storage-azure-generated.yaml index d56f975fce9..1077c71b8ed 100644 --- a/operations/mimir-tests/test-storage-azure-generated.yaml +++ b/operations/mimir-tests/test-storage-azure-generated.yaml @@ -677,8 +677,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-storage-gcs-generated.yaml b/operations/mimir-tests/test-storage-gcs-generated.yaml index 7e94e8f994c..3255a3f7e14 100644 --- a/operations/mimir-tests/test-storage-gcs-generated.yaml +++ b/operations/mimir-tests/test-storage-gcs-generated.yaml @@ -675,8 +675,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir-tests/test-storage-s3-generated.yaml b/operations/mimir-tests/test-storage-s3-generated.yaml index 2c6440a6cd3..ebca9003d01 100644 --- a/operations/mimir-tests/test-storage-s3-generated.yaml +++ b/operations/mimir-tests/test-storage-s3-generated.yaml @@ -676,8 +676,8 @@ spec: name: ruler strategy: rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 + maxSurge: 50% + maxUnavailable: 0 template: metadata: labels: diff --git a/operations/mimir/ruler.libsonnet b/operations/mimir/ruler.libsonnet index da0b698c87e..418c749c0df 100644 --- a/operations/mimir/ruler.libsonnet +++ b/operations/mimir/ruler.libsonnet @@ -48,8 +48,8 @@ deployment.new(name, 2, [$.ruler_container]) + (if !std.isObject($._config.node_selector) then {} else deployment.mixin.spec.template.spec.withNodeSelectorMixin($._config.node_selector)) + - deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(0) + - deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge('50%') + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) + deployment.mixin.spec.template.spec.withTerminationGracePeriodSeconds(600) + $.newMimirSpreadTopology(name, $._config.querier_topology_spread_max_skew) + $.mimirVolumeMounts, From aadf31201d6b5fc40d9e71b7311f822b8cf7afab Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 6 Mar 2023 10:23:00 +0100 Subject: [PATCH 17/19] Move "Note:" about cross-zone costs to "Costs" (#4370) This note was in an unrelated section. Signed-off-by: Oleg Zaytsev --- .../configure/configure-zone-aware-replication.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md b/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md index 621890b6bfa..ec837eff73a 100644 --- a/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md +++ b/docs/sources/mimir/operators-guide/configure/configure-zone-aware-replication.md @@ -55,10 +55,6 @@ Zone-aware replication in the ingester ensures that Grafana Mimir replicates eac 2. Roll out ingesters so that each ingester replica runs with a configured zone. 3. Set the `-ingester.ring.zone-awareness-enabled=true` CLI flag or its respective YAML configuration parameter for distributors, ingesters, and queriers. -> **Note:** The requests that the distributors receive are usually compressed, and the requests that the distributors send to the ingesters are uncompressed by default. -> This can result in increased cross-zone bandwidth costs (because at least two ingesters will be in different availability zones). -> If this cost is a concern, you can compress those requests by setting the `-ingester.client.grpc-compression` CLI flag, or its respective YAML configuration parameter, to `snappy` or `gzip` in the distributors. - ## Configuring store-gateway blocks replication To enable zone-aware replication for the store-gateways, refer to [Zone awareness]({{< relref "../architecture/components/store-gateway.md#zone-awareness" >}}). @@ -82,6 +78,10 @@ When replica counts are unbalanced, zones with fewer replicas have higher resour Most cloud providers charge for inter-availability zone networking. Deploying Grafana Mimir with zone-aware replication across multiple cloud provider availability zones likely results in additional networking costs. +> **Note:** The requests that the distributors receive are usually compressed, and the requests that the distributors send to the ingesters are uncompressed by default. +> This can result in increased cross-zone bandwidth costs (because at least two ingesters will be in different availability zones). +> If this cost is a concern, you can compress those requests by setting the `-ingester.client.grpc-compression` CLI flag, or its respective YAML configuration parameter, to `snappy` or `gzip` in the distributors. + ## Kubernetes operator for simplifying rollouts of zone-aware components The [Kubernetes Rollout Operator](https://github.com/grafana/rollout-operator) is a Kubernetes operator that makes it easier for you to manage multi-availability-zone rollouts. Consider using the Kubernetes Rollout Operator when you run Grafana Mimir on Kubernetes with zone awareness enabled. From 35b6661543b7d339233a4e47cf61fd4fae7aab4a Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 6 Mar 2023 10:31:39 +0100 Subject: [PATCH 18/19] Change default -blocks-storage.tsdb.retention-period from 24h to 13h (#4382) Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- cmd/mimir/help.txt.tmpl | 2 +- docs/sources/mimir/references/configuration-parameters/index.md | 2 +- pkg/storage/tsdb/config.go | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b773162fce..21a2210faf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Grafana Mimir +* [CHANGE] Ingester: changed default value of `-blocks-storage.tsdb.retention-period` from `24h` to `13h`. If you're running Mimir with a custom configuration and you're overriding `-querier.query-store-after` to a value greater than the default `12h` then you should increase `-blocks-storage.tsdb.retention-period` accordingly. #4382 * [CHANGE] Ruler: changed default value of `-ruler.evaluation-delay-duration` option from 0 to 1m. #4250 * [CHANGE] Querier: Errors with status code `422` coming from the store-gateway are propagated and not converted to the consistency check error anymore. #4100 * [CHANGE] Store-gateway: When a query hits `max_fetched_chunks_per_query` and `max_fetched_series_per_query` limits, an error with the status code `422` is created and returned. #4056 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 65fc0290c63..89bdb63399e 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -5829,7 +5829,7 @@ "required": false, "desc": "TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks.", "fieldValue": null, - "fieldDefaultValue": 86400000000000, + "fieldDefaultValue": 46800000000000, "fieldFlag": "blocks-storage.tsdb.retention-period", "fieldType": "duration" }, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 931ee196266..fd05efca449 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -518,7 +518,7 @@ Usage of ./cmd/mimir/mimir: -blocks-storage.tsdb.out-of-order-capacity-max int [experimental] Maximum capacity for out of order chunks, in samples between 1 and 255. (default 32) -blocks-storage.tsdb.retention-period duration - TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks. (default 24h0m0s) + TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks. (default 13h0m0s) -blocks-storage.tsdb.series-hash-cache-max-size-bytes uint Max size - in bytes - of the in-memory series hash cache. The cache is shared across all tenants and it's used only when query sharding is enabled. (default 1073741824) -blocks-storage.tsdb.ship-concurrency int diff --git a/cmd/mimir/help.txt.tmpl b/cmd/mimir/help.txt.tmpl index 0d179a45692..5bf6d047c4b 100644 --- a/cmd/mimir/help.txt.tmpl +++ b/cmd/mimir/help.txt.tmpl @@ -194,7 +194,7 @@ Usage of ./cmd/mimir/mimir: -blocks-storage.tsdb.dir string Directory to store TSDBs (including WAL) in the ingesters. This directory is required to be persisted between restarts. (default "./tsdb/") -blocks-storage.tsdb.retention-period duration - TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks. (default 24h0m0s) + TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks. (default 13h0m0s) -common.storage.azure.account-key string Azure storage account key -common.storage.azure.account-name string diff --git a/docs/sources/mimir/references/configuration-parameters/index.md b/docs/sources/mimir/references/configuration-parameters/index.md index 668dc7536b2..a94b5f205ff 100644 --- a/docs/sources/mimir/references/configuration-parameters/index.md +++ b/docs/sources/mimir/references/configuration-parameters/index.md @@ -3169,7 +3169,7 @@ tsdb: # large enough to give store-gateways and queriers enough time to discover # newly uploaded blocks. # CLI flag: -blocks-storage.tsdb.retention-period - [retention_period: | default = 24h] + [retention_period: | default = 13h] # (advanced) How frequently the TSDB blocks are scanned and new ones are # shipped to the storage. 0 means shipping is disabled. diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go index 0144745030d..4d93735ae42 100644 --- a/pkg/storage/tsdb/config.go +++ b/pkg/storage/tsdb/config.go @@ -213,7 +213,7 @@ func (cfg *TSDBConfig) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.Dir, "blocks-storage.tsdb.dir", "./tsdb/", "Directory to store TSDBs (including WAL) in the ingesters. This directory is required to be persisted between restarts.") f.Var(&cfg.BlockRanges, "blocks-storage.tsdb.block-ranges-period", "TSDB blocks range period.") - f.DurationVar(&cfg.Retention, "blocks-storage.tsdb.retention-period", 24*time.Hour, "TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks.") + f.DurationVar(&cfg.Retention, "blocks-storage.tsdb.retention-period", 13*time.Hour, "TSDB blocks retention in the ingester before a block is removed. If shipping is enabled, the retention will be relative to the time when the block was uploaded to storage. If shipping is disabled then its relative to the creation time of the block. This should be larger than the -blocks-storage.tsdb.block-ranges-period, -querier.query-store-after and large enough to give store-gateways and queriers enough time to discover newly uploaded blocks.") f.DurationVar(&cfg.ShipInterval, "blocks-storage.tsdb.ship-interval", 1*time.Minute, "How frequently the TSDB blocks are scanned and new ones are shipped to the storage. 0 means shipping is disabled.") f.IntVar(&cfg.ShipConcurrency, "blocks-storage.tsdb.ship-concurrency", 10, "Maximum number of tenants concurrently shipping blocks to the storage.") f.Uint64Var(&cfg.SeriesHashCacheMaxBytes, "blocks-storage.tsdb.series-hash-cache-max-size-bytes", uint64(1*units.Gibibyte), "Max size - in bytes - of the in-memory series hash cache. The cache is shared across all tenants and it's used only when query sharding is enabled.") From a0332543237a5d5df065aa8258925a0534e20cb5 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar Date: Mon, 6 Mar 2023 15:21:45 +0530 Subject: [PATCH 19/19] Support histograms in pkg/storage and update other breakages (#4354) * Support histograms in pkg/storage and update other breakages Signed-off-by: Ganesh Vernekar --- .../querysharding_test_utils_test.go | 2 +- .../querymiddleware/sharded_queryable.go | 2 +- pkg/querier/matrix.go | 29 +++- pkg/storage/chunk/chunk.go | 37 ++-- pkg/storage/series/series_set.go | 136 ++++++++++++--- pkg/storage/series/series_set_test.go | 106 +++++++++++- pkg/util/chunkcompat/compat.go | 29 +++- pkg/util/modelutil/merger.go | 92 ++++++++++ pkg/util/modelutil/merger_test.go | 161 ++++++++++++++++++ 9 files changed, 543 insertions(+), 51 deletions(-) create mode 100644 pkg/util/modelutil/merger.go create mode 100644 pkg/util/modelutil/merger_test.go diff --git a/pkg/frontend/querymiddleware/querysharding_test_utils_test.go b/pkg/frontend/querymiddleware/querysharding_test_utils_test.go index b0032d1f84d..4dcaaa539db 100644 --- a/pkg/frontend/querymiddleware/querysharding_test_utils_test.go +++ b/pkg/frontend/querymiddleware/querysharding_test_utils_test.go @@ -73,7 +73,7 @@ func newMockShardedQueryable( sets := genLabels(labelSet, labelBuckets) xs := make([]storage.Series, 0, len(sets)) for _, ls := range sets { - xs = append(xs, series.NewConcreteSeries(ls, samples)) + xs = append(xs, series.NewConcreteSeries(ls, samples, nil)) } return &mockShardedQueryable{ diff --git a/pkg/frontend/querymiddleware/sharded_queryable.go b/pkg/frontend/querymiddleware/sharded_queryable.go index 5a82817dbfd..d4124c01243 100644 --- a/pkg/frontend/querymiddleware/sharded_queryable.go +++ b/pkg/frontend/querymiddleware/sharded_queryable.go @@ -248,7 +248,7 @@ func newSeriesSetFromEmbeddedQueriesResults(results [][]SampleStream, hints *sto }) } - set = append(set, series.NewConcreteSeries(mimirpb.FromLabelAdaptersToLabels(stream.Labels), samples)) + set = append(set, series.NewConcreteSeries(mimirpb.FromLabelAdaptersToLabels(stream.Labels), samples, nil)) } } return series.NewConcreteSeriesSet(set) diff --git a/pkg/querier/matrix.go b/pkg/querier/matrix.go index 4939cbc433b..45275b45fc2 100644 --- a/pkg/querier/matrix.go +++ b/pkg/querier/matrix.go @@ -9,22 +9,37 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/grafana/mimir/pkg/mimirpb" "github.com/grafana/mimir/pkg/storage/chunk" "github.com/grafana/mimir/pkg/storage/series" - "github.com/grafana/mimir/pkg/util" + "github.com/grafana/mimir/pkg/util/modelutil" ) func mergeChunks(chunks []chunk.Chunk, from, through model.Time) chunkenc.Iterator { - samples := make([][]model.SamplePair, 0, len(chunks)) + var ( + samples = make([][]model.SamplePair, 0, len(chunks)) + histograms [][]mimirpb.Histogram + mergedSamples []model.SamplePair + mergedHistograms []mimirpb.Histogram + ) for _, c := range chunks { - ss, err := c.Samples(from, through) + sf, sh, err := c.Samples(from, through) if err != nil { return series.NewErrIterator(err) } - - samples = append(samples, ss) + if len(sf) > 0 { + samples = append(samples, sf) + } + if len(sh) > 0 { + histograms = append(histograms, sh) + } + } + if len(histograms) > 0 { + mergedHistograms = modelutil.MergeNHistogramSets(histograms...) + } + if len(samples) > 0 { + mergedSamples = modelutil.MergeNSampleSets(samples...) } - merged := util.MergeNSampleSets(samples...) - return series.NewConcreteSeriesIterator(series.NewConcreteSeries(nil, merged)) + return series.NewConcreteSeriesIterator(series.NewConcreteSeries(nil, mergedSamples, mergedHistograms)) } diff --git a/pkg/storage/chunk/chunk.go b/pkg/storage/chunk/chunk.go index 10a5e371cf2..3e31ec6a523 100644 --- a/pkg/storage/chunk/chunk.go +++ b/pkg/storage/chunk/chunk.go @@ -6,6 +6,7 @@ package chunk import ( + "fmt" "io" "unsafe" @@ -13,6 +14,8 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunkenc" + + "github.com/grafana/mimir/pkg/mimirpb" ) const ( @@ -120,24 +123,38 @@ func NewChunk(metric labels.Labels, c EncodedChunk, from, through model.Time) Ch } } -// Samples returns all SamplePairs for the chunk. -func (c *Chunk) Samples(from, through model.Time) ([]model.SamplePair, error) { +// Samples returns all SamplePairs and Histograms for the chunk. +func (c *Chunk) Samples(from, through model.Time) ([]model.SamplePair, []mimirpb.Histogram, error) { it := c.Data.NewIterator(nil) return rangeValues(it, from, through) } // rangeValues is a utility function that retrieves all values within the given // range from an Iterator. -func rangeValues(it Iterator, oldestInclusive, newestInclusive model.Time) ([]model.SamplePair, error) { - result := []model.SamplePair{} - if it.FindAtOrAfter(oldestInclusive) == chunkenc.ValNone { - return result, it.Err() +func rangeValues(it Iterator, oldestInclusive, newestInclusive model.Time) ([]model.SamplePair, []mimirpb.Histogram, error) { + resultFloat := []model.SamplePair{} + resultHist := []mimirpb.Histogram{} + currValType := it.FindAtOrAfter(oldestInclusive) + if currValType == chunkenc.ValNone { + return resultFloat, resultHist, it.Err() } - for !it.Value().Timestamp.After(newestInclusive) { - result = append(result, it.Value()) - if it.Scan() == chunkenc.ValNone { + for !model.Time(it.Timestamp()).After(newestInclusive) { + switch currValType { + case chunkenc.ValFloat: + resultFloat = append(resultFloat, it.Value()) + case chunkenc.ValHistogram: + t, h := it.AtHistogram() + resultHist = append(resultHist, mimirpb.FromHistogramToHistogramProto(t, h)) + case chunkenc.ValFloatHistogram: + t, h := it.AtFloatHistogram() + resultHist = append(resultHist, mimirpb.FromFloatHistogramToHistogramProto(t, h)) + default: + return nil, nil, fmt.Errorf("unknown value type %v in iterator", currValType) + } + currValType = it.Scan() + if currValType == chunkenc.ValNone { break } } - return result, it.Err() + return resultFloat, resultHist, it.Err() } diff --git a/pkg/storage/series/series_set.go b/pkg/storage/series/series_set.go index d1139516aa1..f57c105e788 100644 --- a/pkg/storage/series/series_set.go +++ b/pkg/storage/series/series_set.go @@ -14,6 +14,8 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" + + "github.com/grafana/mimir/pkg/mimirpb" ) // ConcreteSeriesSet implements storage.SeriesSet. @@ -55,15 +57,17 @@ func (c *ConcreteSeriesSet) Warnings() storage.Warnings { // ConcreteSeries implements storage.Series. type ConcreteSeries struct { - labels labels.Labels - samples []model.SamplePair + labels labels.Labels + samples []model.SamplePair + histograms []mimirpb.Histogram } -// NewConcreteSeries instantiates an in memory series from a list of samples & labels -func NewConcreteSeries(ls labels.Labels, samples []model.SamplePair) *ConcreteSeries { +// NewConcreteSeries instantiates an in memory series from a list of samples & histograms & labels +func NewConcreteSeries(ls labels.Labels, samples []model.SamplePair, histograms []mimirpb.Histogram) *ConcreteSeries { return &ConcreteSeries{ - labels: ls, - samples: samples, + labels: ls, + samples: samples, + histograms: histograms, } } @@ -79,52 +83,138 @@ func (c *ConcreteSeries) Iterator(_ chunkenc.Iterator) chunkenc.Iterator { // concreteSeriesIterator implements chunkenc.Iterator. type concreteSeriesIterator struct { - cur int - series *ConcreteSeries + curFloat int + curHisto int + atHisto bool + series *ConcreteSeries } // NewConcreteSeriesIterator instantiates an in memory chunkenc.Iterator func NewConcreteSeriesIterator(series *ConcreteSeries) chunkenc.Iterator { return &concreteSeriesIterator{ - cur: -1, - series: series, + curFloat: -1, + curHisto: -1, + atHisto: false, + series: series, + } +} + +// atTypeHisto is an internal method to differentiate between histogram and float histogram value types +// Checking that c.curHisto is a valid index in the c.series.histograms array and that +// c.atHisto is true must be done outside of this +func (c *concreteSeriesIterator) atTypeHisto() chunkenc.ValueType { + if c.series.histograms[c.curHisto].IsFloatHistogram() { + return chunkenc.ValFloatHistogram + } + return chunkenc.ValHistogram +} + +// atType returns current timestamp and value type +func (c *concreteSeriesIterator) atType() (int64, chunkenc.ValueType) { + if c.atHisto { + if c.curHisto < 0 || c.curHisto >= len(c.series.histograms) { + return 0, chunkenc.ValNone + } + return c.series.histograms[c.curHisto].Timestamp, c.atTypeHisto() } + if c.curFloat < 0 || c.curFloat >= len(c.series.samples) { + return 0, chunkenc.ValNone + } + return int64(c.series.samples[c.curFloat].Timestamp), chunkenc.ValFloat } func (c *concreteSeriesIterator) Seek(t int64) chunkenc.ValueType { - c.cur = sort.Search(len(c.series.samples), func(n int) bool { + oldTime, oldType := c.atType() + if oldTime >= t { // only advance via Seek + return oldType + } + + c.curFloat = sort.Search(len(c.series.samples), func(n int) bool { return c.series.samples[n].Timestamp >= model.Time(t) }) - if c.cur < len(c.series.samples) { + c.curHisto = sort.Search(len(c.series.histograms), func(n int) bool { + return c.series.histograms[n].Timestamp >= t + }) + + if c.curFloat >= len(c.series.samples) && c.curHisto >= len(c.series.histograms) { + return chunkenc.ValNone + } + if c.curFloat >= len(c.series.samples) { + c.atHisto = true + return c.atTypeHisto() + } + if c.curHisto >= len(c.series.histograms) { + c.atHisto = false return chunkenc.ValFloat } - return chunkenc.ValNone + if int64(c.series.samples[c.curFloat].Timestamp) < c.series.histograms[c.curHisto].Timestamp { + c.curHisto-- + c.atHisto = false + return chunkenc.ValFloat + } + c.curFloat-- + c.atHisto = true + return c.atTypeHisto() } func (c *concreteSeriesIterator) At() (t int64, v float64) { - s := c.series.samples[c.cur] + if c.atHisto { + panic(errors.New("concreteSeriesIterator: Calling At() when cursor is at histogram")) + } + s := c.series.samples[c.curFloat] return int64(s.Timestamp), float64(s.Value) } func (c *concreteSeriesIterator) Next() chunkenc.ValueType { - c.cur++ - if c.cur < len(c.series.samples) { + if c.curFloat+1 >= len(c.series.samples) && c.curHisto+1 >= len(c.series.histograms) { + c.curFloat = len(c.series.samples) + c.curHisto = len(c.series.histograms) + return chunkenc.ValNone + } + if c.curFloat+1 >= len(c.series.samples) { + c.curHisto++ + c.atHisto = true + return c.atTypeHisto() + } + if c.curHisto+1 >= len(c.series.histograms) { + c.curFloat++ + c.atHisto = false return chunkenc.ValFloat } - return chunkenc.ValNone + if int64(c.series.samples[c.curFloat+1].Timestamp) < c.series.histograms[c.curHisto+1].Timestamp { + c.curFloat++ + c.atHisto = false + return chunkenc.ValFloat + } + c.curHisto++ + c.atHisto = true + return c.atTypeHisto() } func (c *concreteSeriesIterator) AtHistogram() (int64, *histogram.Histogram) { - panic(errors.New("concreteSeriesIterator: AtHistogram not implemented")) + if !c.atHisto { + panic(errors.New("concreteSeriesIterator: Calling AtHistogram() when cursor is not at histogram")) + } + h := c.series.histograms[c.curHisto] + return h.Timestamp, mimirpb.FromHistogramProtoToHistogram(&h) } func (c *concreteSeriesIterator) AtFloatHistogram() (int64, *histogram.FloatHistogram) { - panic(errors.New("concreteSeriesIterator: AtHistogram not implemented")) + if !c.atHisto { + panic(errors.New("concreteSeriesIterator: Calling AtFloatHistogram() when cursor is not at histogram")) + } + h := c.series.histograms[c.curHisto] + if h.IsFloatHistogram() { + return h.Timestamp, mimirpb.FromHistogramProtoToFloatHistogram(&h) + } + return h.Timestamp, mimirpb.FromHistogramProtoToHistogram(&h).ToFloat() } func (c *concreteSeriesIterator) AtT() int64 { - s := c.series.samples[c.cur] - return int64(s.Timestamp) + if c.atHisto { + return c.series.histograms[c.curHisto].Timestamp + } + return int64(c.series.samples[c.curFloat].Timestamp) } func (c *concreteSeriesIterator) Err() error { @@ -177,6 +267,7 @@ func MatrixToSeriesSet(m model.Matrix) storage.SeriesSet { series = append(series, &ConcreteSeries{ labels: metricToLabels(ss.Metric), samples: ss.Values, + // histograms: ss.Histograms, // cannot convert the decoded matrix form to the expected encoded format. this method is only used in tests so ignoring histogram support for now }) } return NewConcreteSeriesSet(series) @@ -187,8 +278,7 @@ func LabelsToSeriesSet(ls []labels.Labels) storage.SeriesSet { series := make([]storage.Series, 0, len(ls)) for _, l := range ls { series = append(series, &ConcreteSeries{ - labels: l, - samples: nil, + labels: l, }) } return NewConcreteSeriesSet(series) diff --git a/pkg/storage/series/series_set_test.go b/pkg/storage/series/series_set_test.go index 9c051afecfa..90c864f8643 100644 --- a/pkg/storage/series/series_set_test.go +++ b/pkg/storage/series/series_set_test.go @@ -11,22 +11,37 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/util/test" +) + +var ( + generateTestHistogram = test.GenerateTestHistogram + generateTestFloatHistogram = test.GenerateTestFloatHistogram ) func TestConcreteSeriesSet(t *testing.T) { series1 := &ConcreteSeries{ labels: labels.FromStrings("foo", "bar"), - samples: []model.SamplePair{{Value: 1, Timestamp: 2}}, + samples: []model.SamplePair{{Timestamp: 1, Value: 2}}, } series2 := &ConcreteSeries{ labels: labels.FromStrings("foo", "baz"), - samples: []model.SamplePair{{Value: 3, Timestamp: 4}}, + samples: []model.SamplePair{{Timestamp: 3, Value: 4}}, } - c := NewConcreteSeriesSet([]storage.Series{series2, series1}) + series3 := &ConcreteSeries{ + labels: labels.FromStrings("foo", "bay"), + histograms: []mimirpb.Histogram{mimirpb.FromHistogramToHistogramProto(5, generateTestHistogram(6))}, + } + c := NewConcreteSeriesSet([]storage.Series{series3, series2, series1}) require.True(t, c.Next()) require.Equal(t, series1, c.At()) require.True(t, c.Next()) + require.Equal(t, series3, c.At()) + require.True(t, c.Next()) require.Equal(t, series2, c.At()) require.False(t, c.Next()) } @@ -51,3 +66,88 @@ func TestMatrixToSeriesSetSortsMetricLabels(t *testing.T) { l := ss.At().Labels() require.Equal(t, labels.FromStrings(model.MetricNameLabel, "testmetric", "a", "b", "c", "d", "e", "f", "g", "h"), l) } + +func TestConcreteSeriesSetIterator(t *testing.T) { + series := &ConcreteSeries{ + labels: labels.FromStrings("foo", "bar"), + samples: []model.SamplePair{{Timestamp: 1, Value: 2}, {Timestamp: 5, Value: 6}, {Timestamp: 9, Value: 10}}, + histograms: []mimirpb.Histogram{mimirpb.FromHistogramToHistogramProto(3, generateTestHistogram(4)), mimirpb.FromFloatHistogramToHistogramProto(7, generateTestFloatHistogram(8)), mimirpb.FromHistogramToHistogramProto(11, generateTestHistogram(12))}, + } + + // test next + it := series.Iterator(nil) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v := it.At() + require.Equal(t, int64(1), ts) + require.Equal(t, float64(2), v) + require.Equal(t, chunkenc.ValHistogram, it.Next()) + ts, h := it.AtHistogram() + require.Equal(t, int64(3), ts) + require.Equal(t, generateTestHistogram(4), h) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v = it.At() + require.Equal(t, int64(5), ts) + require.Equal(t, float64(6), v) + require.Equal(t, chunkenc.ValFloatHistogram, it.Next()) + ts, fh := it.AtFloatHistogram() + require.Equal(t, int64(7), ts) + require.Equal(t, generateTestFloatHistogram(8), fh) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v = it.At() + require.Equal(t, int64(9), ts) + require.Equal(t, float64(10), v) + require.Equal(t, chunkenc.ValHistogram, it.Next()) + ts, h = it.AtHistogram() + require.Equal(t, int64(11), ts) + require.Equal(t, generateTestHistogram(12), h) + // You can also call AtFloatHistogram() on ValHistogram. + ts, fh = it.AtFloatHistogram() + require.Equal(t, int64(11), ts) + require.Equal(t, generateTestHistogram(12).ToFloat(), fh) + + require.Equal(t, chunkenc.ValNone, it.Next()) + + // test seek to same and next + it = series.Iterator(nil) + require.Equal(t, chunkenc.ValHistogram, it.Seek(3)) // Seek to middle + ts, h = it.AtHistogram() + require.Equal(t, int64(3), ts) + require.Equal(t, generateTestHistogram(4), h) + require.Equal(t, chunkenc.ValHistogram, it.Seek(3)) // Seek to same place + ts, h = it.AtHistogram() + require.Equal(t, int64(3), ts) + require.Equal(t, generateTestHistogram(4), h) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v = it.At() + require.Equal(t, int64(5), ts) + require.Equal(t, float64(6), v) + + // test seek to earlier and next, then to later and next + it = series.Iterator(nil) + require.Equal(t, chunkenc.ValHistogram, it.Seek(3)) // Seek to middle + ts, h = it.AtHistogram() + require.Equal(t, int64(3), ts) + require.Equal(t, generateTestHistogram(4), h) + require.Equal(t, chunkenc.ValHistogram, it.Seek(1)) // Ensure seek doesn't do anything if already past seek target. + ts, h = it.AtHistogram() + require.Equal(t, int64(3), ts) + require.Equal(t, generateTestHistogram(4), h) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v = it.At() + require.Equal(t, int64(5), ts) + require.Equal(t, float64(6), v) + require.Equal(t, chunkenc.ValFloatHistogram, it.Seek(7)) // Seek to later + ts, fh = it.AtFloatHistogram() + require.Equal(t, int64(7), ts) + require.Equal(t, generateTestFloatHistogram(8), fh) + require.Equal(t, chunkenc.ValFloat, it.Next()) + ts, v = it.At() + require.Equal(t, int64(9), ts) + require.Equal(t, float64(10), v) + require.Equal(t, chunkenc.ValHistogram, it.Seek(11)) // Seek to end + ts, h = it.AtHistogram() + require.Equal(t, int64(11), ts) + require.Equal(t, generateTestHistogram(12), h) + require.Equal(t, chunkenc.ValNone, it.Seek(13)) // Seek to past end + require.Equal(t, chunkenc.ValNone, it.Seek(13)) // Ensure that seeking to same end still returns ValNone +} diff --git a/pkg/util/chunkcompat/compat.go b/pkg/util/chunkcompat/compat.go index 8485fc15def..6dd4834ed8f 100644 --- a/pkg/util/chunkcompat/compat.go +++ b/pkg/util/chunkcompat/compat.go @@ -14,7 +14,7 @@ import ( "github.com/grafana/mimir/pkg/ingester/client" "github.com/grafana/mimir/pkg/mimirpb" "github.com/grafana/mimir/pkg/storage/chunk" - "github.com/grafana/mimir/pkg/util" + "github.com/grafana/mimir/pkg/util/modelutil" ) // StreamsToMatrix converts a slice of QueryStreamResponse to a model.Matrix. @@ -52,18 +52,33 @@ func SeriesChunksToMatrix(from, through model.Time, serieses []client.TimeSeries } samples := []model.SamplePair{} + histograms := []mimirpb.Histogram{} for _, chunk := range chunks { - ss, err := chunk.Samples(from, through) + sf, sh, err := chunk.Samples(from, through) if err != nil { return nil, err } - samples = util.MergeSampleSets(samples, ss) + samples = modelutil.MergeSampleSets(samples, sf) + histograms = modelutil.MergeHistogramSets(histograms, sh) } - result = append(result, &model.SampleStream{ + stream := &model.SampleStream{ Metric: metric, - Values: samples, - }) + } + if len(samples) > 0 { + stream.Values = samples + } + if len(histograms) > 0 { + histogramsDecoded := make([]model.SampleHistogramPair, 0, len(histograms)) + for _, h := range histograms { + histogramsDecoded = append(histogramsDecoded, model.SampleHistogramPair{ + Timestamp: model.Time(h.Timestamp), + Histogram: mimirpb.FromHistogramProtoToPromHistogram(&h), + }) + } + stream.Histograms = histogramsDecoded + } + result = append(result, stream) } return result, nil } @@ -87,6 +102,8 @@ func TimeseriesToMatrix(from, through model.Time, series []mimirpb.TimeSeries) ( Timestamp: model.Time(sam.TimestampMs), Value: model.SampleValue(sam.Value), }) + + // Only used in tests. Add native histogram support later: https://github.com/grafana/mimir/issues/4378 } result = append(result, &model.SampleStream{ diff --git a/pkg/util/modelutil/merger.go b/pkg/util/modelutil/merger.go new file mode 100644 index 00000000000..855c2cdf54d --- /dev/null +++ b/pkg/util/modelutil/merger.go @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/util/merger.go +// Provenance-includes-location: https://github.com/thanos-io/thanos/blob/main/pkg/strutil/merge.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. +// Provenance-includes-copyright: The Thanos Authors. + +package modelutil + +import ( + "github.com/prometheus/common/model" + + "github.com/grafana/mimir/pkg/mimirpb" +) + +// MergeSampleSets merges and dedupes two sets of already sorted sample pairs. +func MergeSampleSets(a, b []model.SamplePair) []model.SamplePair { + result := make([]model.SamplePair, 0, len(a)+len(b)) + i, j := 0, 0 + for i < len(a) && j < len(b) { + if a[i].Timestamp < b[j].Timestamp { + result = append(result, a[i]) + i++ + } else if a[i].Timestamp > b[j].Timestamp { + result = append(result, b[j]) + j++ + } else { + result = append(result, a[i]) + i++ + j++ + } + } + // Add the rest of a or b. One of them is empty now. + result = append(result, a[i:]...) + result = append(result, b[j:]...) + return result +} + +// MergeNSampleSets merges and dedupes n sets of already sorted sample pairs. +func MergeNSampleSets(sampleSets ...[]model.SamplePair) []model.SamplePair { + l := len(sampleSets) + switch l { + case 0: + return []model.SamplePair{} + case 1: + return sampleSets[0] + } + + n := l / 2 + left := MergeNSampleSets(sampleSets[:n]...) + right := MergeNSampleSets(sampleSets[n:]...) + return MergeSampleSets(left, right) +} + +// MergeHistogramSets merges and dedupes two sets of already sorted histograms. +func MergeHistogramSets(a, b []mimirpb.Histogram) []mimirpb.Histogram { + result := make([]mimirpb.Histogram, 0, len(a)+len(b)) + i, j := 0, 0 + for i < len(a) && j < len(b) { + if a[i].GetTimestamp() < b[j].GetTimestamp() { + result = append(result, a[i]) + i++ + } else if a[i].GetTimestamp() > b[j].GetTimestamp() { + result = append(result, b[j]) + j++ + } else { + result = append(result, a[i]) + i++ + j++ + } + } + // Add the rest of a or b. One of them is empty now. + result = append(result, a[i:]...) + result = append(result, b[j:]...) + return result +} + +// MergeNHistogramSets merges and dedupes n sets of already sorted histograms. +func MergeNHistogramSets(sampleSets ...[]mimirpb.Histogram) []mimirpb.Histogram { + l := len(sampleSets) + switch l { + case 0: + return []mimirpb.Histogram{} + case 1: + return sampleSets[0] + } + + n := l / 2 + left := MergeNHistogramSets(sampleSets[:n]...) + right := MergeNHistogramSets(sampleSets[n:]...) + return MergeHistogramSets(left, right) +} diff --git a/pkg/util/modelutil/merger_test.go b/pkg/util/modelutil/merger_test.go new file mode 100644 index 00000000000..402d9ecadc9 --- /dev/null +++ b/pkg/util/modelutil/merger_test.go @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/util/merger_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package modelutil + +import ( + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/util/test" +) + +func TestMergeSampleSets(t *testing.T) { + now := model.Now() + sample1 := model.SamplePair{Timestamp: now, Value: 1} + sample2 := model.SamplePair{Timestamp: now.Add(1 * time.Second), Value: 2} + sample3 := model.SamplePair{Timestamp: now.Add(4 * time.Second), Value: 3} + sample4 := model.SamplePair{Timestamp: now.Add(8 * time.Second), Value: 7} + + for _, c := range []struct { + samplesA []model.SamplePair + samplesB []model.SamplePair + expected []model.SamplePair + }{ + { + samplesA: []model.SamplePair{}, + samplesB: []model.SamplePair{}, + expected: []model.SamplePair{}, + }, + { + samplesA: []model.SamplePair{sample1}, + samplesB: []model.SamplePair{}, + expected: []model.SamplePair{sample1}, + }, + { + samplesA: []model.SamplePair{}, + samplesB: []model.SamplePair{sample1}, + expected: []model.SamplePair{sample1}, + }, + { + samplesA: []model.SamplePair{sample1}, + samplesB: []model.SamplePair{sample1}, + expected: []model.SamplePair{sample1}, + }, + { + samplesA: []model.SamplePair{sample1, sample2, sample3}, + samplesB: []model.SamplePair{sample1, sample3, sample4}, + expected: []model.SamplePair{sample1, sample2, sample3, sample4}, + }, + } { + samples := MergeSampleSets(c.samplesA, c.samplesB) + require.Equal(t, c.expected, samples) + } +} + +func TestMergeNSampleSets(t *testing.T) { + now := model.Now() + sample1 := model.SamplePair{Timestamp: now, Value: 1} + sample2 := model.SamplePair{Timestamp: now.Add(1 * time.Second), Value: 2} + sample3 := model.SamplePair{Timestamp: now.Add(4 * time.Second), Value: 3} + sample4 := model.SamplePair{Timestamp: now.Add(8 * time.Second), Value: 7} + + for _, c := range []struct { + sampleSets [][]model.SamplePair + expected []model.SamplePair + }{ + { + sampleSets: [][]model.SamplePair{{}, {}, {}}, + expected: []model.SamplePair{}, + }, + { + sampleSets: [][]model.SamplePair{ + {sample1, sample2}, + {sample2}, + {sample1, sample3, sample4}, + }, + expected: []model.SamplePair{sample1, sample2, sample3, sample4}, + }, + } { + samples := MergeNSampleSets(c.sampleSets...) + require.Equal(t, c.expected, samples) + } +} + +func TestMergeHistogramSets(t *testing.T) { + now := model.Now() + sample1 := mimirpb.FromFloatHistogramToHistogramProto(int64(now), test.GenerateTestFloatHistogram(1)) + sample2 := mimirpb.FromHistogramToHistogramProto(int64(now.Add(1*time.Second)), test.GenerateTestHistogram(2)) + sample3 := mimirpb.FromFloatHistogramToHistogramProto(int64(now.Add(4*time.Second)), test.GenerateTestFloatHistogram(3)) + sample4 := mimirpb.FromHistogramToHistogramProto(int64(now.Add(8*time.Second)), test.GenerateTestHistogram(7)) + + for _, c := range []struct { + samplesA []mimirpb.Histogram + samplesB []mimirpb.Histogram + expected []mimirpb.Histogram + }{ + { + samplesA: []mimirpb.Histogram{}, + samplesB: []mimirpb.Histogram{}, + expected: []mimirpb.Histogram{}, + }, + { + samplesA: []mimirpb.Histogram{sample1}, + samplesB: []mimirpb.Histogram{}, + expected: []mimirpb.Histogram{sample1}, + }, + { + samplesA: []mimirpb.Histogram{}, + samplesB: []mimirpb.Histogram{sample1}, + expected: []mimirpb.Histogram{sample1}, + }, + { + samplesA: []mimirpb.Histogram{sample1}, + samplesB: []mimirpb.Histogram{sample1}, + expected: []mimirpb.Histogram{sample1}, + }, + { + samplesA: []mimirpb.Histogram{sample1, sample2, sample3}, + samplesB: []mimirpb.Histogram{sample1, sample3, sample4}, + expected: []mimirpb.Histogram{sample1, sample2, sample3, sample4}, + }, + } { + samples := MergeHistogramSets(c.samplesA, c.samplesB) + require.Equal(t, c.expected, samples) + } +} + +func TestMergeNHistogramSets(t *testing.T) { + now := model.Now() + sample1 := mimirpb.FromFloatHistogramToHistogramProto(int64(now), test.GenerateTestFloatHistogram(1)) + sample2 := mimirpb.FromHistogramToHistogramProto(int64(now.Add(1*time.Second)), test.GenerateTestHistogram(2)) + sample3 := mimirpb.FromFloatHistogramToHistogramProto(int64(now.Add(4*time.Second)), test.GenerateTestFloatHistogram(3)) + sample4 := mimirpb.FromHistogramToHistogramProto(int64(now.Add(8*time.Second)), test.GenerateTestHistogram(7)) + + for _, c := range []struct { + sampleSets [][]mimirpb.Histogram + expected []mimirpb.Histogram + }{ + { + sampleSets: [][]mimirpb.Histogram{{}, {}, {}}, + expected: []mimirpb.Histogram{}, + }, + { + sampleSets: [][]mimirpb.Histogram{ + {sample1, sample2}, + {sample2}, + {sample1, sample3, sample4}, + }, + expected: []mimirpb.Histogram{sample1, sample2, sample3, sample4}, + }, + } { + samples := MergeNHistogramSets(c.sampleSets...) + require.Equal(t, c.expected, samples) + } +}