From 6ac5d6d5d5a5397c6028107addab948e2fbcbdf4 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 27 Jan 2025 12:57:41 -0500 Subject: [PATCH] Shuffle User Order in UsersScanner (#10513) If there is a rollout, or any kind of graceful shutdown of compactors, they terminate ongoing block cleanup and bucket index update jobs. This can lead to the bucket index not being updated for an extended period if jobs are terminated back-to-back. This PR addresses the above by shuffling the order of users submitted to BlocksCleaner by UsersScanner. When users are always processed in lexicographic order, successive restarts can cause read path failures for tenants with stale indexes. By randomizing this order, we reduce the probability that these indexes become stale. --- CHANGELOG.md | 1 + pkg/compactor/blocks_cleaner.go | 5 +++++ pkg/storage/bucket/client_mock.go | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b06d7aa9abf..c77a707950d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ * `cortex_ingester_tsdb_block_postings_for_matchers_cache_misses_total` * `cortex_ingester_tsdb_block_postings_for_matchers_cache_requests_total` * `cortex_ingester_tsdb_block_postings_for_matchers_cache_skips_total` +* [ENHANCEMENT] Compactor: Shuffle users' order in `BlocksCleaner`. Prevents bucket indexes from going an extended period without cleanup during compactor restarts. #10513 * [BUGFIX] Distributor: Use a boolean to track changes while merging the ReplicaDesc components, rather than comparing the objects directly. #10185 * [BUGFIX] Querier: fix timeout responding to query-frontend when response size is very close to `-querier.frontend-client.grpc-max-send-msg-size`. #10154 * [BUGFIX] Query-frontend and querier: show warning/info annotations in some cases where they were missing (if a lazy querier was used). #10277 diff --git a/pkg/compactor/blocks_cleaner.go b/pkg/compactor/blocks_cleaner.go index 55bfeffcdf1..aac5b13d999 100644 --- a/pkg/compactor/blocks_cleaner.go +++ b/pkg/compactor/blocks_cleaner.go @@ -8,6 +8,7 @@ package compactor import ( "context" "fmt" + "math/rand" "strconv" "strings" "sync" @@ -232,6 +233,10 @@ func (c *BlocksCleaner) refreshOwnedUsers(ctx context.Context) ([]string, map[st return nil, nil, errors.Wrap(err, "failed to discover users from bucket") } + rand.Shuffle(len(users), func(i, j int) { + users[i], users[j] = users[j], users[i] + }) + isActive := util.StringsMap(users) isDeleted := util.StringsMap(deleted) allUsers := append(users, deleted...) diff --git a/pkg/storage/bucket/client_mock.go b/pkg/storage/bucket/client_mock.go index 2d188474c67..a978d79b432 100644 --- a/pkg/storage/bucket/client_mock.go +++ b/pkg/storage/bucket/client_mock.go @@ -127,7 +127,7 @@ func (m *ClientMock) MockGetAndLastModified(name, content string, lastModified t } else { m.On("Exists", mock.Anything, name).Return(false, err) m.On("Get", mock.Anything, name).Return(nil, ErrObjectDoesNotExist) - m.On("Attributes", mock.Anything, name).Return(nil, ErrObjectDoesNotExist) + m.On("Attributes", mock.Anything, name).Return(objstore.ObjectAttributes{}, ErrObjectDoesNotExist) } }