Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow dynamic replication set of TSDB blocks on store-gateways #10382

Merged
merged 13 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -11655,6 +11655,38 @@
"fieldValue": null,
"fieldDefaultValue": null
},
{
"kind": "block",
"name": "dynamic_replication",
"required": false,
"desc": "",
"blockEntries": [
{
"kind": "field",
"name": "enabled",
"required": false,
"desc": "Use a higher number of replicas for recent blocks. Useful to spread query load more evenly at the cost of slightly higher disk usage.",
"fieldValue": null,
"fieldDefaultValue": false,
"fieldFlag": "store-gateway.dynamic-replication.enabled",
"fieldType": "boolean",
"fieldCategory": "experimental"
},
{
"kind": "field",
"name": "max_time_threshold",
"required": false,
"desc": "Threshold of the most recent sample in a block used to determine whether it is eligible for higher-than-default replication. If a block has samples within this amount of time, it is considered recent and will be owned by more replicas.",
"fieldValue": null,
"fieldDefaultValue": 90000000000000,
"fieldFlag": "store-gateway.dynamic-replication.max-time-threshold",
"fieldType": "duration",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
"fieldDefaultValue": null
},
{
"kind": "field",
"name": "enabled_tenants",
Expand Down
4 changes: 4 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -3191,6 +3191,10 @@ Usage of ./cmd/mimir/mimir:
How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Mimir will report not-ready status via /ready endpoint.
-store-gateway.disabled-tenants comma-separated-list-of-strings
Comma separated list of tenants that cannot be loaded by the store-gateway. If specified, and the store-gateway would normally load a given tenant (via -store-gateway.enabled-tenants or sharding), it will be ignored instead.
-store-gateway.dynamic-replication.enabled
[experimental] Use a higher number of replicas for recent blocks. Useful to spread query load more evenly at the cost of slightly higher disk usage.
-store-gateway.dynamic-replication.max-time-threshold duration
[experimental] Threshold of the most recent sample in a block used to determine whether it is eligible for higher-than-default replication. If a block has samples within this amount of time, it is considered recent and will be owned by more replicas. (default 25h0m0s)
-store-gateway.enabled-tenants comma-separated-list-of-strings
Comma separated list of tenants that can be loaded by the store-gateway. If specified, only blocks for these tenants will be loaded by the store-gateway, otherwise all tenants can be loaded. Subject to sharding.
-store-gateway.sharding-ring.auto-forget-enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ prometheus:
- 'ruler-2:8023'
- 'compactor:8006'
- 'query-frontend:8007'
- 'store-gateway-1:8008'
- 'store-gateway-2:8009'
- 'query-scheduler:8011'
- 'query-scheduler:8008'
- 'store-gateway-1:8011'
- 'store-gateway-2:8012'
- 'store-gateway-3:8013'
- 'memcached-exporter:9150'
- 'continuous-test:8090'
- 'load-generator:9900'
labels:
cluster: 'docker-compose'
Expand Down
2 changes: 1 addition & 1 deletion development/mimir-microservices-mode/config/mimir.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ compactor:

store_gateway:
sharding_ring:
replication_factor: 1
replication_factor: 3
heartbeat_period: 5s
heartbeat_timeout: 15s
wait_stability_min_duration: 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,20 @@ receivers:
cluster: 'docker-compose'
namespace: 'mimir-microservices-mode'
container: 'query-frontend'
- job_name: mimir-microservices-mode/store-gateway
- job_name: mimir-microservices-mode/query-scheduler
static_configs:
- targets: ['store-gateway-1:8008', 'store-gateway-2:8009']
- targets: ['query-scheduler:8008']
labels:
cluster: 'docker-compose'
namespace: 'mimir-microservices-mode'
container: 'store-gateway'
- job_name: mimir-microservices-mode/query-scheduler
container: 'query-scheduler'
- job_name: mimir-microservices-mode/store-gateway
static_configs:
- targets: ['query-scheduler:8011']
- targets: ['store-gateway-1:8011', 'store-gateway-2:8012', 'store-gateway-3:8013']
labels:
cluster: 'docker-compose'
namespace: 'mimir-microservices-mode'
container: 'query-scheduler'
container: 'store-gateway'
- job_name: mimir-microservices-mode/memcached-exporter
static_configs:
- targets: ['memcached-exporter:9150']
Expand Down
9 changes: 6 additions & 3 deletions development/mimir-microservices-mode/config/prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ scrape_configs:
- 'ruler-2:8023'
- 'compactor:8006'
- 'query-frontend:8007'
- 'store-gateway-1:8008'
- 'store-gateway-2:8009'
- 'query-scheduler:8011'
- 'query-scheduler:8008'
- 'store-gateway-1:8011'
- 'store-gateway-2:8012'
- 'store-gateway-3:8013'
- 'memcached-exporter:9150'
- 'continuous-test:8090'
- 'load-generator:9900'
labels:
cluster: 'docker-compose'
namespace: 'mimir-microservices-mode'
Expand Down
28 changes: 11 additions & 17 deletions development/mimir-microservices-mode/docker-compose.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ std.manifestYamlDoc({
self.distributor +
self.ingesters +
self.read_components + // Querier, Frontend and query-scheduler, if enabled.
self.store_gateways +
self.store_gateways(3) +
self.compactor +
self.rulers(2) +
self.alertmanagers(3) +
Expand Down Expand Up @@ -113,7 +113,7 @@ std.manifestYamlDoc({
httpPort: 8005,
extraArguments:
// Use of scheduler is activated by `-querier.scheduler-address` option and setting -querier.frontend-address option to nothing.
if $._config.use_query_scheduler then '-querier.scheduler-address=query-scheduler:9011 -querier.frontend-address=' else '',
if $._config.use_query_scheduler then '-querier.scheduler-address=query-scheduler:9008 -querier.frontend-address=' else '',
}),

'query-frontend': mimirService({
Expand All @@ -124,14 +124,14 @@ std.manifestYamlDoc({
extraArguments:
'-query-frontend.max-total-query-length=8760h' +
// Use of scheduler is activated by `-query-frontend.scheduler-address` option.
(if $._config.use_query_scheduler then ' -query-frontend.scheduler-address=query-scheduler:9011' else ''),
(if $._config.use_query_scheduler then ' -query-frontend.scheduler-address=query-scheduler:9008' else ''),
}),
} + (
if $._config.use_query_scheduler then {
'query-scheduler': mimirService({
name: 'query-scheduler',
target: 'query-scheduler',
httpPort: 8011,
httpPort: 8008,
extraArguments: '-query-frontend.max-total-query-length=8760h',
}),
} else {}
Expand Down Expand Up @@ -167,20 +167,14 @@ std.manifestYamlDoc({
for id in std.range(1, count)
},

store_gateways:: {
'store-gateway-1': mimirService({
name: 'store-gateway-1',
store_gateways(count):: {
['store-gateway-%d' % id]: mimirService({
name: 'store-gateway-' + id,
target: 'store-gateway',
httpPort: 8008,
jaegerApp: 'store-gateway-1',
}),

'store-gateway-2': mimirService({
name: 'store-gateway-2',
target: 'store-gateway',
httpPort: 8009,
jaegerApp: 'store-gateway-2',
}),
httpPort: 8010 + id,
jaegerApp: 'store-gateway-%d' % id,
})
for id in std.range(1, count)
},

continuous_test:: {
Expand Down
Loading