-
Notifications
You must be signed in to change notification settings - Fork 55
/
Copy pathtsdb.libsonnet
290 lines (249 loc) · 14.6 KB
/
tsdb.libsonnet
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
{
  // Short aliases for the Kubernetes API object builders used throughout
  // this file (supplied by the ksonnet/k8s-libsonnet libraries via `$`).
  local pvc = $.core.v1.persistentVolumeClaim,
  local volumeMount = $.core.v1.volumeMount,
  local container = $.core.v1.container,
  local statefulSet = $.apps.v1.statefulSet,
  local service = $.core.v1.service,

  // Tunables for the blocks (TSDB) storage deployment; merged into the
  // importing environment's _config object.
  _config+:: {
    // Enforce blocks storage
    storage_backend: 'none',
    storage_engine: 'blocks',

    // Allow to configure the ingester disk.
    cortex_ingester_data_disk_size: '100Gi',
    cortex_ingester_data_disk_class: 'fast',

    // Allow to configure the store-gateway disk.
    cortex_store_gateway_data_disk_size: '50Gi',
    cortex_store_gateway_data_disk_class: 'standard',

    // Allow to configure the compactor disk.
    cortex_compactor_data_disk_size: '250Gi',
    cortex_compactor_data_disk_class: 'standard',

    // Allow to fine tune compactor.
    cortex_compactor_max_concurrency: 1,
    // While this is the default value, we want to pass the same to the
    // -blocks-storage.bucket-store.sync-interval (see bucket_index_config).
    cortex_compactor_cleanup_interval: '15m',

    // Enable use of bucket index by querier, ruler and store-gateway.
    // Bucket index is generated by compactor from Cortex 1.7, there is no
    // flag required to enable this on compactor.
    cortex_bucket_index_enabled: false,
  },
// CLI flags wiring the bucket store's index-cache and chunks-cache to
// memcached. Each cache is gated on its own feature toggle in $._config and
// contributes nothing (an empty object) when disabled.
blocks_chunks_caching_config::
  local index_cache_flags =
    if !$._config.memcached_index_queries_enabled then {} else {
      'blocks-storage.bucket-store.index-cache.backend': 'memcached',
      'blocks-storage.bucket-store.index-cache.memcached.addresses': 'dnssrvnoa+memcached-index-queries.%(namespace)s.svc.cluster.local:11211' % $._config,
      'blocks-storage.bucket-store.index-cache.memcached.timeout': '200ms',
      'blocks-storage.bucket-store.index-cache.memcached.max-item-size': $._config.memcached_index_queries_max_item_size_mb * 1024 * 1024,
      'blocks-storage.bucket-store.index-cache.memcached.max-async-buffer-size': '25000',
      'blocks-storage.bucket-store.index-cache.memcached.max-async-concurrency': '50',
      'blocks-storage.bucket-store.index-cache.memcached.max-get-multi-batch-size': '100',
    };
  local chunks_cache_flags =
    if !$._config.memcached_chunks_enabled then {} else {
      'blocks-storage.bucket-store.chunks-cache.backend': 'memcached',
      'blocks-storage.bucket-store.chunks-cache.memcached.addresses': 'dnssrvnoa+memcached.%(namespace)s.svc.cluster.local:11211' % $._config,
      'blocks-storage.bucket-store.chunks-cache.memcached.timeout': '200ms',
      'blocks-storage.bucket-store.chunks-cache.memcached.max-item-size': $._config.memcached_chunks_max_item_size_mb * 1024 * 1024,
      'blocks-storage.bucket-store.chunks-cache.memcached.max-async-buffer-size': '25000',
      'blocks-storage.bucket-store.chunks-cache.memcached.max-async-concurrency': '50',
      'blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-batch-size': '100',
    };
  index_cache_flags + chunks_cache_flags,
// Memcached-backed metadata cache for the bucket store; an empty object
// when $._config.memcached_metadata_enabled is false.
blocks_metadata_caching_config:: if $._config.memcached_metadata_enabled then {
  'blocks-storage.bucket-store.metadata-cache.backend': 'memcached',
  'blocks-storage.bucket-store.metadata-cache.memcached.addresses': 'dnssrvnoa+memcached-metadata.%(namespace)s.svc.cluster.local:11211' % $._config,
  'blocks-storage.bucket-store.metadata-cache.memcached.timeout': '200ms',
  'blocks-storage.bucket-store.metadata-cache.memcached.max-item-size': $._config.memcached_metadata_max_item_size_mb * 1024 * 1024,
  'blocks-storage.bucket-store.metadata-cache.memcached.max-async-buffer-size': '25000',
  'blocks-storage.bucket-store.metadata-cache.memcached.max-async-concurrency': '50',
  'blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-batch-size': '100',
} else {},

// Flags enabling the bucket index, gated on cortex_bucket_index_enabled.
bucket_index_config:: if $._config.cortex_bucket_index_enabled then {
  'blocks-storage.bucket-store.bucket-index.enabled': true,
  // Bucket index is updated by compactor on each cleanup cycle, so keep the
  // sync-interval in lockstep with the compactor cleanup interval.
  'blocks-storage.bucket-store.sync-interval': $._config.cortex_compactor_cleanup_interval,
} else {},

// Querier and ruler both read blocks storage and share the metadata-cache
// and bucket-index flags.
querier_args+:: $._config.queryBlocksStorageConfig + $.blocks_metadata_caching_config + $.bucket_index_config,
ruler_args+:: $._config.queryBlocksStorageConfig + $.blocks_metadata_caching_config + $.bucket_index_config,
// The ingesters should persist TSDB blocks and WAL on a persistent
// volume in order to be crash resilient.
local ingester_data_pvc =
  pvc.new() +
  pvc.mixin.spec.resources.withRequests({ storage: $._config.cortex_ingester_data_disk_size }) +
  pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
  pvc.mixin.spec.withStorageClassName($._config.cortex_ingester_data_disk_class) +
  pvc.mixin.metadata.withName('ingester-data'),

// Blanked out — presumably overrides an ingester Deployment defined in
// another file, since here the ingester runs as a StatefulSet instead.
// TODO(review): confirm against the importing jsonnet.
ingester_deployment: {},

ingester_args+:: {
  'blocks-storage.tsdb.dir': '/data/tsdb',
  'blocks-storage.tsdb.block-ranges-period': '2h',
  'blocks-storage.tsdb.retention-period': '96h',  // 4 days protection against blocks not being uploaded from ingesters.
  'blocks-storage.tsdb.ship-interval': '1m',

  // Disable TSDB blocks transfer because of persistent volumes
  'ingester.max-transfer-retries': 0,
  'ingester.join-after': '0s',

  // Persist ring tokens so that when the ingester will be restarted
  // it will pick the same tokens
  'ingester.tokens-file-path': '/data/tokens',
},

// Builds the ingester StatefulSet: 3 replicas, each mounting the data PVC
// at /data, high pod priority, optional anti-affinity.
newIngesterStatefulSet(name, container, with_anti_affinity=true)::
  statefulSet.new(name, 3, [
    container + $.core.v1.container.withVolumeMountsMixin([
      volumeMount.new('ingester-data', '/data'),
    ]),
  ], ingester_data_pvc) +
  statefulSet.mixin.spec.withServiceName(name) +
  statefulSet.mixin.metadata.withNamespace($._config.namespace) +
  statefulSet.mixin.metadata.withLabels({ name: name }) +
  statefulSet.mixin.spec.template.metadata.withLabels({ name: name } + $.ingester_deployment_labels) +
  statefulSet.mixin.spec.selector.withMatchLabels({ name: name }) +
  statefulSet.mixin.spec.template.spec.securityContext.withRunAsUser(0) +
  // When the ingester needs to flush blocks to the storage, it may take quite a lot of time.
  // For this reason, we grant a high termination grace period.
  // NOTE(review): 1200s is 20 minutes, but this comment previously claimed
  // "80 minutes" (which would be 4800s) — confirm which value is intended.
  statefulSet.mixin.spec.template.spec.withTerminationGracePeriodSeconds(1200) +
  statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
  $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex') +
  $.util.podPriority('high') +
  // Scale up/down ingester instances in parallel instead of starting them
  // one by one. This does NOT affect rolling updates: they will continue to be
  // rolled out one by one (the next pod will be rolled out once the previous is
  // ready).
  statefulSet.mixin.spec.withPodManagementPolicy('Parallel') +
  (if with_anti_affinity then $.util.antiAffinity else {}),

ingester_statefulset: self.newIngesterStatefulSet('ingester', $.ingester_container),

ingester_service:
  $.util.serviceFor($.ingester_statefulset, $.ingester_service_ignored_labels),
// The compactor runs a statefulset with a single replica, because
// it does not support horizontal scalability yet.
local compactor_data_pvc =
  pvc.new() +
  pvc.mixin.spec.resources.withRequests({ storage: $._config.cortex_compactor_data_disk_size }) +
  pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
  pvc.mixin.spec.withStorageClassName($._config.cortex_compactor_data_disk_class) +
  pvc.mixin.metadata.withName('compactor-data'),

// CLI flags for the compactor process (merged over the shared gRPC/storage
// config maps from $._config).
compactor_args::
  $._config.grpcConfig +
  $._config.storageConfig +
  $._config.blocksStorageConfig +
  $._config.compactorLimitsConfig +
  {
    target: 'compactor',

    // Compactor config.
    'compactor.block-ranges': '2h,12h,24h',
    'compactor.data-dir': '/data',
    'compactor.compaction-interval': '30m',
    'compactor.compaction-concurrency': $._config.cortex_compactor_max_concurrency,
    'compactor.cleanup-interval': $._config.cortex_compactor_cleanup_interval,

    // Enable sharding.
    'compactor.sharding-enabled': true,
    'compactor.ring.store': 'consul',
    'compactor.ring.consul.hostname': 'consul.%s.svc.cluster.local:8500' % $._config.namespace,
    'compactor.ring.prefix': '',

    // Limits config.
    'runtime-config.file': '/etc/cortex/overrides.yaml',
  },

compactor_ports:: $.util.defaultPorts,

compactor_container::
  container.new('compactor', $._images.compactor) +
  container.withPorts($.compactor_ports) +
  container.withArgsMixin($.util.mapToFlags($.compactor_args)) +
  container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) +
  // Do not limit compactor CPU and request enough cores to honor configured max concurrency.
  $.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '6Gi') +
  $.util.resourcesLimits(null, '6Gi') +
  $.util.readinessProbe +
  $.jaeger_mixin,

// Builds the compactor StatefulSet: 1 replica, data PVC mounted at /data.
newCompactorStatefulSet(name, container)::
  statefulSet.new(name, 1, [container], compactor_data_pvc) +
  statefulSet.mixin.spec.withServiceName(name) +
  statefulSet.mixin.metadata.withNamespace($._config.namespace) +
  statefulSet.mixin.metadata.withLabels({ name: name }) +
  statefulSet.mixin.spec.template.metadata.withLabels({ name: name }) +
  statefulSet.mixin.spec.selector.withMatchLabels({ name: name }) +
  statefulSet.mixin.spec.template.spec.securityContext.withRunAsUser(0) +
  statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
  statefulSet.mixin.spec.template.spec.withTerminationGracePeriodSeconds(900) +
  // Scale up/down compactor instances in parallel instead of starting them
  // one by one. This does NOT affect rolling updates: they will continue to be
  // rolled out one by one (the next pod will be rolled out once the previous is
  // ready).
  statefulSet.mixin.spec.withPodManagementPolicy('Parallel') +
  $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex'),

compactor_statefulset:
  $.newCompactorStatefulSet('compactor', $.compactor_container),
// The store-gateway runs a statefulset.
local store_gateway_data_pvc =
  pvc.new() +
  pvc.mixin.spec.resources.withRequests({ storage: $._config.cortex_store_gateway_data_disk_size }) +
  pvc.mixin.spec.withAccessModes(['ReadWriteOnce']) +
  pvc.mixin.spec.withStorageClassName($._config.cortex_store_gateway_data_disk_class) +
  pvc.mixin.metadata.withName('store-gateway-data'),

// CLI flags for the store-gateway process.
store_gateway_args::
  $._config.grpcConfig +
  $._config.storageConfig +
  $._config.blocksStorageConfig +
  $._config.queryBlocksStorageConfig +
  {
    target: 'store-gateway',
    'runtime-config.file': '/etc/cortex/overrides.yaml',

    // Persist ring tokens so that when the store-gateway will be restarted
    // it will pick the same tokens
    'store-gateway.sharding-ring.tokens-file-path': '/data/tokens',

    // Block index-headers are pre-downloaded but lazy mmaped and loaded at query time.
    // NOTE(review): the boolean is passed as the string 'true' here, while
    // bucket_index_config uses a real boolean — presumably both render the
    // same through $.util.mapToFlags; confirm.
    'blocks-storage.bucket-store.index-header-lazy-loading-enabled': 'true',
    'blocks-storage.bucket-store.index-header-lazy-loading-idle-timeout': '60m',

    'blocks-storage.bucket-store.max-chunk-pool-bytes': 12 * 1024 * 1024 * 1024,

    // We should keep a number of idle connections equal to the max "get" concurrency,
    // in order to avoid re-opening connections continuously (this would be slower
    // and fill up the conntrack table too).
    //
    // The downside of this approach is that we'll end up with an higher number of
    // active connections to memcached, so we have to make sure connections limit
    // set in memcached is high enough.
    'blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency': 100,
    'blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency': 100,
    'blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency': 100,
    // Self-references below keep each idle-connections value tied to the
    // matching max-get-multi-concurrency value above.
    'blocks-storage.bucket-store.index-cache.memcached.max-idle-connections': $.store_gateway_args['blocks-storage.bucket-store.index-cache.memcached.max-get-multi-concurrency'],
    'blocks-storage.bucket-store.chunks-cache.memcached.max-idle-connections': $.store_gateway_args['blocks-storage.bucket-store.chunks-cache.memcached.max-get-multi-concurrency'],
    'blocks-storage.bucket-store.metadata-cache.memcached.max-idle-connections': $.store_gateway_args['blocks-storage.bucket-store.metadata-cache.memcached.max-get-multi-concurrency'],
  } +
  $.blocks_chunks_caching_config +
  $.blocks_metadata_caching_config +
  $.bucket_index_config,

store_gateway_ports:: $.util.defaultPorts,

store_gateway_container::
  container.new('store-gateway', $._images.store_gateway) +
  container.withPorts($.store_gateway_ports) +
  container.withArgsMixin($.util.mapToFlags($.store_gateway_args)) +
  container.withVolumeMountsMixin([volumeMount.new('store-gateway-data', '/data')]) +
  $.util.resourcesRequests('1', '12Gi') +
  $.util.resourcesLimits(null, '18Gi') +
  $.util.readinessProbe +
  $.jaeger_mixin,

// Builds the store-gateway StatefulSet: 3 replicas, data PVC at /data.
newStoreGatewayStatefulSet(name, container)::
  statefulSet.new(name, 3, [container], store_gateway_data_pvc) +
  statefulSet.mixin.spec.withServiceName(name) +
  statefulSet.mixin.metadata.withNamespace($._config.namespace) +
  statefulSet.mixin.metadata.withLabels({ name: name }) +
  statefulSet.mixin.spec.template.metadata.withLabels({ name: name }) +
  statefulSet.mixin.spec.selector.withMatchLabels({ name: name }) +
  statefulSet.mixin.spec.template.spec.securityContext.withRunAsUser(0) +
  statefulSet.mixin.spec.updateStrategy.withType('RollingUpdate') +
  statefulSet.mixin.spec.template.spec.withTerminationGracePeriodSeconds(120) +
  // Scale up/down store-gateway instances in parallel instead of starting them
  // one by one. This does NOT affect rolling updates: they will continue to be
  // rolled out one by one (the next pod will be rolled out once the previous is
  // ready).
  statefulSet.mixin.spec.withPodManagementPolicy('Parallel') +
  $.util.configVolumeMount($._config.overrides_configmap, '/etc/cortex'),

store_gateway_statefulset: self.newStoreGatewayStatefulSet('store-gateway', $.store_gateway_container),

store_gateway_service:
  $.util.serviceFor($.store_gateway_statefulset),
local podDisruptionBudget = $.policy.v1beta1.podDisruptionBudget,

// PodDisruptionBudget limiting voluntary evictions of store-gateway pods.
store_gateway_pdb:
  podDisruptionBudget.new() +
  podDisruptionBudget.mixin.metadata.withName('store-gateway-pdb') +
  podDisruptionBudget.mixin.metadata.withLabels({ name: 'store-gateway-pdb' }) +
  podDisruptionBudget.mixin.spec.selector.withMatchLabels({ name: 'store-gateway' }) +
  // To avoid any disruption in the read path we need at least 1 replica of each
  // block available, so the disruption budget depends on the blocks replication factor.
  podDisruptionBudget.mixin.spec.withMaxUnavailable(if $._config.store_gateway_replication_factor > 1 then $._config.store_gateway_replication_factor - 1 else 1),
}