Skip to content

Commit d178f4c

Browse files
feat: Implement WAL segment ingestion via Kafka with partition ring (#14043)
Co-authored-by: George Robinson <[email protected]>
1 parent 55e374e commit d178f4c

File tree

105 files changed

+23877
-629
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+23877
-629
lines changed

docs/sources/shared/configuration.md

+98-93
Original file line numberDiff line numberDiff line change
@@ -1031,74 +1031,56 @@ metastore_client:
10311031
# Configures the gRPC client used to communicate with the metastore.
10321032
[grpc_client_config: <grpc_client>]
10331033

1034-
partition_ring:
1035-
# The key-value store used to share the hash ring across multiple instances.
1036-
# This option needs be set on ingesters, distributors, queriers, and rulers
1037-
# when running in microservices mode.
1038-
kvstore:
1039-
# Backend storage to use for the ring. Supported values are: consul, etcd,
1040-
# inmemory, memberlist, multi.
1041-
# CLI flag: -ingester.partition-ring.store
1042-
[store: <string> | default = "memberlist"]
1043-
1044-
# The prefix for the keys in the store. Should end with a /.
1045-
# CLI flag: -ingester.partition-ring.prefix
1046-
[prefix: <string> | default = "collectors/"]
1047-
1048-
# Configuration for a Consul client. Only applies if the selected kvstore is
1049-
# consul.
1050-
# The CLI flags prefix for this block configuration is:
1051-
# ingester.partition-ring.consul
1052-
[consul: <consul>]
1053-
1054-
# Configuration for an ETCD v3 client. Only applies if the selected kvstore
1055-
# is etcd.
1056-
# The CLI flags prefix for this block configuration is:
1057-
# ingester.partition-ring.etcd
1058-
[etcd: <etcd>]
1059-
1060-
multi:
1061-
# Primary backend storage used by multi-client.
1062-
# CLI flag: -ingester.partition-ring.multi.primary
1063-
[primary: <string> | default = ""]
1064-
1065-
# Secondary backend storage used by multi-client.
1066-
# CLI flag: -ingester.partition-ring.multi.secondary
1067-
[secondary: <string> | default = ""]
1068-
1069-
# Mirror writes to secondary store.
1070-
# CLI flag: -ingester.partition-ring.multi.mirror-enabled
1071-
[mirror_enabled: <boolean> | default = false]
1072-
1073-
# Timeout for storing value to secondary store.
1074-
# CLI flag: -ingester.partition-ring.multi.mirror-timeout
1075-
[mirror_timeout: <duration> | default = 2s]
1076-
1077-
# Minimum number of owners to wait before a PENDING partition gets switched to
1078-
# ACTIVE.
1079-
# CLI flag: -ingester.partition-ring.min-partition-owners-count
1080-
[min_partition_owners_count: <int> | default = 1]
1081-
1082-
# How long the minimum number of owners are enforced before a PENDING
1083-
# partition gets switched to ACTIVE.
1084-
# CLI flag: -ingester.partition-ring.min-partition-owners-duration
1085-
[min_partition_owners_duration: <duration> | default = 10s]
1086-
1087-
# How long to wait before an INACTIVE partition is eligible for deletion. The
1088-
# partition is deleted only if it has been in INACTIVE state for at least the
1089-
# configured duration and it has no owners registered. A value of 0 disables
1090-
# partitions deletion.
1091-
# CLI flag: -ingester.partition-ring.delete-inactive-partition-after
1092-
[delete_inactive_partition_after: <duration> | default = 13h]
1093-
10941034
kafka_config:
1095-
# the kafka endpoint to connect to
1096-
# CLI flag: -address
1035+
# The Kafka backend address.
1036+
# CLI flag: -kafka.address
10971037
[address: <string> | default = "localhost:9092"]
10981038

10991039
# The Kafka topic name.
1100-
# CLI flag: -.topic
1101-
[topic: <string> | default = "loki.push"]
1040+
# CLI flag: -kafka.topic
1041+
[topic: <string> | default = ""]
1042+
1043+
# The Kafka client ID.
1044+
# CLI flag: -kafka.client-id
1045+
[client_id: <string> | default = ""]
1046+
1047+
# The maximum time allowed to open a connection to a Kafka broker.
1048+
# CLI flag: -kafka.dial-timeout
1049+
[dial_timeout: <duration> | default = 2s]
1050+
1051+
# How long to wait for an incoming write request to be successfully committed
1052+
# to the Kafka backend.
1053+
# CLI flag: -kafka.write-timeout
1054+
[write_timeout: <duration> | default = 10s]
1055+
1056+
# The consumer group used by the consumer to track the last consumed offset.
1057+
# The consumer group must be different for each ingester. If the configured
1058+
# consumer group contains the '<partition>' placeholder, it is replaced with
1059+
# the actual partition ID owned by the ingester. When empty (recommended),
1060+
# Mimir uses the ingester instance ID to guarantee uniqueness.
1061+
# CLI flag: -kafka.consumer-group
1062+
[consumer_group: <string> | default = ""]
1063+
1064+
# How long to retry a failed request to get the last produced offset.
1065+
# CLI flag: -kafka.last-produced-offset-retry-timeout
1066+
[last_produced_offset_retry_timeout: <duration> | default = 10s]
1067+
1068+
# Enable auto-creation of Kafka topic if it doesn't exist.
1069+
# CLI flag: -kafka.auto-create-topic-enabled
1070+
[auto_create_topic_enabled: <boolean> | default = true]
1071+
1072+
# The maximum size of a Kafka record data that should be generated by the
1073+
# producer. An incoming write request larger than this size is split into
1074+
# multiple Kafka records. We strongly recommend to not change this setting
1075+
# unless for testing purposes.
1076+
# CLI flag: -kafka.producer-max-record-size-bytes
1077+
[producer_max_record_size_bytes: <int> | default = 15983616]
1078+
1079+
# The maximum size of (uncompressed) buffered and unacknowledged produced
1080+
# records sent to Kafka. The produce request fails once this limit is reached.
1081+
# This limit is per Kafka client. 0 to disable the limit.
1082+
# CLI flag: -kafka.producer-max-buffered-bytes
1083+
[producer_max_buffered_bytes: <int> | default = 1073741824]
11021084

11031085
kafka_ingester:
11041086
# Whether the kafka ingester is enabled.
@@ -1251,46 +1233,75 @@ kafka_ingester:
12511233
# CLI flag: -kafka-ingester.shutdown-marker-path
12521234
[shutdown_marker_path: <string> | default = ""]
12531235

1236+
# The interval at which the ingester will flush and commit offsets to Kafka.
1237+
# If not set, the default flush interval will be used.
1238+
# CLI flag: -kafka-ingester.flush-interval
1239+
[flush_interval: <duration> | default = 15s]
1240+
1241+
# The size at which the ingester will flush and commit offsets to Kafka. If
1242+
# not set, the default flush size will be used.
1243+
# CLI flag: -kafka-ingester.flush-size
1244+
[flush_size: <int> | default = 314572800]
1245+
12541246
partition_ring:
12551247
# The key-value store used to share the hash ring across multiple instances.
12561248
# This option needs be set on ingesters, distributors, queriers, and rulers
12571249
# when running in microservices mode.
12581250
kvstore:
1259-
[store: <string> | default = ""]
1251+
# Backend storage to use for the ring. Supported values are: consul, etcd,
1252+
# inmemory, memberlist, multi.
1253+
# CLI flag: -ingester.partition-ring.store
1254+
[store: <string> | default = "memberlist"]
12601255

1261-
[prefix: <string> | default = ""]
1256+
# The prefix for the keys in the store. Should end with a /.
1257+
# CLI flag: -ingester.partition-ring.prefix
1258+
[prefix: <string> | default = "collectors/"]
12621259

12631260
# Configuration for a Consul client. Only applies if the selected kvstore
12641261
# is consul.
12651262
# The CLI flags prefix for this block configuration is:
1266-
# common.storage.ring.consul
1263+
# ingester.partition-ring.consul
12671264
[consul: <consul>]
12681265

12691266
# Configuration for an ETCD v3 client. Only applies if the selected
12701267
# kvstore is etcd.
12711268
# The CLI flags prefix for this block configuration is:
1272-
# common.storage.ring.etcd
1269+
# ingester.partition-ring.etcd
12731270
[etcd: <etcd>]
12741271

12751272
multi:
1273+
# Primary backend storage used by multi-client.
1274+
# CLI flag: -ingester.partition-ring.multi.primary
12761275
[primary: <string> | default = ""]
12771276

1277+
# Secondary backend storage used by multi-client.
1278+
# CLI flag: -ingester.partition-ring.multi.secondary
12781279
[secondary: <string> | default = ""]
12791280

1280-
[mirror_enabled: <boolean>]
1281-
1282-
[mirror_timeout: <duration>]
1283-
1284-
[min_partition_owners_count: <int>]
1281+
# Mirror writes to secondary store.
1282+
# CLI flag: -ingester.partition-ring.multi.mirror-enabled
1283+
[mirror_enabled: <boolean> | default = false]
12851284

1286-
[min_partition_owners_duration: <duration>]
1285+
# Timeout for storing value to secondary store.
1286+
# CLI flag: -ingester.partition-ring.multi.mirror-timeout
1287+
[mirror_timeout: <duration> | default = 2s]
12871288

1288-
[delete_inactive_partition_after: <duration>]
1289+
# Minimum number of owners to wait before a PENDING partition gets switched
1290+
# to ACTIVE.
1291+
# CLI flag: -ingester.partition-ring.min-partition-owners-count
1292+
[min_partition_owners_count: <int> | default = 1]
12891293

1290-
kafkaconfig:
1291-
[address: <string> | default = ""]
1294+
# How long the minimum number of owners are enforced before a PENDING
1295+
# partition gets switched to ACTIVE.
1296+
# CLI flag: -ingester.partition-ring.min-partition-owners-duration
1297+
[min_partition_owners_duration: <duration> | default = 10s]
12921298

1293-
[topic: <string> | default = ""]
1299+
# How long to wait before an INACTIVE partition is eligible for deletion.
1300+
# The partition is deleted only if it has been in INACTIVE state for at
1301+
# least the configured duration and it has no owners registered. A value of
1302+
# 0 disables partitions deletion.
1303+
# CLI flag: -ingester.partition-ring.delete-inactive-partition-after
1304+
[delete_inactive_partition_after: <duration> | default = 13h]
12941305

12951306
# Configuration for 'runtime config' module, responsible for reloading runtime
12961307
# configuration file.
@@ -2244,10 +2255,14 @@ ring:
22442255
22452256
# Configuration for a Consul client. Only applies if the selected kvstore is
22462257
# consul.
2258+
# The CLI flags prefix for this block configuration is:
2259+
# common.storage.ring.consul
22472260
[consul: <consul>]
22482261
22492262
# Configuration for an ETCD v3 client. Only applies if the selected kvstore
22502263
# is etcd.
2264+
# The CLI flags prefix for this block configuration is:
2265+
# common.storage.ring.etcd
22512266
[etcd: <etcd>]
22522267
22532268
multi:
@@ -3578,26 +3593,16 @@ The `ingester_client` block configures how the distributor will connect to inges
35783593
```yaml
35793594
# Configures how connections are pooled.
35803595
pool_config:
3581-
# How frequently to clean up clients for ingesters that have gone away.
3582-
# CLI flag: -distributor.client-cleanup-period
3583-
[client_cleanup_period: <duration> | default = 15s]
3596+
[client_cleanup_period: <duration>]
35843597
3585-
# Run a health check on each ingester client during periodic cleanup.
3586-
# CLI flag: -distributor.health-check-ingesters
3587-
[health_check_ingesters: <boolean> | default = true]
3598+
[health_check_ingesters: <boolean>]
35883599
3589-
# How quickly a dead client will be removed after it has been detected to
3590-
# disappear. Set this to a value to allow time for a secondary health check to
3591-
# recover the missing client.
3592-
# CLI flag: -ingester.client.healthcheck-timeout
3593-
[remote_timeout: <duration> | default = 1s]
3600+
[remote_timeout: <duration>]
35943601
3595-
# The remote request timeout on the client side.
3596-
# CLI flag: -ingester.client.timeout
3597-
[remote_timeout: <duration> | default = 5s]
3602+
[remote_timeout: <duration>]
35983603
35993604
# Configures how the gRPC connection to ingesters work as a client.
3600-
# The CLI flags prefix for this block configuration is: ingester.client
3605+
# The CLI flags prefix for this block configuration is: ingester-rf1.client
36013606
[grpc_client_config: <grpc_client>]
36023607
```
36033608

go.mod

+6-3
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ require (
139139
github.com/shirou/gopsutil/v4 v4.24.0-alpha.1
140140
github.com/thanos-io/objstore v0.0.0-20240818203309-0363dadfdfb1
141141
github.com/twmb/franz-go v1.17.1
142+
github.com/twmb/franz-go/pkg/kadm v1.13.0
143+
github.com/twmb/franz-go/pkg/kfake v0.0.0-20240821035758-b77dd13e2bfa
144+
github.com/twmb/franz-go/pkg/kmsg v1.8.0
145+
github.com/twmb/franz-go/plugin/kotel v1.5.0
142146
github.com/twmb/franz-go/plugin/kprom v1.1.0
143147
github.com/willf/bloom v2.0.3+incompatible
144148
go.opentelemetry.io/collector/pdata v1.12.0
@@ -178,7 +182,6 @@ require (
178182
github.com/shoenig/go-m1cpu v0.1.6 // indirect
179183
github.com/tklauser/go-sysconf v0.3.12 // indirect
180184
github.com/tklauser/numcpus v0.6.1 // indirect
181-
github.com/twmb/franz-go/pkg/kmsg v1.8.0 // indirect
182185
github.com/yusufpapurcu/wmi v1.2.4 // indirect
183186
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
184187
go.opentelemetry.io/otel/sdk/metric v1.28.0 // indirect
@@ -347,9 +350,9 @@ require (
347350
go.opentelemetry.io/collector/semconv v0.105.0 // indirect
348351
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.53.0 // indirect
349352
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
350-
go.opentelemetry.io/otel v1.28.0 // indirect
353+
go.opentelemetry.io/otel v1.28.0
351354
go.opentelemetry.io/otel/metric v1.28.0 // indirect
352-
go.opentelemetry.io/otel/trace v1.28.0 // indirect
355+
go.opentelemetry.io/otel/trace v1.28.0
353356
go.uber.org/multierr v1.11.0 // indirect
354357
go.uber.org/zap v1.21.0 // indirect
355358
golang.org/x/mod v0.19.0 // indirect

go.sum

+6
Original file line numberDiff line numberDiff line change
@@ -1816,8 +1816,14 @@ github.com/ttacon/chalk v0.0.0-20160626202418-22c06c80ed31/go.mod h1:onvgF043R+l
18161816
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
18171817
github.com/twmb/franz-go v1.17.1 h1:0LwPsbbJeJ9R91DPUHSEd4su82WJWcTY1Zzbgbg4CeQ=
18181818
github.com/twmb/franz-go v1.17.1/go.mod h1:NreRdJ2F7dziDY/m6VyspWd6sNxHKXdMZI42UfQ3GXM=
1819+
github.com/twmb/franz-go/pkg/kadm v1.13.0 h1:bJq4C2ZikUE2jh/wl9MtMTQ/kpmnBgVFh8XMQBEC+60=
1820+
github.com/twmb/franz-go/pkg/kadm v1.13.0/go.mod h1:VMvpfjz/szpH9WB+vGM+rteTzVv0djyHFimci9qm2C0=
1821+
github.com/twmb/franz-go/pkg/kfake v0.0.0-20240821035758-b77dd13e2bfa h1:OmQ4DJhqeOPdIH60Psut1vYU8A6LGyxJbF09w5RAa2w=
1822+
github.com/twmb/franz-go/pkg/kfake v0.0.0-20240821035758-b77dd13e2bfa/go.mod h1:nkBI/wGFp7t1NJnnCeJdS4sX5atPAqwCPpDXKuI7SC8=
18191823
github.com/twmb/franz-go/pkg/kmsg v1.8.0 h1:lAQB9Z3aMrIP9qF9288XcFf/ccaSxEitNA1CDTEIeTA=
18201824
github.com/twmb/franz-go/pkg/kmsg v1.8.0/go.mod h1:HzYEb8G3uu5XevZbtU0dVbkphaKTHk0X68N5ka4q6mU=
1825+
github.com/twmb/franz-go/plugin/kotel v1.5.0 h1:TiPfGUbQK384OO7ZYGdo7JuPCbJn+/8njQ/D9Je9CDE=
1826+
github.com/twmb/franz-go/plugin/kotel v1.5.0/go.mod h1:wRXzRo76x1myOUMaVHAyraXoGBdEcvlLChGTVv5+DWU=
18211827
github.com/twmb/franz-go/plugin/kprom v1.1.0 h1:grGeIJbm4llUBF8jkDjTb/b8rKllWSXjMwIqeCCcNYQ=
18221828
github.com/twmb/franz-go/plugin/kprom v1.1.0/go.mod h1:cTDrPMSkyrO99LyGx3AtiwF9W6+THHjZrkDE2+TEBIU=
18231829
github.com/uber-go/atomic v1.3.2/go.mod h1:/Ct5t2lcmbJ4OSe/waGBoaVvVqtO0bmtfVNex1PFV8g=

0 commit comments

Comments
 (0)