Skip to content

Commit 3195036

Browse files
authored
refactor(bloom planner): Compute gaps and build tasks from metas and TSDBs (#12994)
1 parent 7a3338e commit 3195036

File tree

14 files changed

+1568
-10
lines changed

14 files changed

+1568
-10
lines changed

docs/sources/shared/configuration.md

+27
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,23 @@ bloom_build:
333333
[enabled: <boolean> | default = false]
334334

335335
planner:
336+
# Interval at which to re-run the bloom creation planning.
337+
# CLI flag: -bloom-build.planner.interval
338+
[planning_interval: <duration> | default = 8h]
339+
340+
# Newest day-table offset (from today, inclusive) to build blooms for.
341+
# Increase to lower cost by not re-writing data to object storage too
342+
# frequently (since recent data changes more often), at the cost of not having
343+
# blooms available as quickly.
344+
# CLI flag: -bloom-build.planner.min-table-offset
345+
[min_table_offset: <int> | default = 1]
346+
347+
# Oldest day-table offset (from today, inclusive) to compact. This can be
348+
# used to lower cost by not trying to compact older data which doesn't
349+
# change. This can be optimized by aligning it with the maximum
350+
# `reject_old_samples_max_age` setting of any tenant.
351+
# CLI flag: -bloom-build.planner.max-table-offset
352+
[max_table_offset: <int> | default = 2]
336353

337354
builder:
338355

@@ -3382,6 +3399,16 @@ shard_streams:
33823399
# CLI flag: -bloom-compactor.max-bloom-size
33833400
[bloom_compactor_max_bloom_size: <int> | default = 128MB]
33843401

3402+
# Experimental. Whether to create blooms for the tenant.
3403+
# CLI flag: -bloom-build.enable
3404+
[bloom_creation_enabled: <boolean> | default = false]
3405+
3406+
# Experimental. Number of splits to create for the series keyspace when building
3407+
# blooms. The series keyspace is split into this many parts to parallelize bloom
3408+
# creation.
3409+
# CLI flag: -bloom-build.split-keyspace-by
3410+
[bloom_split_series_keyspace_by: <int> | default = 256]
3411+
33853412
# Experimental. Length of the n-grams created when computing blooms from log
33863413
# lines.
33873414
# CLI flag: -bloom-compactor.ngram-length

pkg/bloombuild/planner/config.go

+24-5
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,40 @@
11
package planner
22

3-
import "flag"
3+
import (
4+
"flag"
5+
"fmt"
6+
"time"
7+
)
48

59
// Config configures the bloom-planner component.
610
type Config struct {
7-
// TODO: Add config
11+
PlanningInterval time.Duration `yaml:"planning_interval"`
12+
MinTableOffset int `yaml:"min_table_offset"`
13+
MaxTableOffset int `yaml:"max_table_offset"`
814
}
915

1016
// RegisterFlagsWithPrefix registers flags for the bloom-planner configuration.
11-
func (cfg *Config) RegisterFlagsWithPrefix(_ string, _ *flag.FlagSet) {
12-
// TODO: Register flags with flagsPrefix
17+
func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
18+
f.DurationVar(&cfg.PlanningInterval, prefix+".interval", 8*time.Hour, "Interval at which to re-run the bloom creation planning.")
19+
f.IntVar(&cfg.MinTableOffset, prefix+".min-table-offset", 1, "Newest day-table offset (from today, inclusive) to build blooms for. Increase to lower cost by not re-writing data to object storage too frequently since recent data changes more often at the cost of not having blooms available as quickly.")
20+
// TODO(owen-d): ideally we'd set this per tenant based on their `reject_old_samples_max_age` setting,
21+
// but due to how we need to discover tenants, we can't do that yet. Tenant+Period discovery is done by
22+
// iterating the table periods in object storage and looking for tenants within that period.
23+
// In order to have this done dynamically, we'd need to account for tenant specific overrides, which are also
24+
// dynamically reloaded.
25+
// I'm doing it the simple way for now.
26+
f.IntVar(&cfg.MaxTableOffset, prefix+".max-table-offset", 2, "Oldest day-table offset (from today, inclusive) to compact. This can be used to lower cost by not trying to compact older data which doesn't change. This can be optimized by aligning it with the maximum `reject_old_samples_max_age` setting of any tenant.")
1327
}
1428

1529
func (cfg *Config) Validate() error {
30+
if cfg.MinTableOffset > cfg.MaxTableOffset {
31+
return fmt.Errorf("min-table-offset (%d) must be less than or equal to max-table-offset (%d)", cfg.MinTableOffset, cfg.MaxTableOffset)
32+
}
33+
1634
return nil
1735
}
1836

1937
type Limits interface {
20-
// TODO: Add limits
38+
BloomCreationEnabled(tenantID string) bool
39+
BloomSplitSeriesKeyspaceBy(tenantID string) int
2140
}

pkg/bloombuild/planner/metrics.go

+36
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,19 @@ import (
88
const (
99
metricsNamespace = "loki"
1010
metricsSubsystem = "bloomplanner"
11+
12+
statusSuccess = "success"
13+
statusFailure = "failure"
1114
)
1215

1316
type Metrics struct {
1417
running prometheus.Gauge
18+
19+
buildStarted prometheus.Counter
20+
buildCompleted *prometheus.CounterVec
21+
buildTime *prometheus.HistogramVec
22+
23+
tenantsDiscovered prometheus.Counter
1524
}
1625

1726
func NewMetrics(r prometheus.Registerer) *Metrics {
@@ -22,5 +31,32 @@ func NewMetrics(r prometheus.Registerer) *Metrics {
2231
Name: "running",
2332
Help: "Value will be 1 if bloom planner is currently running on this instance",
2433
}),
34+
35+
buildStarted: promauto.With(r).NewCounter(prometheus.CounterOpts{
36+
Namespace: metricsNamespace,
37+
Subsystem: metricsSubsystem,
38+
Name: "build_started_total",
39+
Help: "Total number of builds started",
40+
}),
41+
buildCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
42+
Namespace: metricsNamespace,
43+
Subsystem: metricsSubsystem,
44+
Name: "build_completed_total",
45+
Help: "Total number of builds completed",
46+
}, []string{"status"}),
47+
buildTime: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
48+
Namespace: metricsNamespace,
49+
Subsystem: metricsSubsystem,
50+
Name: "build_time_seconds",
51+
Help: "Time spent during a builds cycle.",
52+
Buckets: prometheus.DefBuckets,
53+
}, []string{"status"}),
54+
55+
tenantsDiscovered: promauto.With(r).NewCounter(prometheus.CounterOpts{
56+
Namespace: metricsNamespace,
57+
Subsystem: metricsSubsystem,
58+
Name: "tenants_discovered_total",
59+
Help: "Number of tenants discovered during the current build iteration",
60+
}),
2561
}
2662
}

0 commit comments

Comments
 (0)