Commit d0a0bad

Smooth out chunk flush operations
In order to prevent spikes in chunk flush operations at every flush check period, we smooth the flush operations out over the duration of the period.

Signed-off-by: Christian Haudum <[email protected]>
1 parent 9b62fe7 commit d0a0bad

File tree

pkg/ingester/flush.go
pkg/ingester/ingester.go

2 files changed: +34 -3 lines changed

pkg/ingester/flush.go (+29 -3)
@@ -8,13 +8,13 @@ import (
 	"time"
 
 	"github.com/go-kit/log/level"
+	"github.com/grafana/dskit/tenant"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/weaveworks/common/user"
 	"golang.org/x/net/context"
-
-	"github.com/grafana/dskit/tenant"
+	"golang.org/x/time/rate"
 
 	"github.com/grafana/loki/pkg/chunkenc"
 	"github.com/grafana/loki/pkg/storage/chunk"
@@ -27,6 +27,9 @@ const (
 	// position, not wallclock time.
 	flushBackoff = 1 * time.Second
 
+	// Lower bound on flushes per check period for rate-limiter
+	minFlushes = 100
+
 	nameLabel = "__name__"
 	logsValue = "logs"
 
@@ -87,13 +90,14 @@ func (o *flushOp) Priority() int64 {
 	return -int64(o.from)
 }
 
-// sweepUsers periodically schedules series for flushing and garbage collects users with no series
+// sweepUsers periodically schedules series for flushing and garbage collects users with no streams
 func (i *Ingester) sweepUsers(immediate, mayRemoveStreams bool) {
 	instances := i.getInstances()
 
 	for _, instance := range instances {
 		i.sweepInstance(instance, immediate, mayRemoveStreams)
 	}
+	i.setFlushRate()
 }
 
 func (i *Ingester) sweepInstance(instance *instance, immediate, mayRemoveStreams bool) {
@@ -125,6 +129,24 @@ func (i *Ingester) sweepStream(instance *instance, stream *stream, immediate boo
 	})
 }
 
+// Compute a rate such to spread calls to the store over nearly all of the flush period,
+// for example if we have 600 items in the queue and period 1 min we will send 10.5 per second.
+// Note if the store can't keep up with this rate then it doesn't make any difference.
+func (i *Ingester) setFlushRate() {
+	totalQueueLength := 0
+	for _, q := range i.flushQueues {
+		totalQueueLength += q.Length()
+	}
+	const fudge = 1.05 // aim to finish a little bit before the end of the period
+	flushesPerSecond := float64(totalQueueLength) / i.cfg.FlushCheckPeriod.Seconds() * fudge
+	// Avoid going very slowly with tiny queues
+	if flushesPerSecond*i.cfg.FlushCheckPeriod.Seconds() < minFlushes {
+		flushesPerSecond = minFlushes / i.cfg.FlushCheckPeriod.Seconds()
+	}
+	level.Debug(util_log.Logger).Log("msg", "computed flush rate", "rate", flushesPerSecond)
+	i.flushRateLimiter.SetLimit(rate.Limit(flushesPerSecond))
+}
+
 func (i *Ingester) flushLoop(j int) {
 	defer func() {
 		level.Debug(util_log.Logger).Log("msg", "Ingester.flushLoop() exited")
@@ -138,6 +160,10 @@ func (i *Ingester) flushLoop(j int) {
 		}
 		op := o.(*flushOp)
 
+		if !op.immediate {
+			_ = i.flushRateLimiter.Wait(context.Background())
+		}
+
 		err := i.flushUserSeries(op.userID, op.fp, op.immediate)
 		if err != nil {
 			level.Error(util_log.WithUserID(op.userID, util_log.Logger)).Log("msg", "failed to flush", "err", err)
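
For reference, the arithmetic in setFlushRate can be checked with a small standalone program. The sketch below is illustrative only; computeFlushRate and the example values are made up for this note and are not part of the commit. With 600 queued operations and a 1-minute flush check period the computed rate is 600 / 60 * 1.05 = 10.5 flushes per second, and each worker's call to Wait then paces its store writes instead of sending them all in one burst at the start of the period.

// Standalone sketch of the smoothing logic; computeFlushRate and the example
// values are illustrative, not the actual Loki code.
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

// Lower bound on flushes per check period, mirroring minFlushes above.
const minFlushes = 100

// computeFlushRate spreads totalQueueLength flushes over (slightly less than)
// one flush check period, following the same arithmetic as setFlushRate.
func computeFlushRate(totalQueueLength int, period time.Duration) float64 {
	const fudge = 1.05 // aim to finish a little before the end of the period
	flushesPerSecond := float64(totalQueueLength) / period.Seconds() * fudge
	// Avoid going very slowly with tiny queues.
	if flushesPerSecond*period.Seconds() < minFlushes {
		flushesPerSecond = minFlushes / period.Seconds()
	}
	return flushesPerSecond
}

func main() {
	period := time.Minute
	limiter := rate.NewLimiter(rate.Inf, 1) // unthrottled until a rate is set

	// 600 queued flush ops over a 1-minute period -> 600/60*1.05 = 10.5 per second.
	perSecond := computeFlushRate(600, period)
	fmt.Printf("computed flush rate: %.1f/s\n", perSecond)
	limiter.SetLimit(rate.Limit(perSecond))

	// Each flush worker blocks here before calling the store, so writes arrive
	// roughly every 1/10.5 s instead of as a burst.
	for i := 0; i < 3; i++ {
		_ = limiter.Wait(context.Background())
		fmt.Println("flush", i, time.Now().Format("15:04:05.000"))
	}
}

Note that, per the diff above, operations flagged immediate skip the Wait call entirely, so shutdown flushes are not slowed down by the limiter.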

pkg/ingester/ingester.go (+5)
@@ -24,6 +24,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/model/labels"
+	"golang.org/x/time/rate"
 	"google.golang.org/grpc/health/grpc_health_v1"
 
 	"github.com/grafana/loki/pkg/analytics"
@@ -222,6 +223,9 @@ type Ingester struct {
 	flushQueues     []*util.PriorityQueue
 	flushQueuesDone sync.WaitGroup
 
+	// Spread out calls to the chunk store over the flush period
+	flushRateLimiter *rate.Limiter
+
 	limiter *Limiter
 
 	// Denotes whether the ingester should flush on shutdown.
@@ -268,6 +272,7 @@ func New(cfg Config, clientConfig client.Config, store ChunkStore, limits Limits
 		periodicConfigs:       store.GetSchemaConfigs(),
 		loopQuit:              make(chan struct{}),
 		flushQueues:           make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
+		flushRateLimiter:      rate.NewLimiter(rate.Inf, 1),
 		tailersQuit:           make(chan struct{}),
 		metrics:               metrics,
 		flushOnShutdownSwitch: &OnceSwitch{},
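
One detail worth noting in the constructor: rate.NewLimiter(rate.Inf, 1) starts the limiter with an infinite limit, so flush workers are not throttled at all until the first sweep has computed a real rate and installed it with SetLimit. A minimal sketch of that behaviour (illustrative only, not part of the commit):

// Sketch of the rate.Inf initialization; the 10.5 value is illustrative.
package main

import (
	"context"
	"fmt"

	"golang.org/x/time/rate"
)

func main() {
	l := rate.NewLimiter(rate.Inf, 1)

	// While the limit is Inf, Wait returns immediately: nothing is throttled.
	_ = l.Wait(context.Background())
	fmt.Println("unlimited before first sweep:", l.Limit() == rate.Inf) // true

	// The first sweep replaces the limit with the computed flushes-per-second.
	l.SetLimit(rate.Limit(10.5))
	fmt.Println("limit after first sweep:", l.Limit()) // 10.5
}

Because setFlushRate runs at the end of every sweepUsers pass, the limit always reflects the queue that was just built; the rate.Inf default only covers the window before the first sweep.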
