From cc70f09f48d786c98a86e2e25a60eeb29b416fa7 Mon Sep 17 00:00:00 2001
From: Erik Grinaker
Date: Thu, 16 Dec 2021 10:04:57 +0000
Subject: [PATCH] kvserver: throttle `AddSSTable` requests with `IngestAsWrites`

`Store.Send()` limits the number of concurrent `AddSSTable` requests and
delays them depending on LSM health via `Engine.PreIngestDelay`, to prevent
overwhelming Pebble. However, requests with `IngestAsWrites` were not
throttled, which has been seen to cause significant read amplification.

This patch subjects `IngestAsWrites` requests to `Engine.PreIngestDelay` as
well, and adds a separate concurrency limit for them, controlled via the
cluster setting `kv.bulk_io_write.concurrent_addsstable_as_writes_requests`
(default 10). Since these requests are generally small, and will end up in
the Pebble memtable before being flushed to disk, we can tolerate a larger
limit for them than for regular `AddSSTable` requests (default 1).

Release note (performance improvement): Bulk ingestion of small write batches
(e.g. index backfill into a large number of ranges) is now throttled, to avoid
buildup of read amplification and associated performance degradation.
Concurrency is controlled by the new cluster setting
`kv.bulk_io_write.concurrent_addsstable_as_writes_requests`.
---
 pkg/kv/kvserver/batcheval/eval_context.go |  9 +++++----
 pkg/kv/kvserver/store.go                  | 24 ++++++++++++++++++++++--
 pkg/kv/kvserver/store_send.go             |  9 +++------
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/pkg/kv/kvserver/batcheval/eval_context.go b/pkg/kv/kvserver/batcheval/eval_context.go
index 87dd73bcef97..86e3e1240c7a 100644
--- a/pkg/kv/kvserver/batcheval/eval_context.go
+++ b/pkg/kv/kvserver/batcheval/eval_context.go
@@ -34,10 +34,11 @@ import (
 
 // Limiters is the collection of per-store limits used during cmd evaluation.
 type Limiters struct {
-	BulkIOWriteRate              *rate.Limiter
-	ConcurrentImportRequests     limit.ConcurrentRequestLimiter
-	ConcurrentExportRequests     limit.ConcurrentRequestLimiter
-	ConcurrentAddSSTableRequests limit.ConcurrentRequestLimiter
+	BulkIOWriteRate                      *rate.Limiter
+	ConcurrentImportRequests             limit.ConcurrentRequestLimiter
+	ConcurrentExportRequests             limit.ConcurrentRequestLimiter
+	ConcurrentAddSSTableRequests         limit.ConcurrentRequestLimiter
+	ConcurrentAddSSTableAsWritesRequests limit.ConcurrentRequestLimiter
 	// concurrentRangefeedIters is a semaphore used to limit the number of
 	// rangefeeds in the "catch-up" state across the store. The "catch-up" state
 	// is a temporary state at the beginning of a rangefeed which is expensive
diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go
index 5583992c1b4e..0fc188de0933 100644
--- a/pkg/kv/kvserver/store.go
+++ b/pkg/kv/kvserver/store.go
@@ -133,11 +133,23 @@ var importRequestsLimit = settings.RegisterIntSetting(
 // addSSTableRequestLimit limits concurrent AddSSTable requests.
 var addSSTableRequestLimit = settings.RegisterIntSetting(
 	"kv.bulk_io_write.concurrent_addsstable_requests",
-	"number of AddSSTable requests a store will handle concurrently before queuing",
+	"number of concurrent AddSSTable requests per store before queueing",
 	1,
 	settings.PositiveInt,
 )
 
+// addSSTableAsWritesRequestLimit limits concurrent AddSSTable requests with
+// IngestAsWrites set. These are smaller (kv.bulk_io_write.small_write_size),
+// and will end up in the Pebble memtable (default 64 MB) before being flushed
+// to disk, so we can allow a greater amount of concurrency than regular
+// AddSSTable requests. Applied independently of concurrent_addsstable_requests.
+var addSSTableAsWritesRequestLimit = settings.RegisterIntSetting(
+	"kv.bulk_io_write.concurrent_addsstable_as_writes_requests",
+	"number of concurrent AddSSTable requests ingested as writes per store before queueing",
+	10,
+	settings.PositiveInt,
+)
+
 // concurrentRangefeedItersLimit limits concurrent rangefeed catchup iterators.
 var concurrentRangefeedItersLimit = settings.RegisterIntSetting(
 	"kv.rangefeed.concurrent_catchup_iterators",
@@ -892,7 +904,15 @@ func NewStore(
 		"addSSTableRequestLimiter", int(addSSTableRequestLimit.Get(&cfg.Settings.SV)),
 	)
 	addSSTableRequestLimit.SetOnChange(&cfg.Settings.SV, func() {
-		s.limiters.ConcurrentAddSSTableRequests.SetLimit(int(addSSTableRequestLimit.Get(&cfg.Settings.SV)))
+		s.limiters.ConcurrentAddSSTableRequests.SetLimit(
+			int(addSSTableRequestLimit.Get(&cfg.Settings.SV)))
 	})
+	s.limiters.ConcurrentAddSSTableAsWritesRequests = limit.MakeConcurrentRequestLimiter(
+		"addSSTableAsWritesRequestLimiter", int(addSSTableAsWritesRequestLimit.Get(&cfg.Settings.SV)),
+	)
+	addSSTableAsWritesRequestLimit.SetOnChange(&cfg.Settings.SV, func() {
+		s.limiters.ConcurrentAddSSTableAsWritesRequests.SetLimit(
+			int(addSSTableAsWritesRequestLimit.Get(&cfg.Settings.SV)))
+	})
 	s.limiters.ConcurrentRangefeedIters = limit.MakeConcurrentRequestLimiter(
 		"rangefeedIterLimiter", int(concurrentRangefeedItersLimit.Get(&cfg.Settings.SV)),
diff --git a/pkg/kv/kvserver/store_send.go b/pkg/kv/kvserver/store_send.go
index 84bf9d5d6ced..8f24554176f0 100644
--- a/pkg/kv/kvserver/store_send.go
+++ b/pkg/kv/kvserver/store_send.go
@@ -263,15 +263,12 @@ func (s *Store) maybeThrottleBatch(
 
 	switch t := ba.Requests[0].GetInner().(type) {
 	case *roachpb.AddSSTableRequest:
-		// Limit the number of concurrent AddSSTable requests, since they're
-		// expensive and block all other writes to the same span. However, don't
-		// limit AddSSTable requests that are going to ingest as a WriteBatch.
+		limiter := s.limiters.ConcurrentAddSSTableRequests
 		if t.IngestAsWrites {
-			return nil, nil
+			limiter = s.limiters.ConcurrentAddSSTableAsWritesRequests
 		}
-
 		before := timeutil.Now()
-		res, err := s.limiters.ConcurrentAddSSTableRequests.Begin(ctx)
+		res, err := limiter.Begin(ctx)
 		if err != nil {
 			return nil, err
 		}
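For context, the throttling applied in maybeThrottleBatch follows the usual acquire/release
shape of a concurrent-request limiter: a request blocks in Begin until a slot is free and
releases the slot when evaluation finishes. Below is a minimal standalone sketch of that
pattern using a buffered channel as a counting semaphore. The requestLimiter type and its
methods are illustrative only and are not the limit.ConcurrentRequestLimiter implementation
used by the patch; the limits 1 and 10 mirror the defaults described in the commit message.

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// requestLimiter is a hypothetical stand-in for a concurrent-request limiter:
// a buffered channel acts as a counting semaphore, so at most cap(sem)
// requests are in flight at once; the rest queue in Begin.
type requestLimiter struct {
	sem chan struct{}
}

func newRequestLimiter(limit int) *requestLimiter {
	return &requestLimiter{sem: make(chan struct{}, limit)}
}

// Begin blocks until a slot is available or the context is canceled.
func (l *requestLimiter) Begin(ctx context.Context) error {
	select {
	case l.sem <- struct{}{}:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// Release frees the slot acquired by Begin.
func (l *requestLimiter) Release() {
	<-l.sem
}

func main() {
	// Two limiters mirroring the shape of the patch: a strict one for regular
	// AddSSTable requests and a more permissive one for small requests that
	// are ingested as writes.
	regular := newRequestLimiter(1)
	asWrites := newRequestLimiter(10)

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(i int, ingestAsWrites bool) {
			defer wg.Done()
			// Pick the limiter the same way store_send.go does above.
			limiter := regular
			if ingestAsWrites {
				limiter = asWrites
			}
			if err := limiter.Begin(context.Background()); err != nil {
				fmt.Println("throttled request canceled:", err)
				return
			}
			defer limiter.Release()
			time.Sleep(10 * time.Millisecond) // simulate request evaluation
			fmt.Printf("request %d done (ingestAsWrites=%v)\n", i, ingestAsWrites)
		}(i, i%2 == 0)
	}
	wg.Wait()
}

One reason the patch uses a dedicated limiter type rather than a plain semaphore is that the
limit must be adjustable at runtime, as wired up through SetOnChange/SetLimit in store.go; a
fixed-capacity channel like the one in this sketch cannot be resized without being replaced.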