From 776935342306d5f4cb997539d72298a7b56443eb Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 29 Mar 2021 12:56:41 -0700 Subject: [PATCH] tenantrate: switch to a single token bucket Tenant rate limiting currently uses four separate token buckets, each with its own rate and burst limits. In this commit we switch to a single shared token bucket (see #55114 for motivation). We use a "cost model" to map read and write requests to "KV Compute Units". Requests have a base cost plus a per-byte cost. The details are documented in settings.go. The values were chosen based on experiments ran by Nathan: https://docs.google.com/spreadsheets/d/1PPlIcKnusOqWtBoOZVd9xBEMPe5Ss1FgrJlYFgpQZaM/edit#gid=735409177 The rate was chosen so that it maps to 20% of 1 KV vCPU. This keeps the rate limit on small requests roughly the same as before (especially on mixed workloads). The largest departure from the previous limits is that we allow much more read bytes (the per-byte cost of reads is small in the cost model). If we were to keep close to the previous limits, the value of kv.tenant_rate_limiter.read_cost_per_megabyte would be 200 instead of 10. Perhaps we want to be more conservative here and make this value somewhere in-between? Fixes #55114. Release note: None --- pkg/kv/kvserver/tenantrate/factory.go | 14 +- pkg/kv/kvserver/tenantrate/helpers_test.go | 45 ++- pkg/kv/kvserver/tenantrate/limiter.go | 306 +++++++++------------ pkg/kv/kvserver/tenantrate/limiter_test.go | 24 +- pkg/kv/kvserver/tenantrate/settings.go | 176 ++++++------ pkg/kv/kvserver/tenantrate/testdata/basic | 47 ++-- pkg/kv/kvserver/tenantrate/testdata/burst | 33 ++- pkg/kv/kvserver/tenantrate/testdata/cancel | 16 +- pkg/kv/kvserver/tenantrate/testdata/reads | 33 +-- pkg/kv/kvserver/tenantrate/testdata/update | 19 +- 10 files changed, 347 insertions(+), 366 deletions(-) diff --git a/pkg/kv/kvserver/tenantrate/factory.go b/pkg/kv/kvserver/tenantrate/factory.go index cd075e64bfc6..890d88a5566f 100644 --- a/pkg/kv/kvserver/tenantrate/factory.go +++ b/pkg/kv/kvserver/tenantrate/factory.go @@ -32,7 +32,7 @@ type LimiterFactory struct { systemLimiter systemLimiter mu struct { syncutil.RWMutex - limits LimitConfigs + limits Config tenants map[roachpb.TenantID]*refCountedLimiter } } @@ -53,12 +53,12 @@ func NewLimiterFactory(st *cluster.Settings, knobs *TestingKnobs) *LimiterFactor rl.knobs = *knobs } rl.mu.tenants = make(map[roachpb.TenantID]*refCountedLimiter) - rl.mu.limits = LimitConfigsFromSettings(st) + rl.mu.limits = ConfigFromSettings(st) rl.systemLimiter = systemLimiter{ tenantMetrics: rl.metrics.tenantMetrics(roachpb.SystemTenantID), } - for _, setOnChange := range settingsSetOnChangeFuncs { - setOnChange(&st.SV, rl.updateLimits) + for _, setting := range configSettings { + setting.SetOnChange(&st.SV, rl.updateConfig) } return rl } @@ -114,12 +114,12 @@ func (rl *LimiterFactory) Release(lim Limiter) { } } -func (rl *LimiterFactory) updateLimits() { +func (rl *LimiterFactory) updateConfig() { rl.mu.Lock() defer rl.mu.Unlock() - rl.mu.limits = LimitConfigsFromSettings(rl.settings) + rl.mu.limits = ConfigFromSettings(rl.settings) for _, rcLim := range rl.mu.tenants { - rcLim.lim.updateLimits(rl.mu.limits) + rcLim.lim.updateConfig(rl.mu.limits) } } diff --git a/pkg/kv/kvserver/tenantrate/helpers_test.go b/pkg/kv/kvserver/tenantrate/helpers_test.go index 81f29ec0ff94..c42526917f53 100644 --- a/pkg/kv/kvserver/tenantrate/helpers_test.go +++ b/pkg/kv/kvserver/tenantrate/helpers_test.go @@ -10,17 +10,38 @@ package tenantrate -import 
"github.com/cockroachdb/cockroach/pkg/settings/cluster" +import "github.com/cockroachdb/cockroach/pkg/settings" -// OverrideSettingsWithRateLimits utilizes LimitConfigs from the values stored in the -// settings. -func OverrideSettingsWithRateLimits(settings *cluster.Settings, rl LimitConfigs) { - readRequestRateLimit.Override(&settings.SV, float64(rl.ReadRequests.Rate)) - readRequestBurstLimit.Override(&settings.SV, rl.ReadRequests.Burst) - writeRequestRateLimit.Override(&settings.SV, float64(rl.WriteRequests.Rate)) - writeRequestBurstLimit.Override(&settings.SV, rl.WriteRequests.Burst) - readRateLimit.Override(&settings.SV, int64(rl.ReadBytes.Rate)) - readBurstLimit.Override(&settings.SV, rl.ReadBytes.Burst) - writeRateLimit.Override(&settings.SV, int64(rl.WriteBytes.Rate)) - writeBurstLimit.Override(&settings.SV, rl.WriteBytes.Burst) +// SettingValues is a struct that can be populated from test files, via YAML. +type SettingValues struct { + Rate float64 + Burst float64 + + Read Factors + Write Factors +} + +// Factors for reads and writes. +type Factors struct { + Base float64 + PerByte float64 +} + +// OverrideSettings sets the cluster setting according to the given +// settingValues. +// +// Uninitialized (zero) values are ignored. +func OverrideSettings(sv *settings.Values, vals SettingValues) { + override := func(setting *settings.FloatSetting, val float64) { + if val != 0 { + setting.Override(sv, val) + } + } + override(kvcuRateLimit, vals.Rate) + override(kvcuBurstLimitSeconds, vals.Burst/kvcuRateLimit.Get(sv)) + + override(readRequestCost, vals.Read.Base) + override(readCostPerMB, vals.Read.PerByte*1024*1024) + override(writeRequestCost, vals.Write.Base) + override(writeCostPerMB, vals.Write.PerByte*1024*1024) } diff --git a/pkg/kv/kvserver/tenantrate/limiter.go b/pkg/kv/kvserver/tenantrate/limiter.go index 5276840e0f22..f22f4254d3fd 100644 --- a/pkg/kv/kvserver/tenantrate/limiter.go +++ b/pkg/kv/kvserver/tenantrate/limiter.go @@ -47,13 +47,11 @@ import ( // The Limiter is backed by a FIFO queue which provides fairness. type Limiter interface { - // Wait acquires n quota from the limiter. This acquisition cannot be - // released. Each call to wait will consume 1 read or write request - // depending on isWrite, 1 read byte, and writeBytes from the token buckets. - // Calls to Wait will block until the buckets contain adequate resources. If - // a request attempts to write more than the burst limit, it will wait until - // the bucket is completely full before acquiring the requested quantity and - // putting the limiter in debt. + // Wait acquires the quota necessary to admit a read or write request. This + // acquisition cannot be released. Calls to Wait will block until the buckets + // contain adequate resources. If a request attempts to write more than the + // burst limit, it will wait until the bucket is completely full before + // acquiring the requested quantity and putting the limiter in debt. // // The only errors which should be returned are due to the context. 
Wait(ctx context.Context, isWrite bool, writeBytes int64) error @@ -76,7 +74,7 @@ type limiter struct { func (rl *limiter) init( parent *LimiterFactory, tenantID roachpb.TenantID, - conf LimitConfigs, + config Config, metrics tenantMetrics, options ...quotapool.Option, ) { @@ -85,46 +83,41 @@ func (rl *limiter) init( tenantID: tenantID, metrics: metrics, } - buckets := tokenBuckets{ - readRequests: makeTokenBucket(conf.ReadRequests), - writeRequests: makeTokenBucket(conf.WriteRequests), - readBytes: makeTokenBucket(conf.ReadBytes), - writeBytes: makeTokenBucket(conf.WriteBytes), - } - options = append(options, quotapool.OnAcquisition(func( - ctx context.Context, poolName string, r quotapool.Request, start time.Time, - ) { - req := r.(*waitRequest) - if req.readRequests > 0 { - rl.metrics.readRequestsAdmitted.Inc(req.readRequests) - } - if req.writeRequests > 0 { - rl.metrics.writeRequestsAdmitted.Inc(req.writeRequests) - } - // Accounted for in limiter.RecordRead. - // if req.readBytes > 0 { - // rl.metrics.readBytesAdmitted.Inc(req.readBytes) - // } - if req.writeBytes > 0 { - rl.metrics.writeBytesAdmitted.Inc(req.writeBytes) - } - })) - rl.qp = quotapool.New(tenantID.String(), &buckets, options...) - buckets.clock = rl.qp.TimeSource() - buckets.lastUpdated = buckets.clock.Now() + // Note: if multiple token buckets are needed, consult the history of + // this file as of 0e70529f84 for a sample implementation. + bucket := makeTokenBucket(config) + rl.qp = quotapool.New(tenantID.String(), &bucket, options...) + bucket.clock = rl.qp.TimeSource() + bucket.lastUpdated = bucket.clock.Now() } +// Wait is part of the Limiter interface. func (rl *limiter) Wait(ctx context.Context, isWrite bool, writeBytes int64) error { + // TODO(radu): find a way to omit these atomic operations in the case when we + // don't have to wait. rl.metrics.currentBlocked.Inc(1) defer rl.metrics.currentBlocked.Dec(1) + r := newWaitRequest(isWrite, writeBytes) defer putWaitRequest(r) + if err := rl.qp.Acquire(ctx, r); err != nil { return err } + + if isWrite { + rl.metrics.writeRequestsAdmitted.Inc(1) + rl.metrics.writeBytesAdmitted.Inc(writeBytes) + } else { + // We don't know how much we will read; the bytes will be accounted for + // after the fact in RecordRead. + rl.metrics.readRequestsAdmitted.Inc(1) + } + return nil } +// RecordRead is part of the Limiter interface. func (rl *limiter) RecordRead(ctx context.Context, readBytes int64) { rb := newReadBytesResource(readBytes) defer putReadBytesResource(rb) @@ -132,130 +125,75 @@ func (rl *limiter) RecordRead(ctx context.Context, readBytes int64) { rl.qp.Add(rb) } -// updateLimits is used by the factory to inform the limiter of a new +// updateConfig is used by the factory to inform the limiter of a new // configuration. -func (rl *limiter) updateLimits(limits LimitConfigs) { - rl.qp.Add(limits) -} - -// tokenBuckets is the implementation of Resource which remains in the quotapool -// for a limiter. -type tokenBuckets struct { - clock timeutil.TimeSource - lastUpdated time.Time - readRequests tokenBucket - writeRequests tokenBucket - readBytes tokenBucket - writeBytes tokenBucket -} - -var _ quotapool.Resource = (*tokenBuckets)(nil) - -func (rb *tokenBuckets) update() { - now := rb.clock.Now() - - // Update token bucket capacity given the passage of clock. - // TODO(ajwerner): Consider instituting a minimum update frequency to avoid - // spinning too fast on timers for tons of tiny allocations at a fast rate. 
- if since := now.Sub(rb.lastUpdated); since > 0 { - rb.readRequests.update(since) - rb.writeRequests.update(since) - rb.readBytes.update(since) - rb.writeBytes.update(since) - rb.lastUpdated = now - } +func (rl *limiter) updateConfig(config Config) { + rl.qp.Add(config) } -// check determines whether a request can be fulfilled by the given tokens in -// the bucket. If not, it determines when the buckets will be adequately full -// to fulfill the request. -func (rb *tokenBuckets) check(req *waitRequest) (fulfilled bool, tryAgainAfter time.Duration) { - fulfilled = true - check := func(t *tokenBucket, needed int64) { - if ok, after := t.check(needed); !ok { - fulfilled = false - if after > tryAgainAfter { - tryAgainAfter = after - } - } - } - check(&rb.readRequests, req.readRequests) - check(&rb.writeRequests, req.writeRequests) - check(&rb.readBytes, req.readBytes) - check(&rb.writeBytes, req.writeBytes) - return fulfilled, tryAgainAfter -} - -func (rb *tokenBuckets) subtract(req *waitRequest) { - rb.readRequests.tokens -= float64(req.readRequests) - rb.writeRequests.tokens -= float64(req.writeRequests) - rb.readBytes.tokens -= float64(req.readBytes) - rb.writeBytes.tokens -= float64(req.writeBytes) -} - -func (rb *tokenBuckets) Merge(val interface{}) (shouldNotify bool) { - switch toAdd := val.(type) { - case LimitConfigs: - // Account for the accumulation since lastUpdate and now under the old - // configuration. - rb.update() - - rb.readRequests.setConf(toAdd.ReadRequests) - rb.writeRequests.setConf(toAdd.WriteRequests) - rb.readBytes.setConf(toAdd.ReadBytes) - rb.writeBytes.setConf(toAdd.WriteBytes) - return true - case *readBytesResource: - rb.readBytes.tokens -= float64(*toAdd) - // Do not notify the head of the queue. In the best case we did not disturb - // the time at which it can be fulfilled and in the worst case, we made it - // further in the future. - return false - default: - panic(errors.AssertionFailedf("merge not implemented for %T", val)) - } -} - -// tokenBucket represents a token bucket for a given resource and its associated -// configuration. +// tokenBucket represents the token bucket for KV Compute Units and its +// associated configuration. It implements quotapool.Resource. type tokenBucket struct { - LimitConfig + config Config + clock timeutil.TimeSource + lastUpdated time.Time + // Current number of tokens, in KV Compute Units. tokens float64 } -func makeTokenBucket(rl LimitConfig) tokenBucket { +var _ quotapool.Resource = (*tokenBucket)(nil) + +func makeTokenBucket(config Config) tokenBucket { return tokenBucket{ - LimitConfig: rl, - tokens: float64(rl.Burst), + config: config, + tokens: float64(config.Burst), } } -// update applies the positive time delta update for the resource. -func (t *tokenBucket) update(deltaT time.Duration) { - t.tokens += float64(t.Rate) * deltaT.Seconds() - t.clampTokens() +// update accounts for the passing of time. +func (tb *tokenBucket) update() { + now := tb.clock.Now() + + if since := now.Sub(tb.lastUpdated); since > 0 { + tb.tokens += float64(tb.config.Rate) * since.Seconds() + tb.clampTokens() + tb.lastUpdated = now + } } -// checkQuota returns whether needed will be satisfied by quota. Note that the -// definition of satisfied is either that the integer part of quota exceeds -// needed or that quota is equal to the burst. This is because we want to -// have request put the rate limiter in debt rather than prevent execution of -// requests. 
+// tryToFulfill calculates the number of KV Compute Units needed for the
+// request and tries to remove them from the bucket.
+//
+// If the request can be fulfilled, the current token amount is adjusted. Note
+// if the current amount is equal to Burst, then we allow any request to be
+// fulfilled. This is because we want to have a request put the rate limiter
+// in debt rather than prevent execution of requests.
 //
 // If the request is not satisfied, the amount of time that must be waited for
 // the request to be satisfied at the current rate is returned.
-func (t *tokenBucket) check(needed int64) (fulfilled bool, tryAgainAfter time.Duration) {
-	if q := int64(t.tokens); needed <= q || q == t.Burst {
+func (tb *tokenBucket) tryToFulfill(
+	req *waitRequest,
+) (fulfilled bool, tryAgainAfter time.Duration) {
+	var needed float64
+	if req.isWrite {
+		needed = tb.config.WriteRequestUnits + float64(req.writeBytes)*tb.config.WriteUnitsPerByte
+	} else {
+		// We don't know the size of the read upfront; we will adjust the bucket
+		// after the fact in RecordRead.
+		needed = tb.config.ReadRequestUnits
+	}
+	if q := tb.tokens; needed <= q || q == tb.config.Burst {
+		tb.tokens -= needed
 		return true, 0
 	}
 	// We'll calculate the amount of time until the quota is full if we're
 	// requesting more than the burst limit.
-	if needed > t.Burst {
-		needed = t.Burst
+	if needed > tb.config.Burst {
+		needed = tb.config.Burst
 	}
-	delta := float64(needed) - t.tokens
-	tryAgainAfter = time.Duration((delta * float64(time.Second)) / float64(t.Rate))
+	delta := needed - tb.tokens
+	tryAgainAfter = time.Duration((delta * float64(time.Second)) / tb.config.Rate)
 	return false, tryAgainAfter
 }

@@ -266,26 +204,49 @@ func (t *tokenBucket) check(needed int64) (fulfilled bool, tryAgainAfter time.Du
 // It's not obvious that we want to add tokens when increasing the burst as
 // that might lead to a big spike in load immediately upon increasing this
 // limit.
-func (t *tokenBucket) setConf(rl LimitConfig) {
-	t.LimitConfig = rl
-	t.clampTokens()
+func (tb *tokenBucket) updateConfig(config Config) {
+	tb.config = config
+	tb.clampTokens()
 }

 // clampTokens ensures that tokens does not exceed burst.
-func (t *tokenBucket) clampTokens() {
-	if burst := float64(t.Burst); t.tokens > burst {
-		t.tokens = burst
+func (tb *tokenBucket) clampTokens() {
+	if tb.tokens > tb.config.Burst {
+		tb.tokens = tb.config.Burst
+	}
+}
+
+// Merge is part of quotapool.Resource.
+func (tb *tokenBucket) Merge(val interface{}) (shouldNotify bool) {
+	switch val := val.(type) {
+	case Config:
+		// Account for the accumulation since lastUpdate and now under the old
+		// configuration.
+		tb.update()
+
+		tb.updateConfig(val)
+		return true
+
+	case *readBytesResource:
+		tb.tokens -= float64(val.readBytes) * tb.config.ReadUnitsPerByte
+		// Do not notify the head of the queue. In the best case we did not disturb
+		// the time at which it can be fulfilled and in the worst case, we made it
+		// further in the future.
+		return false
+
+	default:
+		panic(errors.AssertionFailedf("merge not implemented for %T", val))
+	}
 }

 // waitRequest is used to wait for adequate resources in the tokenBuckets. 
type waitRequest struct { - readRequests int64 - writeRequests int64 - writeBytes int64 - readBytes int64 + isWrite bool + writeBytes int64 } +var _ quotapool.Request = (*waitRequest)(nil) + var waitRequestSyncPool = sync.Pool{ New: func() interface{} { return new(waitRequest) }, } @@ -295,15 +256,8 @@ var waitRequestSyncPool = sync.Pool{ func newWaitRequest(isWrite bool, writeBytes int64) *waitRequest { r := waitRequestSyncPool.Get().(*waitRequest) *r = waitRequest{ - readRequests: 0, - writeRequests: 0, - readBytes: 1, - writeBytes: writeBytes, - } - if isWrite { - r.writeRequests = 1 - } else { - r.readRequests = 1 + isWrite: isWrite, + writeBytes: writeBytes, } return r } @@ -313,7 +267,23 @@ func putWaitRequest(r *waitRequest) { waitRequestSyncPool.Put(r) } -type readBytesResource int64 +// Acquire is part of quotapool.Request. +func (req *waitRequest) Acquire( + ctx context.Context, res quotapool.Resource, +) (fulfilled bool, tryAgainAfter time.Duration) { + r := res.(*tokenBucket) + r.update() + return r.tryToFulfill(req) +} + +// ShouldWait is part of quotapool.Request. +func (req *waitRequest) ShouldWait() bool { + return true +} + +type readBytesResource struct { + readBytes int64 +} var readBytesResourceSyncPool = sync.Pool{ New: func() interface{} { return new(readBytesResource) }, @@ -321,27 +291,13 @@ var readBytesResourceSyncPool = sync.Pool{ func newReadBytesResource(readBytes int64) *readBytesResource { rb := readBytesResourceSyncPool.Get().(*readBytesResource) - *rb = readBytesResource(readBytes) + *rb = readBytesResource{ + readBytes: readBytes, + } return rb } func putReadBytesResource(rb *readBytesResource) { - *rb = 0 + *rb = readBytesResource{} readBytesResourceSyncPool.Put(rb) } - -func (req *waitRequest) Acquire( - ctx context.Context, res quotapool.Resource, -) (fulfilled bool, tryAgainAfter time.Duration) { - r := res.(*tokenBuckets) - r.update() - if fulfilled, tryAgainAfter = r.check(req); !fulfilled { - return false, tryAgainAfter - } - r.subtract(req) - return true, 0 -} - -func (req *waitRequest) ShouldWait() bool { - return true -} diff --git a/pkg/kv/kvserver/tenantrate/limiter_test.go b/pkg/kv/kvserver/tenantrate/limiter_test.go index 1c94d70bb958..9a910e891ed8 100644 --- a/pkg/kv/kvserver/tenantrate/limiter_test.go +++ b/pkg/kv/kvserver/tenantrate/limiter_test.go @@ -48,9 +48,9 @@ func TestCloser(t *testing.T) { limiter := factory.GetTenant(tenant, closer) ctx := context.Background() // First Wait call will not block. 
- require.NoError(t, limiter.Wait(ctx, false, 1)) + require.NoError(t, limiter.Wait(ctx, true, 1)) errCh := make(chan error, 1) - go func() { errCh <- limiter.Wait(ctx, false, 1<<30) }() + go func() { errCh <- limiter.Wait(ctx, true, 1<<30) }() testutils.SucceedsSoon(t, func() error { if timers := timeSource.Timers(); len(timers) != 1 { return errors.Errorf("expected 1 timer, found %d", len(timers)) @@ -142,9 +142,8 @@ func (ts *testState) init(t *testing.T, d *datadriven.TestData) string { ts.tenants = make(map[roachpb.TenantID][]tenantrate.Limiter) ts.clock = timeutil.NewManualTime(t0) ts.settings = cluster.MakeTestingClusterSettings() - limits := tenantrate.LimitConfigsFromSettings(ts.settings) - parseLimits(t, d, &limits) - tenantrate.OverrideSettingsWithRateLimits(ts.settings, limits) + settings := parseSettings(t, d) + tenantrate.OverrideSettings(&ts.settings.SV, settings) ts.rl = tenantrate.NewLimiterFactory(ts.settings, &tenantrate.TestingKnobs{ TimeSource: ts.clock, }) @@ -157,9 +156,8 @@ func (ts *testState) init(t *testing.T, d *datadriven.TestData) string { // yaml object representing the limits and updates accordingly. It returns // the current time. See init for more details as the semantics are the same. func (ts *testState) updateSettings(t *testing.T, d *datadriven.TestData) string { - limits := tenantrate.LimitConfigsFromSettings(ts.settings) - parseLimits(t, d, &limits) - tenantrate.OverrideSettingsWithRateLimits(ts.settings, limits) + settings := parseSettings(t, d) + tenantrate.OverrideSettings(&ts.settings.SV, settings) return ts.formatTime() } @@ -363,11 +361,11 @@ func (ts *testState) metrics(t *testing.T, d *datadriven.TestData) string { if err := testutils.SucceedsSoonError(func() error { got := ts.getMetricsText(t, d) if got != exp { - return errors.Errorf("got: %q, exp: %q", got, exp) + return errors.Errorf("got:\n%s\nexp:\n%s\n", got, exp) } return nil }); err != nil { - d.Fatalf(t, "failed to find expected timers: %v", err) + d.Fatalf(t, "failed to find expected metrics: %v", err) } return d.Expected } @@ -516,10 +514,12 @@ func parseTenantIDs(t *testing.T, d *datadriven.TestData) []uint64 { return tenantIDs } -func parseLimits(t *testing.T, d *datadriven.TestData, limits *tenantrate.LimitConfigs) { - if err := yaml.UnmarshalStrict([]byte(d.Input), &limits); err != nil { +func parseSettings(t *testing.T, d *datadriven.TestData) tenantrate.SettingValues { + var vals tenantrate.SettingValues + if err := yaml.UnmarshalStrict([]byte(d.Input), &vals); err != nil { d.Fatalf(t, "failed to unmarshal limits: %v", err) } + return vals } func parseStrings(t *testing.T, d *datadriven.TestData) []string { diff --git a/pkg/kv/kvserver/tenantrate/settings.go b/pkg/kv/kvserver/tenantrate/settings.go index 938b4c785f50..02a03644341c 100644 --- a/pkg/kv/kvserver/tenantrate/settings.go +++ b/pkg/kv/kvserver/tenantrate/settings.go @@ -15,107 +15,111 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" ) -// Limit defines a rate in units per second. -type Limit float64 - -// LimitConfig configures the rate limit and burst limit for a given resource. -type LimitConfig struct { - Rate Limit - Burst int64 -} - -// LimitConfigs configures the rate limits. -// It is exported for convenience and testing. -// The values are derived from cluster settings. -type LimitConfigs struct { - ReadRequests LimitConfig - WriteRequests LimitConfig - ReadBytes LimitConfig - WriteBytes LimitConfig -} +// Config contains the configuration of the rate limiter. 
+//
+// We limit the rate in terms of "KV Compute Units". The configuration contains
+// the rate and burst limits for KVCUs, as well as factors that define a "cost
+// model" for calculating the number of KVCUs for a read or write request.
+//
+// Specifically, the cost model is a linear function combining a fixed
+// per-request cost and a size-dependent (per-byte) cost.
+//
+// For a read:
+//   KVCUs = ReadRequestUnits + <size of read in bytes> * ReadUnitsPerByte
+// For a write:
+//   KVCUs = WriteRequestUnits + <size of write in bytes> * WriteUnitsPerByte
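
Illustrative example (not part of the patch): the sketch below mirrors the cost
model described above and in the commit message, to make the arithmetic
concrete. The only value implied by this diff is a read cost of 10 KVCUs per
megabyte (kv.tenant_rate_limiter.read_cost_per_megabyte); the other constants
are hypothetical placeholders, not the actual defaults.

package main

import "fmt"

// costModel mirrors the per-request and per-byte factors used by
// tryToFulfill and RecordRead. Field names follow the Config fields in
// settings.go; the values assigned in main are illustrative only.
type costModel struct {
	ReadRequestUnits  float64 // fixed cost of a read request, in KVCUs
	WriteRequestUnits float64 // fixed cost of a write request, in KVCUs
	ReadUnitsPerByte  float64 // per-byte cost of a read, in KVCUs
	WriteUnitsPerByte float64 // per-byte cost of a write, in KVCUs
}

// readCost computes KVCUs = ReadRequestUnits + bytes * ReadUnitsPerByte.
func (c costModel) readCost(bytes int64) float64 {
	return c.ReadRequestUnits + float64(bytes)*c.ReadUnitsPerByte
}

// writeCost computes KVCUs = WriteRequestUnits + bytes * WriteUnitsPerByte.
func (c costModel) writeCost(bytes int64) float64 {
	return c.WriteRequestUnits + float64(bytes)*c.WriteUnitsPerByte
}

func main() {
	const mb = 1024 * 1024
	c := costModel{
		ReadRequestUnits:  1,         // hypothetical
		WriteRequestUnits: 1,         // hypothetical
		ReadUnitsPerByte:  10.0 / mb, // 10 KVCUs per MB read, per the commit message
		WriteUnitsPerByte: 100.0 / mb, // hypothetical
	}
	// A small read costs roughly its base cost; a 1 MB read adds 10 KVCUs.
	fmt.Println(c.readCost(0), c.readCost(1*mb))
	// A 64 KB write adds a per-byte component on top of the base cost.
	fmt.Println(c.writeCost(64 * 1024))
}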
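
For context, the following sketch (also not part of the patch) shows the call
pattern implied by the Limiter interface and the comments in Wait: writes
declare their size up front, while reads are admitted at the base cost and the
actual bytes are charged after the fact via RecordRead. The wrapper function
and its names are illustrative, not taken from the CockroachDB codebase.

package kvexample // illustrative package, not part of the patch

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/tenantrate"
)

// admitAndRun waits for the tenant's rate limiter before executing a request,
// then charges read bytes retroactively.
func admitAndRun(
	ctx context.Context,
	lim tenantrate.Limiter,
	isWrite bool,
	writeBytes int64,
	run func(ctx context.Context) (readBytes int64, err error),
) error {
	// Block until the tenant's token bucket has enough KVCUs, or go into debt
	// if the request is larger than the burst limit.
	if err := lim.Wait(ctx, isWrite, writeBytes); err != nil {
		return err // context cancellation or limiter closure
	}
	readBytes, err := run(ctx)
	if err != nil {
		return err
	}
	if !isWrite {
		// Charge the bytes actually read against the bucket after the fact,
		// since the read size is unknown at admission time.
		lim.RecordRead(ctx, readBytes)
	}
	return nil
}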