diff --git a/pkg/kv/kvserver/tenantrate/factory.go b/pkg/kv/kvserver/tenantrate/factory.go index cd075e64bfc6..890d88a5566f 100644 --- a/pkg/kv/kvserver/tenantrate/factory.go +++ b/pkg/kv/kvserver/tenantrate/factory.go @@ -32,7 +32,7 @@ type LimiterFactory struct { systemLimiter systemLimiter mu struct { syncutil.RWMutex - limits LimitConfigs + limits Config tenants map[roachpb.TenantID]*refCountedLimiter } } @@ -53,12 +53,12 @@ func NewLimiterFactory(st *cluster.Settings, knobs *TestingKnobs) *LimiterFactor rl.knobs = *knobs } rl.mu.tenants = make(map[roachpb.TenantID]*refCountedLimiter) - rl.mu.limits = LimitConfigsFromSettings(st) + rl.mu.limits = ConfigFromSettings(st) rl.systemLimiter = systemLimiter{ tenantMetrics: rl.metrics.tenantMetrics(roachpb.SystemTenantID), } - for _, setOnChange := range settingsSetOnChangeFuncs { - setOnChange(&st.SV, rl.updateLimits) + for _, setting := range configSettings { + setting.SetOnChange(&st.SV, rl.updateConfig) } return rl } @@ -114,12 +114,12 @@ func (rl *LimiterFactory) Release(lim Limiter) { } } -func (rl *LimiterFactory) updateLimits() { +func (rl *LimiterFactory) updateConfig() { rl.mu.Lock() defer rl.mu.Unlock() - rl.mu.limits = LimitConfigsFromSettings(rl.settings) + rl.mu.limits = ConfigFromSettings(rl.settings) for _, rcLim := range rl.mu.tenants { - rcLim.lim.updateLimits(rl.mu.limits) + rcLim.lim.updateConfig(rl.mu.limits) } } diff --git a/pkg/kv/kvserver/tenantrate/helpers_test.go b/pkg/kv/kvserver/tenantrate/helpers_test.go index 81f29ec0ff94..c42526917f53 100644 --- a/pkg/kv/kvserver/tenantrate/helpers_test.go +++ b/pkg/kv/kvserver/tenantrate/helpers_test.go @@ -10,17 +10,38 @@ package tenantrate -import "github.com/cockroachdb/cockroach/pkg/settings/cluster" +import "github.com/cockroachdb/cockroach/pkg/settings" -// OverrideSettingsWithRateLimits utilizes LimitConfigs from the values stored in the -// settings. -func OverrideSettingsWithRateLimits(settings *cluster.Settings, rl LimitConfigs) { - readRequestRateLimit.Override(&settings.SV, float64(rl.ReadRequests.Rate)) - readRequestBurstLimit.Override(&settings.SV, rl.ReadRequests.Burst) - writeRequestRateLimit.Override(&settings.SV, float64(rl.WriteRequests.Rate)) - writeRequestBurstLimit.Override(&settings.SV, rl.WriteRequests.Burst) - readRateLimit.Override(&settings.SV, int64(rl.ReadBytes.Rate)) - readBurstLimit.Override(&settings.SV, rl.ReadBytes.Burst) - writeRateLimit.Override(&settings.SV, int64(rl.WriteBytes.Rate)) - writeBurstLimit.Override(&settings.SV, rl.WriteBytes.Burst) +// SettingValues is a struct that can be populated from test files, via YAML. +type SettingValues struct { + Rate float64 + Burst float64 + + Read Factors + Write Factors +} + +// Factors for reads and writes. +type Factors struct { + Base float64 + PerByte float64 +} + +// OverrideSettings sets the cluster setting according to the given +// settingValues. +// +// Uninitialized (zero) values are ignored. 
+func OverrideSettings(sv *settings.Values, vals SettingValues) { + override := func(setting *settings.FloatSetting, val float64) { + if val != 0 { + setting.Override(sv, val) + } + } + override(kvcuRateLimit, vals.Rate) + override(kvcuBurstLimitSeconds, vals.Burst/kvcuRateLimit.Get(sv)) + + override(readRequestCost, vals.Read.Base) + override(readCostPerMB, vals.Read.PerByte*1024*1024) + override(writeRequestCost, vals.Write.Base) + override(writeCostPerMB, vals.Write.PerByte*1024*1024) } diff --git a/pkg/kv/kvserver/tenantrate/limiter.go b/pkg/kv/kvserver/tenantrate/limiter.go index 5276840e0f22..f22f4254d3fd 100644 --- a/pkg/kv/kvserver/tenantrate/limiter.go +++ b/pkg/kv/kvserver/tenantrate/limiter.go @@ -47,13 +47,11 @@ import ( // The Limiter is backed by a FIFO queue which provides fairness. type Limiter interface { - // Wait acquires n quota from the limiter. This acquisition cannot be - // released. Each call to wait will consume 1 read or write request - // depending on isWrite, 1 read byte, and writeBytes from the token buckets. - // Calls to Wait will block until the buckets contain adequate resources. If - // a request attempts to write more than the burst limit, it will wait until - // the bucket is completely full before acquiring the requested quantity and - // putting the limiter in debt. + // Wait acquires the quota necessary to admit a read or write request. This + // acquisition cannot be released. Calls to Wait will block until the buckets + // contain adequate resources. If a request attempts to write more than the + // burst limit, it will wait until the bucket is completely full before + // acquiring the requested quantity and putting the limiter in debt. // // The only errors which should be returned are due to the context. Wait(ctx context.Context, isWrite bool, writeBytes int64) error @@ -76,7 +74,7 @@ type limiter struct { func (rl *limiter) init( parent *LimiterFactory, tenantID roachpb.TenantID, - conf LimitConfigs, + config Config, metrics tenantMetrics, options ...quotapool.Option, ) { @@ -85,46 +83,41 @@ func (rl *limiter) init( tenantID: tenantID, metrics: metrics, } - buckets := tokenBuckets{ - readRequests: makeTokenBucket(conf.ReadRequests), - writeRequests: makeTokenBucket(conf.WriteRequests), - readBytes: makeTokenBucket(conf.ReadBytes), - writeBytes: makeTokenBucket(conf.WriteBytes), - } - options = append(options, quotapool.OnAcquisition(func( - ctx context.Context, poolName string, r quotapool.Request, start time.Time, - ) { - req := r.(*waitRequest) - if req.readRequests > 0 { - rl.metrics.readRequestsAdmitted.Inc(req.readRequests) - } - if req.writeRequests > 0 { - rl.metrics.writeRequestsAdmitted.Inc(req.writeRequests) - } - // Accounted for in limiter.RecordRead. - // if req.readBytes > 0 { - // rl.metrics.readBytesAdmitted.Inc(req.readBytes) - // } - if req.writeBytes > 0 { - rl.metrics.writeBytesAdmitted.Inc(req.writeBytes) - } - })) - rl.qp = quotapool.New(tenantID.String(), &buckets, options...) - buckets.clock = rl.qp.TimeSource() - buckets.lastUpdated = buckets.clock.Now() + // Note: if multiple token buckets are needed, consult the history of + // this file as of 0e70529f84 for a sample implementation. + bucket := makeTokenBucket(config) + rl.qp = quotapool.New(tenantID.String(), &bucket, options...) + bucket.clock = rl.qp.TimeSource() + bucket.lastUpdated = bucket.clock.Now() } +// Wait is part of the Limiter interface. 
 func (rl *limiter) Wait(ctx context.Context, isWrite bool, writeBytes int64) error {
+	// TODO(radu): find a way to omit these atomic operations in the case when we
+	// don't have to wait.
 	rl.metrics.currentBlocked.Inc(1)
 	defer rl.metrics.currentBlocked.Dec(1)
+
 	r := newWaitRequest(isWrite, writeBytes)
 	defer putWaitRequest(r)
+
 	if err := rl.qp.Acquire(ctx, r); err != nil {
 		return err
 	}
+
+	if isWrite {
+		rl.metrics.writeRequestsAdmitted.Inc(1)
+		rl.metrics.writeBytesAdmitted.Inc(writeBytes)
+	} else {
+		// We don't know how much we will read; the bytes will be accounted for
+		// after the fact in RecordRead.
+		rl.metrics.readRequestsAdmitted.Inc(1)
+	}
+
 	return nil
 }
 
+// RecordRead is part of the Limiter interface.
 func (rl *limiter) RecordRead(ctx context.Context, readBytes int64) {
 	rb := newReadBytesResource(readBytes)
 	defer putReadBytesResource(rb)
@@ -132,130 +125,75 @@ func (rl *limiter) RecordRead(ctx context.Context, readBytes int64) {
 	rl.qp.Add(rb)
 }
 
-// updateLimits is used by the factory to inform the limiter of a new
+// updateConfig is used by the factory to inform the limiter of a new
 // configuration.
-func (rl *limiter) updateLimits(limits LimitConfigs) {
-	rl.qp.Add(limits)
-}
-
-// tokenBuckets is the implementation of Resource which remains in the quotapool
-// for a limiter.
-type tokenBuckets struct {
-	clock         timeutil.TimeSource
-	lastUpdated   time.Time
-	readRequests  tokenBucket
-	writeRequests tokenBucket
-	readBytes     tokenBucket
-	writeBytes    tokenBucket
-}
-
-var _ quotapool.Resource = (*tokenBuckets)(nil)
-
-func (rb *tokenBuckets) update() {
-	now := rb.clock.Now()
-
-	// Update token bucket capacity given the passage of time.
-	// TODO(ajwerner): Consider instituting a minimum update frequency to avoid
-	// spinning too fast on timers for tons of tiny allocations at a fast rate.
-	if since := now.Sub(rb.lastUpdated); since > 0 {
-		rb.readRequests.update(since)
-		rb.writeRequests.update(since)
-		rb.readBytes.update(since)
-		rb.writeBytes.update(since)
-		rb.lastUpdated = now
-	}
+func (rl *limiter) updateConfig(config Config) {
+	rl.qp.Add(config)
 }
 
-// check determines whether a request can be fulfilled by the given tokens in
-// the bucket. If not, it determines when the buckets will be adequately full
-// to fulfill the request.
-func (rb *tokenBuckets) check(req *waitRequest) (fulfilled bool, tryAgainAfter time.Duration) {
-	fulfilled = true
-	check := func(t *tokenBucket, needed int64) {
-		if ok, after := t.check(needed); !ok {
-			fulfilled = false
-			if after > tryAgainAfter {
-				tryAgainAfter = after
-			}
-		}
-	}
-	check(&rb.readRequests, req.readRequests)
-	check(&rb.writeRequests, req.writeRequests)
-	check(&rb.readBytes, req.readBytes)
-	check(&rb.writeBytes, req.writeBytes)
-	return fulfilled, tryAgainAfter
-}
-
-func (rb *tokenBuckets) subtract(req *waitRequest) {
-	rb.readRequests.tokens -= float64(req.readRequests)
-	rb.writeRequests.tokens -= float64(req.writeRequests)
-	rb.readBytes.tokens -= float64(req.readBytes)
-	rb.writeBytes.tokens -= float64(req.writeBytes)
-}
-
-func (rb *tokenBuckets) Merge(val interface{}) (shouldNotify bool) {
-	switch toAdd := val.(type) {
-	case LimitConfigs:
-		// Account for the accumulation since lastUpdate and now under the old
-		// configuration.
-		rb.update()
-
-		rb.readRequests.setConf(toAdd.ReadRequests)
-		rb.writeRequests.setConf(toAdd.WriteRequests)
-		rb.readBytes.setConf(toAdd.ReadBytes)
-		rb.writeBytes.setConf(toAdd.WriteBytes)
-		return true
-	case *readBytesResource:
-		rb.readBytes.tokens -= float64(*toAdd)
-		// Do not notify the head of the queue. In the best case we did not disturb
-		// the time at which it can be fulfilled and in the worst case, we made it
-		// further in the future.
-		return false
-	default:
-		panic(errors.AssertionFailedf("merge not implemented for %T", val))
-	}
-}
-
-// tokenBucket represents a token bucket for a given resource and its associated
-// configuration.
+// tokenBucket represents the token bucket for KV Compute Units and its
+// associated configuration. It implements quotapool.Resource.
 type tokenBucket struct {
-	LimitConfig
+	config      Config
+	clock       timeutil.TimeSource
+	lastUpdated time.Time
+	// Current number of tokens, in KV Compute Units.
 	tokens float64
 }
 
-func makeTokenBucket(rl LimitConfig) tokenBucket {
+var _ quotapool.Resource = (*tokenBucket)(nil)
+
+func makeTokenBucket(config Config) tokenBucket {
 	return tokenBucket{
-		LimitConfig: rl,
-		tokens:      float64(rl.Burst),
+		config: config,
+		tokens: float64(config.Burst),
 	}
 }
 
-// update applies the positive time delta update for the resource.
-func (t *tokenBucket) update(deltaT time.Duration) {
-	t.tokens += float64(t.Rate) * deltaT.Seconds()
-	t.clampTokens()
+// update accounts for the passing of time.
+func (tb *tokenBucket) update() {
+	now := tb.clock.Now()
+
+	if since := now.Sub(tb.lastUpdated); since > 0 {
+		tb.tokens += float64(tb.config.Rate) * since.Seconds()
+		tb.clampTokens()
+		tb.lastUpdated = now
+	}
 }
 
-// checkQuota returns whether needed will be satisfied by quota. Note that the
-// definition of satisfied is either that the integer part of quota exceeds
-// needed or that quota is equal to the burst. This is because we want to
-// have request put the rate limiter in debt rather than prevent execution of
-// requests.
+// tryToFulfill calculates the number of KV Compute Units needed for the
+// request and tries to remove them from the bucket.
+//
+// If the request can be fulfilled, the current token amount is adjusted. Note
+// that if the current amount is equal to Burst, then we allow any request to
+// be fulfilled. This is because we want to have requests put the rate limiter
+// in debt rather than prevent execution of requests.
 //
 // If the request is not satisfied, the amount of time that must be waited for
 // the request to be satisfied at the current rate is returned.
-func (t *tokenBucket) check(needed int64) (fulfilled bool, tryAgainAfter time.Duration) {
-	if q := int64(t.tokens); needed <= q || q == t.Burst {
+func (tb *tokenBucket) tryToFulfill(
+	req *waitRequest,
+) (fulfilled bool, tryAgainAfter time.Duration) {
+	var needed float64
+	if req.isWrite {
+		needed = tb.config.WriteRequestUnits + float64(req.writeBytes)*tb.config.WriteUnitsPerByte
+	} else {
+		// We don't know the size of the read upfront; we will adjust the bucket
+		// after the fact in RecordRead.
+		needed = tb.config.ReadRequestUnits
+	}
+	if q := tb.tokens; needed <= q || q == tb.config.Burst {
+		tb.tokens -= needed
 		return true, 0
 	}
 	// We'll calculate the amount of time until the quota is full if we're
 	// requesting more than the burst limit.
- if needed > t.Burst { - needed = t.Burst + if needed > tb.config.Burst { + needed = tb.config.Burst } - delta := float64(needed) - t.tokens - tryAgainAfter = time.Duration((delta * float64(time.Second)) / float64(t.Rate)) + delta := needed - tb.tokens + tryAgainAfter = time.Duration((delta * float64(time.Second)) / tb.config.Rate) return false, tryAgainAfter } @@ -266,26 +204,49 @@ func (t *tokenBucket) check(needed int64) (fulfilled bool, tryAgainAfter time.Du // It's not obvious that we want to add tokens when increasing the burst as // that might lead to a big spike in load immediately upon increasing this // limit. -func (t *tokenBucket) setConf(rl LimitConfig) { - t.LimitConfig = rl - t.clampTokens() +func (tb *tokenBucket) updateConfig(config Config) { + tb.config = config + tb.clampTokens() } // clampTokens ensures that tokens does not exceed burst. -func (t *tokenBucket) clampTokens() { - if burst := float64(t.Burst); t.tokens > burst { - t.tokens = burst +func (tb *tokenBucket) clampTokens() { + if tb.tokens > tb.config.Burst { + tb.tokens = tb.config.Burst + } +} + +// Merge is part of quotapool.Resource. +func (tb *tokenBucket) Merge(val interface{}) (shouldNotify bool) { + switch val := val.(type) { + case Config: + // Account for the accumulation since lastUpdate and now under the old + // configuration. + tb.update() + + tb.updateConfig(val) + return true + + case *readBytesResource: + tb.tokens -= float64(val.readBytes) * tb.config.ReadUnitsPerByte + // Do not notify the head of the queue. In the best case we did not disturb + // the time at which it can be fulfilled and in the worst case, we made it + // further in the future. + return false + + default: + panic(errors.AssertionFailedf("merge not implemented for %T", val)) } } // waitRequest is used to wait for adequate resources in the tokenBuckets. type waitRequest struct { - readRequests int64 - writeRequests int64 - writeBytes int64 - readBytes int64 + isWrite bool + writeBytes int64 } +var _ quotapool.Request = (*waitRequest)(nil) + var waitRequestSyncPool = sync.Pool{ New: func() interface{} { return new(waitRequest) }, } @@ -295,15 +256,8 @@ var waitRequestSyncPool = sync.Pool{ func newWaitRequest(isWrite bool, writeBytes int64) *waitRequest { r := waitRequestSyncPool.Get().(*waitRequest) *r = waitRequest{ - readRequests: 0, - writeRequests: 0, - readBytes: 1, - writeBytes: writeBytes, - } - if isWrite { - r.writeRequests = 1 - } else { - r.readRequests = 1 + isWrite: isWrite, + writeBytes: writeBytes, } return r } @@ -313,7 +267,23 @@ func putWaitRequest(r *waitRequest) { waitRequestSyncPool.Put(r) } -type readBytesResource int64 +// Acquire is part of quotapool.Request. +func (req *waitRequest) Acquire( + ctx context.Context, res quotapool.Resource, +) (fulfilled bool, tryAgainAfter time.Duration) { + r := res.(*tokenBucket) + r.update() + return r.tryToFulfill(req) +} + +// ShouldWait is part of quotapool.Request. 
+func (req *waitRequest) ShouldWait() bool { + return true +} + +type readBytesResource struct { + readBytes int64 +} var readBytesResourceSyncPool = sync.Pool{ New: func() interface{} { return new(readBytesResource) }, @@ -321,27 +291,13 @@ var readBytesResourceSyncPool = sync.Pool{ func newReadBytesResource(readBytes int64) *readBytesResource { rb := readBytesResourceSyncPool.Get().(*readBytesResource) - *rb = readBytesResource(readBytes) + *rb = readBytesResource{ + readBytes: readBytes, + } return rb } func putReadBytesResource(rb *readBytesResource) { - *rb = 0 + *rb = readBytesResource{} readBytesResourceSyncPool.Put(rb) } - -func (req *waitRequest) Acquire( - ctx context.Context, res quotapool.Resource, -) (fulfilled bool, tryAgainAfter time.Duration) { - r := res.(*tokenBuckets) - r.update() - if fulfilled, tryAgainAfter = r.check(req); !fulfilled { - return false, tryAgainAfter - } - r.subtract(req) - return true, 0 -} - -func (req *waitRequest) ShouldWait() bool { - return true -} diff --git a/pkg/kv/kvserver/tenantrate/limiter_test.go b/pkg/kv/kvserver/tenantrate/limiter_test.go index 1c94d70bb958..9a910e891ed8 100644 --- a/pkg/kv/kvserver/tenantrate/limiter_test.go +++ b/pkg/kv/kvserver/tenantrate/limiter_test.go @@ -48,9 +48,9 @@ func TestCloser(t *testing.T) { limiter := factory.GetTenant(tenant, closer) ctx := context.Background() // First Wait call will not block. - require.NoError(t, limiter.Wait(ctx, false, 1)) + require.NoError(t, limiter.Wait(ctx, true, 1)) errCh := make(chan error, 1) - go func() { errCh <- limiter.Wait(ctx, false, 1<<30) }() + go func() { errCh <- limiter.Wait(ctx, true, 1<<30) }() testutils.SucceedsSoon(t, func() error { if timers := timeSource.Timers(); len(timers) != 1 { return errors.Errorf("expected 1 timer, found %d", len(timers)) @@ -142,9 +142,8 @@ func (ts *testState) init(t *testing.T, d *datadriven.TestData) string { ts.tenants = make(map[roachpb.TenantID][]tenantrate.Limiter) ts.clock = timeutil.NewManualTime(t0) ts.settings = cluster.MakeTestingClusterSettings() - limits := tenantrate.LimitConfigsFromSettings(ts.settings) - parseLimits(t, d, &limits) - tenantrate.OverrideSettingsWithRateLimits(ts.settings, limits) + settings := parseSettings(t, d) + tenantrate.OverrideSettings(&ts.settings.SV, settings) ts.rl = tenantrate.NewLimiterFactory(ts.settings, &tenantrate.TestingKnobs{ TimeSource: ts.clock, }) @@ -157,9 +156,8 @@ func (ts *testState) init(t *testing.T, d *datadriven.TestData) string { // yaml object representing the limits and updates accordingly. It returns // the current time. See init for more details as the semantics are the same. 
 func (ts *testState) updateSettings(t *testing.T, d *datadriven.TestData) string {
-	limits := tenantrate.LimitConfigsFromSettings(ts.settings)
-	parseLimits(t, d, &limits)
-	tenantrate.OverrideSettingsWithRateLimits(ts.settings, limits)
+	settings := parseSettings(t, d)
+	tenantrate.OverrideSettings(&ts.settings.SV, settings)
 	return ts.formatTime()
 }
 
@@ -363,11 +361,11 @@ func (ts *testState) metrics(t *testing.T, d *datadriven.TestData) string {
 	if err := testutils.SucceedsSoonError(func() error {
 		got := ts.getMetricsText(t, d)
 		if got != exp {
-			return errors.Errorf("got: %q, exp: %q", got, exp)
+			return errors.Errorf("got:\n%s\nexp:\n%s\n", got, exp)
 		}
 		return nil
 	}); err != nil {
-		d.Fatalf(t, "failed to find expected timers: %v", err)
+		d.Fatalf(t, "failed to find expected metrics: %v", err)
 	}
 	return d.Expected
 }
@@ -516,10 +514,12 @@ func parseTenantIDs(t *testing.T, d *datadriven.TestData) []uint64 {
 	return tenantIDs
 }
 
-func parseLimits(t *testing.T, d *datadriven.TestData, limits *tenantrate.LimitConfigs) {
-	if err := yaml.UnmarshalStrict([]byte(d.Input), &limits); err != nil {
+func parseSettings(t *testing.T, d *datadriven.TestData) tenantrate.SettingValues {
+	var vals tenantrate.SettingValues
+	if err := yaml.UnmarshalStrict([]byte(d.Input), &vals); err != nil {
 		d.Fatalf(t, "failed to unmarshal limits: %v", err)
 	}
+	return vals
 }
 
 func parseStrings(t *testing.T, d *datadriven.TestData) []string {
diff --git a/pkg/kv/kvserver/tenantrate/settings.go b/pkg/kv/kvserver/tenantrate/settings.go
index 938b4c785f50..02a03644341c 100644
--- a/pkg/kv/kvserver/tenantrate/settings.go
+++ b/pkg/kv/kvserver/tenantrate/settings.go
@@ -15,107 +15,111 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
 )
 
-// Limit defines a rate in units per second.
-type Limit float64
-
-// LimitConfig configures the rate limit and burst limit for a given resource.
-type LimitConfig struct {
-	Rate  Limit
-	Burst int64
-}
-
-// LimitConfigs configures the rate limits.
-// It is exported for convenience and testing.
-// The values are derived from cluster settings.
-type LimitConfigs struct {
-	ReadRequests  LimitConfig
-	WriteRequests LimitConfig
-	ReadBytes     LimitConfig
-	WriteBytes    LimitConfig
-}
+// Config contains the configuration of the rate limiter.
+//
+// We limit the rate in terms of "KV Compute Units". The configuration contains
+// the rate and burst limits for KVCUs, as well as factors that define a "cost
+// model" for calculating the number of KVCUs for a read or write request.
+//
+// Specifically, the cost model is a linear function combining a fixed
+// per-request cost and a size-dependent (per-byte) cost.
+//
+// For a read:
+//   KVCUs = ReadRequestUnits + <size of read in bytes> * ReadUnitsPerByte
+// For a write:
+//   KVCUs = WriteRequestUnits + <size of write in bytes> * WriteUnitsPerByte
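
Not part of the patch: for readers who want to experiment with the admission arithmetic outside the quotapool plumbing, here is a minimal, self-contained Go sketch of the cost model and the debt-tolerant bucket rule above. The `config` struct, the `cost` and `tryToFulfill` helpers, and every constant are hypothetical stand-ins; the authoritative logic is `tokenBucket.tryToFulfill` in limiter.go.

```go
package main

import (
	"fmt"
	"time"
)

// config mirrors the Config fields used by the cost model (values are
// illustrative only; the real defaults come from cluster settings).
type config struct {
	Rate              float64 // KVCUs replenished per second
	Burst             float64 // bucket capacity, in KVCUs
	ReadRequestUnits  float64
	WriteRequestUnits float64
	ReadUnitsPerByte  float64
	WriteUnitsPerByte float64
}

// cost applies the linear cost model. Reads are charged only their base
// cost up front; their bytes are reconciled after the fact (the patch
// does this in RecordRead).
func cost(c config, isWrite bool, writeBytes int64) float64 {
	if isWrite {
		return c.WriteRequestUnits + float64(writeBytes)*c.WriteUnitsPerByte
	}
	return c.ReadRequestUnits
}

// tryToFulfill mimics the admission rule: admit if enough tokens are
// present, or if the bucket is completely full (letting an oversized
// request push the bucket into debt). Otherwise, report how long to
// wait at the current refill rate.
func tryToFulfill(c config, tokens *float64, needed float64) (bool, time.Duration) {
	if needed <= *tokens || *tokens == c.Burst {
		*tokens -= needed
		return true, 0
	}
	if needed > c.Burst {
		// An oversized request only waits until the bucket is full.
		needed = c.Burst
	}
	delta := needed - *tokens
	return false, time.Duration(delta / c.Rate * float64(time.Second))
}

func main() {
	c := config{Rate: 100, Burst: 200, WriteRequestUnits: 1, WriteUnitsPerByte: 0.01}
	tokens := c.Burst
	for _, bytes := range []int64{1000, 50000} {
		needed := cost(c, true, bytes)
		ok, wait := tryToFulfill(c, &tokens, needed)
		fmt.Printf("write of %d bytes needs %.1f KVCUs: fulfilled=%v wait=%s tokens=%.1f\n",
			bytes, needed, ok, wait, tokens)
	}
}
```

With these made-up constants, the 1000-byte write is admitted immediately (11 KVCUs against a full bucket), while the 50000-byte write needs 501 KVCUs, more than Burst, so it reports a 110ms wait: it waits only until the bucket refills to Burst, at which point it would be admitted and drive the bucket into debt.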