Skip to content

Commit

Permalink
admission: fix wait queue histograms
Browse files Browse the repository at this point in the history
We previously did not record anything into {IO,CPU} wait queue
histograms when work either bypassed admission control (because of the
nature of the work, or when certain admission queues were disabled
through cluster settings) or used the fast-path (i.e. didn't add
themselves to the tenant heaps). This meant that our histogram
percentiles were not accurate.

NB: This problem didn't exist at the flow control level where work may
not be subject to flow control depending on the mode selected
('apply_to_elastic', 'apply_to_all'). We'd still record a measured wait
duration (~0ms), so we had accurate waiting-for-flow-tokens histograms.

Part of cockroachdb#82743.

Release note: None
  • Loading branch information
irfansharif committed Sep 5, 2023
1 parent d9c137d commit 4da2627
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
1 change: 0 additions & 1 deletion pkg/kv/kvserver/kvadmission/kvadmission.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,6 @@ func (n *controllerImpl) AdmitKVWork(
AdmissionOriginNode: n.nodeID.Get(),
}
}

}
// If flow control is disabled or if work bypasses flow control, we still
// subject it above-raft, leaseholder-only IO admission control.
Expand Down
23 changes: 23 additions & 0 deletions pkg/util/admission/work_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
if !info.ReplicatedWorkInfo.Enabled {
enabledSetting := admissionControlEnabledSettings[q.workKind]
if enabledSetting != nil && !enabledSetting.Get(&q.settings.SV) {
q.metrics.recordBypassedAdmission(info.Priority)
return false, nil
}
}
Expand Down Expand Up @@ -618,6 +619,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
q.admitMu.Unlock()
q.granter.tookWithoutPermission(info.RequestedCount)
q.metrics.incAdmitted(info.Priority)
q.metrics.recordBypassedAdmission(info.Priority)
return true, nil
}
// Work is subject to admission control.
Expand Down Expand Up @@ -663,6 +665,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
false, /* coordMuLocked */
)
}
q.metrics.recordFastPathAdmission(info.Priority)
return true, nil
}
// Did not get token/slot.
Expand Down Expand Up @@ -1757,6 +1760,26 @@ func (m *WorkQueueMetrics) recordFinishWait(priority admissionpb.WorkPriority, d
priorityStats.WaitDurations.RecordValue(dur.Nanoseconds())
}

func (m *WorkQueueMetrics) recordBypassedAdmission(priority admissionpb.WorkPriority) {
// For work that either bypasses admission queues (because of the nature of
// the work itself or because certain queues are disabled), we'll explicit
// record a zero wait duration so that the histogram percentiles remain
// accurate.
m.total.WaitDurations.RecordValue(0)
priorityStats := m.getOrCreate(priority)
priorityStats.WaitDurations.RecordValue(0)
}

func (m *WorkQueueMetrics) recordFastPathAdmission(priority admissionpb.WorkPriority) {
// Explicitly record a zero wait queue duration when we're able to acquire
// tokens/slots without needing to add ourselves to tenant heaps. Explicitly
// recording zeros ensure that our histograms are accurate with respect to
// all work going through admission control.
m.total.WaitDurations.RecordValue(0)
priorityStats := m.getOrCreate(priority)
priorityStats.WaitDurations.RecordValue(0)
}

// MetricStruct implements the metric.Struct interface.
func (*WorkQueueMetrics) MetricStruct() {}

Expand Down

0 comments on commit 4da2627

Please sign in to comment.