Skip to content

Commit 44b630a

Browse files
committed
Use a histogram to record transferred bytes for get and get_range
This makes it possible to understand the size distribution of object store fetches. The metric has been purposely named so that it can also cover uploads in the future. Signed-off-by: Christian Simon <[email protected]>
1 parent 89475d4 commit 44b630a

File tree

2 files changed

+74
-7
lines changed

2 files changed

+74
-7
lines changed

objstore.go

+27-3
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,13 @@ func WrapWithMetrics(b Bucket, reg prometheus.Registerer, name string) *metricBu
424424
ConstLabels: prometheus.Labels{"bucket": name},
425425
}, []string{"operation"}),
426426

427+
opsTransferredBytes: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
428+
Name: "objstore_bucket_operation_transferred_bytes",
429+
Help: "Number of bytes transferred from/to bucket per operation.",
430+
ConstLabels: prometheus.Labels{"bucket": name},
431+
Buckets: prometheus.ExponentialBuckets(2<<14, 2, 16), // 32KiB, 64KiB, ... 1GiB
432+
}, []string{"operation"}),
433+
427434
opsDuration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
428435
Name: "objstore_bucket_operation_duration_seconds",
429436
Help: "Duration of successful operations against the bucket",
@@ -450,6 +457,14 @@ func WrapWithMetrics(b Bucket, reg prometheus.Registerer, name string) *metricBu
450457
bkt.opsDuration.WithLabelValues(op)
451458
bkt.opsFetchedBytes.WithLabelValues(op)
452459
}
460+
// The transferred-bytes histogram is only relevant for get and get_range.
461+
for _, op := range []string{
462+
OpGet,
463+
OpGetRange,
464+
// TODO: Add uploads
465+
} {
466+
bkt.opsTransferredBytes.WithLabelValues(op)
467+
}
453468
bkt.lastSuccessfulUploadTime.WithLabelValues(b.Name())
454469
return bkt
455470
}
@@ -461,8 +476,8 @@ type metricBucket struct {
461476
opsFailures *prometheus.CounterVec
462477
isOpFailureExpected IsOpFailureExpectedFunc
463478

464-
opsFetchedBytes *prometheus.CounterVec
465-
479+
opsFetchedBytes *prometheus.CounterVec
480+
opsTransferredBytes *prometheus.HistogramVec
466481
opsDuration *prometheus.HistogramVec
467482
lastSuccessfulUploadTime *prometheus.GaugeVec
468483
}
@@ -473,6 +488,7 @@ func (b *metricBucket) WithExpectedErrs(fn IsOpFailureExpectedFunc) Bucket {
473488
ops: b.ops,
474489
opsFailures: b.opsFailures,
475490
opsFetchedBytes: b.opsFetchedBytes,
491+
opsTransferredBytes: b.opsTransferredBytes,
476492
isOpFailureExpected: fn,
477493
opsDuration: b.opsDuration,
478494
lastSuccessfulUploadTime: b.lastSuccessfulUploadTime,
@@ -530,6 +546,7 @@ func (b *metricBucket) Get(ctx context.Context, name string) (io.ReadCloser, err
530546
b.opsFailures,
531547
b.isOpFailureExpected,
532548
b.opsFetchedBytes,
549+
b.opsTransferredBytes,
533550
), nil
534551
}
535552

@@ -551,6 +568,7 @@ func (b *metricBucket) GetRange(ctx context.Context, name string, off, length in
551568
b.opsFailures,
552569
b.isOpFailureExpected,
553570
b.opsFetchedBytes,
571+
b.opsTransferredBytes,
554572
), nil
555573
}
556574

@@ -627,13 +645,15 @@ type timingReadCloser struct {
627645

628646
start time.Time
629647
op string
648+
readBytes int64
630649
duration *prometheus.HistogramVec
631650
failed *prometheus.CounterVec
632651
isFailureExpected IsOpFailureExpectedFunc
633652
fetchedBytes *prometheus.CounterVec
653+
transferredBytes *prometheus.HistogramVec
634654
}
635655

636-
func newTimingReadCloser(rc io.ReadCloser, op string, dur *prometheus.HistogramVec, failed *prometheus.CounterVec, isFailureExpected IsOpFailureExpectedFunc, fetchedBytes *prometheus.CounterVec) *timingReadCloser {
656+
func newTimingReadCloser(rc io.ReadCloser, op string, dur *prometheus.HistogramVec, failed *prometheus.CounterVec, isFailureExpected IsOpFailureExpectedFunc, fetchedBytes *prometheus.CounterVec, transferredBytes *prometheus.HistogramVec) *timingReadCloser {
637657
// Initialize the metrics with 0.
638658
dur.WithLabelValues(op)
639659
failed.WithLabelValues(op)
@@ -648,6 +668,8 @@ func newTimingReadCloser(rc io.ReadCloser, op string, dur *prometheus.HistogramV
648668
failed: failed,
649669
isFailureExpected: isFailureExpected,
650670
fetchedBytes: fetchedBytes,
671+
transferredBytes: transferredBytes,
672+
readBytes: 0,
651673
}
652674
}
653675

@@ -662,6 +684,7 @@ func (rc *timingReadCloser) Close() error {
662684
}
663685
if !rc.alreadyGotErr && err == nil {
664686
rc.duration.WithLabelValues(rc.op).Observe(time.Since(rc.start).Seconds())
687+
rc.transferredBytes.WithLabelValues(rc.op).Observe(float64(rc.readBytes))
665688
rc.alreadyGotErr = true
666689
}
667690
return err
@@ -670,6 +693,7 @@ func (rc *timingReadCloser) Close() error {
670693
func (rc *timingReadCloser) Read(b []byte) (n int, err error) {
671694
n, err = rc.ReadCloser.Read(b)
672695
rc.fetchedBytes.WithLabelValues(rc.op).Add(float64(n))
696+
rc.readBytes += int64(n)
673697
// Report metric just once.
674698
if !rc.alreadyGotErr && err != nil && err != io.EOF {
675699
if !rc.isFailureExpected(err) {

objstore_test.go

+47-4
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func TestDownloadUploadDirConcurrency(t *testing.T) {
7979

8080
testutil.Ok(t, m.Upload(context.Background(), "dir/obj1", bytes.NewReader([]byte("1"))))
8181
testutil.Ok(t, m.Upload(context.Background(), "dir/obj2", bytes.NewReader([]byte("2"))))
82-
testutil.Ok(t, m.Upload(context.Background(), "dir/obj3", bytes.NewReader([]byte("3"))))
82+
testutil.Ok(t, m.Upload(context.Background(), "dir/obj3", bytes.NewReader(bytes.Repeat([]byte("3"), 1024*1024))))
8383

8484
testutil.Ok(t, promtest.GatherAndCompare(r, strings.NewReader(`
8585
# HELP objstore_bucket_operations_total Total number of all attempted operations against a bucket.
@@ -110,17 +110,60 @@ func TestDownloadUploadDirConcurrency(t *testing.T) {
110110
`), `objstore_bucket_operations_total`))
111111

112112
testutil.Ok(t, promtest.GatherAndCompare(r, strings.NewReader(`
113-
# HELP objstore_bucket_operation_fetched_bytes_total Total number of bytes fetched from bucket, per operation.
113+
# HELP objstore_bucket_operation_fetched_bytes_total Total number of bytes fetched from bucket, per operation.
114114
# TYPE objstore_bucket_operation_fetched_bytes_total counter
115115
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="attributes"} 0
116116
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="delete"} 0
117117
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="exists"} 0
118-
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="get"} 3
118+
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="get"} 1.048578e+06
119119
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="get_range"} 0
120120
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="iter"} 0
121121
objstore_bucket_operation_fetched_bytes_total{bucket="",operation="upload"} 0
122122
`), `objstore_bucket_operation_fetched_bytes_total`))
123123

124+
testutil.Ok(t, promtest.GatherAndCompare(r, strings.NewReader(`
125+
# HELP objstore_bucket_operation_transferred_bytes Number of bytes transferred from/to bucket per operation.
126+
# TYPE objstore_bucket_operation_transferred_bytes histogram
127+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="32768"} 2
128+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="65536"} 2
129+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="131072"} 2
130+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="262144"} 2
131+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="524288"} 2
132+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="1.048576e+06"} 3
133+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="2.097152e+06"} 3
134+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="4.194304e+06"} 3
135+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="8.388608e+06"} 3
136+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="1.6777216e+07"} 3
137+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="3.3554432e+07"} 3
138+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="6.7108864e+07"} 3
139+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="1.34217728e+08"} 3
140+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="2.68435456e+08"} 3
141+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="5.36870912e+08"} 3
142+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="1.073741824e+09"} 3
143+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get",le="+Inf"} 3
144+
objstore_bucket_operation_transferred_bytes_sum{bucket="",operation="get"} 1.048578e+06
145+
objstore_bucket_operation_transferred_bytes_count{bucket="",operation="get"} 3
146+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="32768"} 0
147+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="65536"} 0
148+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="131072"} 0
149+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="262144"} 0
150+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="524288"} 0
151+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="1.048576e+06"} 0
152+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="2.097152e+06"} 0
153+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="4.194304e+06"} 0
154+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="8.388608e+06"} 0
155+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="1.6777216e+07"} 0
156+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="3.3554432e+07"} 0
157+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="6.7108864e+07"} 0
158+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="1.34217728e+08"} 0
159+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="2.68435456e+08"} 0
160+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="5.36870912e+08"} 0
161+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="1.073741824e+09"} 0
162+
objstore_bucket_operation_transferred_bytes_bucket{bucket="",operation="get_range",le="+Inf"} 0
163+
objstore_bucket_operation_transferred_bytes_sum{bucket="",operation="get_range"} 0
164+
objstore_bucket_operation_transferred_bytes_count{bucket="",operation="get_range"} 0
165+
`), `objstore_bucket_operation_transferred_bytes`))
166+
124167
testutil.Ok(t, UploadDir(context.Background(), log.NewNopLogger(), m, tempDir, "/dir-copy", WithUploadConcurrency(10)))
125168

126169
testutil.Ok(t, promtest.GatherAndCompare(r, strings.NewReader(`
@@ -143,7 +186,7 @@ func TestTimingTracingReader(t *testing.T) {
143186
tr := NopCloserWithSize(r)
144187
tr = newTimingReadCloser(tr, "", m.opsDuration, m.opsFailures, func(err error) bool {
145188
return false
146-
}, m.opsFetchedBytes)
189+
}, m.opsFetchedBytes, m.opsTransferredBytes)
147190

148191
size, err := TryToGetSize(tr)
149192

0 commit comments

Comments
 (0)