Skip to content

Commit

Permalink
kvcoord: Add metric to keep track of restarted ranges in rangefeed
Browse files Browse the repository at this point in the history
Add a `distsender.rangefeed.restart_ranges` metric to keep track
of the number of ranges restarted due to transient errors.

Epic: CRDB-25044
Release note: None
  • Loading branch information
Yevgeniy Miretskiy committed Mar 21, 2023
1 parent b26b338 commit c9e4529
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 0 deletions.
8 changes: 8 additions & 0 deletions pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ This counts the number of ranges with an active rangefeed that are performing ca
Measurement: "Ranges",
Unit: metric.Unit_COUNT,
}
// metaDistSenderRangefeedRestartRanges is the metadata for the counter of
// rangefeed ranges that were restarted after hitting a transient error.
metaDistSenderRangefeedRestartRanges = metric.Metadata{
Name: "distsender.rangefeed.restart_ranges",
Help: `Number of ranges that were restarted due to transient errors`,
Measurement: "Ranges",
Unit: metric.Unit_COUNT,
}
metaDistSenderRangefeedRestartStuck = metric.Metadata{
Name: "distsender.rangefeed.restart_stuck",
Help: `Number of times a rangefeed was restarted due to not receiving ` +
Expand Down Expand Up @@ -239,6 +245,7 @@ type DistSenderMetrics struct {
RangefeedRanges *metric.Gauge
RangefeedCatchupRanges *metric.Gauge
RangefeedErrorCatchup *metric.Counter
RangefeedRestartRanges *metric.Counter
RangefeedRestartStuck *metric.Counter
MethodCounts [kvpb.NumMethods]*metric.Counter
ErrCounts [kvpb.NumErrors]*metric.Counter
Expand All @@ -260,6 +267,7 @@ func makeDistSenderMetrics() DistSenderMetrics {
RangefeedRanges: metric.NewGauge(metaDistSenderRangefeedTotalRanges),
RangefeedCatchupRanges: metric.NewGauge(metaDistSenderRangefeedCatchupRanges),
RangefeedErrorCatchup: metric.NewCounter(metaDistSenderRangefeedErrorCatchupRanges),
RangefeedRestartRanges: metric.NewCounter(metaDistSenderRangefeedRestartRanges),
RangefeedRestartStuck: metric.NewCounter(metaDistSenderRangefeedRestartStuck),
}
for i := range m.MethodCounts {
Expand Down
2 changes: 2 additions & 0 deletions pkg/kv/kvclient/kvcoord/dist_sender_mux_rangefeed.go
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,8 @@ func (m *rangefeedMuxer) receiveEventsFromNode(
func (m *rangefeedMuxer) restartActiveRangeFeed(
ctx context.Context, active *activeMuxRangeFeed, reason error,
) error {
m.ds.metrics.RangefeedRestartRanges.Inc(1)

if log.V(1) {
log.Infof(ctx, "RangeFeed %s@%s disconnected with last checkpoint %s ago: %v",
active.Span, active.StartAfter, timeutil.Since(active.Resolved.GoTime()), reason)
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvclient/kvcoord/dist_sender_rangefeed.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ func (ds *DistSender) partialRangeFeed(
if err != nil {
return err
}
ds.metrics.RangefeedRestartRanges.Inc(1)
if errInfo.evict {
token.Evict(ctx)
token = rangecache.EvictionToken{}
Expand Down
6 changes: 6 additions & 0 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,12 @@ var charts = []sectionDescription{
"distsender.rangefeed.restart_stuck",
},
},
{
Title: "Restarted Ranges",
Metrics: []string{
"distsender.rangefeed.restart_ranges",
},
},
{
Title: "RPCs",
Metrics: []string{
Expand Down

0 comments on commit c9e4529

Please sign in to comment.