ethdb/pebble: print warning log if db performance degrade #29478

Merged
merged 1 commit on Apr 17, 2024
44 changes: 31 additions & 13 deletions ethdb/pebble/pebble.go
@@ -45,6 +45,10 @@ const (
// metricsGatheringInterval specifies the interval to retrieve pebble database
// compaction, io and pause stats to report to the user.
metricsGatheringInterval = 3 * time.Second

// degradationWarnInterval specifies how often a warning should be printed if the
// pebble database cannot keep up with requested writes.
degradationWarnInterval = time.Minute
)

// Database is a persistent key-value store based on the pebble storage engine.
@@ -76,14 +80,16 @@ type Database struct {

log log.Logger // Contextual logger tracking the database path

activeComp int // Current number of active compactions
compStartTime time.Time // The start time of the earliest currently-active compaction
compTime atomic.Int64 // Total time spent in compaction in ns
level0Comp atomic.Uint32 // Total number of level-zero compactions
nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions
writeDelayStartTime time.Time // The start time of the latest write stall
writeDelayCount atomic.Int64 // Total number of write stall counts
writeDelayTime atomic.Int64 // Total time spent in write stalls
activeComp int // Current number of active compactions
compStartTime time.Time // The start time of the earliest currently-active compaction
compTime atomic.Int64 // Total time spent in compaction in ns
level0Comp atomic.Uint32 // Total number of level-zero compactions
nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions

writeStalled atomic.Bool // Flag whether the write is stalled
writeDelayStartTime time.Time // The start time of the latest write stall
writeDelayCount atomic.Int64 // Total number of write stall counts
writeDelayTime atomic.Int64 // Total time spent in write stalls

writeOptions *pebble.WriteOptions
}
@@ -112,10 +118,13 @@ func (d *Database) onCompactionEnd(info pebble.CompactionInfo) {

func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) {
d.writeDelayStartTime = time.Now()
d.writeDelayCount.Add(1)
d.writeStalled.Store(true)
}

func (d *Database) onWriteStallEnd() {
d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
d.writeStalled.Store(false)
}

// panicLogger is just a noop logger to disable Pebble's internal logger.
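The two callbacks above only do their bookkeeping if pebble actually invokes them, which requires registering them on the database's event listener when it is opened; that wiring sits outside the hunks shown here. Below is a minimal, self-contained sketch of the idea, assuming a recent cockroachdb/pebble release where Options.EventListener is a pointer; the stallTracker type, its field names and the database path are illustrative only, not part of this change.

package main

import (
    "fmt"
    "sync/atomic"
    "time"

    "github.com/cockroachdb/pebble"
)

// stallTracker mirrors the bookkeeping done by Database above: a flag for an
// in-progress stall, the start time of the latest stall, and the total time stalled.
type stallTracker struct {
    stalled   atomic.Bool
    startTime time.Time
    total     atomic.Int64 // total stall time in nanoseconds
}

func main() {
    t := new(stallTracker)
    opts := &pebble.Options{
        // EventListener delivers lifecycle notifications; WriteStallBegin and
        // WriteStallEnd fire when pebble throttles writes because compaction
        // cannot keep up.
        EventListener: &pebble.EventListener{
            WriteStallBegin: func(info pebble.WriteStallBeginInfo) {
                t.startTime = time.Now()
                t.stalled.Store(true)
            },
            WriteStallEnd: func() {
                t.total.Add(int64(time.Since(t.startTime)))
                t.stalled.Store(false)
            },
        },
    }
    db, err := pebble.Open("demo-db", opts) // path is a placeholder
    if err != nil {
        panic(err)
    }
    defer db.Close()

    fmt.Println("currently stalled:", t.stalled.Load())
    fmt.Println("total stall time:", time.Duration(t.total.Load()))
}

In go-ethereum's pebble.go, the Database methods onWriteStallBegin and onWriteStallEnd are registered in the same way when the pebble Options are assembled in New.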
@@ -450,13 +459,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) {

// Create storage and warning log tracer for write delay.
var (
compTimes [2]int64
writeDelayTimes [2]int64
writeDelayCounts [2]int64
compWrites [2]int64
compReads [2]int64
compTimes [2]int64
compWrites [2]int64
compReads [2]int64

nWrites [2]int64

writeDelayTimes [2]int64
writeDelayCounts [2]int64
lastWriteStallReport time.Time
)

// Iterate ad infinitum and collect the stats
@@ -496,6 +507,13 @@ func (d *Database) meter(refresh time.Duration, namespace string) {
if d.writeDelayMeter != nil {
d.writeDelayMeter.Mark(writeDelayTimes[i%2] - writeDelayTimes[(i-1)%2])
}
// Print a warning log if writing has been stalled for a while. The warning is
// rate-limited to at most one per minute to avoid overwhelming users.
if d.writeStalled.Load() && writeDelayCounts[i%2] == writeDelayCounts[(i-1)%2] &&
time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) {
d.log.Warn("Database compacting, degraded performance")
lastWriteStallReport = time.Now()
}
if d.compTimeMeter != nil {
d.compTimeMeter.Mark(compTimes[i%2] - compTimes[(i-1)%2])
}
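The new warning check combines three tests: a write stall must currently be in progress (writeStalled is set), the stall counter must be unchanged between the two most recent samples (so the same stall has lasted at least one full metrics-gathering interval), and at least degradationWarnInterval must have passed since the previous report. Restated as a small standalone sketch; the shouldWarn helper and its arguments are illustrative only, not part of the PR.

package main

import (
    "fmt"
    "time"
)

const degradationWarnInterval = time.Minute

// shouldWarn reports whether a degradation warning is due: the database is
// currently in a write stall, no new stall began since the previous sample
// (the stall count is unchanged), and the last warning is old enough.
func shouldWarn(stalled bool, prevStallCount, curStallCount int64, lastReport time.Time) bool {
    return stalled &&
        curStallCount == prevStallCount &&
        time.Now().After(lastReport.Add(degradationWarnInterval))
}

func main() {
    last := time.Now().Add(-2 * time.Minute)
    fmt.Println(shouldWarn(true, 3, 3, last))  // true: ongoing stall, warning interval elapsed
    fmt.Println(shouldWarn(true, 3, 4, last))  // false: a new stall just began
    fmt.Println(shouldWarn(false, 3, 3, last)) // false: not currently stalled
}

In effect, only stalls that span at least one full sampling interval trigger the warning, and repeated reports for the same long stall are spaced out by degradationWarnInterval.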