From 0f25d161c9a8e5883a058ee3e19a4cb9585c119c Mon Sep 17 00:00:00 2001 From: Gary Rong Date: Sun, 7 Apr 2024 15:15:20 +0800 Subject: [PATCH] ethdb/pebble: print warning log if db performance degrade --- ethdb/pebble/pebble.go | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 57689ab04b9b..01bfb4be3d1d 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -45,6 +45,10 @@ const ( // metricsGatheringInterval specifies the interval to retrieve pebble database // compaction, io and pause stats to report to the user. metricsGatheringInterval = 3 * time.Second + + // degradationWarnInterval specifies how often warning should be printed if the + // leveldb database cannot keep up with requested writes. + degradationWarnInterval = time.Minute ) // Database is a persistent key-value store based on the pebble storage engine. @@ -76,14 +80,16 @@ type Database struct { log log.Logger // Contextual logger tracking the database path - activeComp int // Current number of active compactions - compStartTime time.Time // The start time of the earliest currently-active compaction - compTime atomic.Int64 // Total time spent in compaction in ns - level0Comp atomic.Uint32 // Total number of level-zero compactions - nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions - writeDelayStartTime time.Time // The start time of the latest write stall - writeDelayCount atomic.Int64 // Total number of write stall counts - writeDelayTime atomic.Int64 // Total time spent in write stalls + activeComp int // Current number of active compactions + compStartTime time.Time // The start time of the earliest currently-active compaction + compTime atomic.Int64 // Total time spent in compaction in ns + level0Comp atomic.Uint32 // Total number of level-zero compactions + nonLevel0Comp atomic.Uint32 // Total number of non level-zero compactions + + writeStalled atomic.Bool // Flag whether the write is stalled + writeDelayStartTime time.Time // The start time of the latest write stall + writeDelayCount atomic.Int64 // Total number of write stall counts + writeDelayTime atomic.Int64 // Total time spent in write stalls writeOptions *pebble.WriteOptions } @@ -112,10 +118,13 @@ func (d *Database) onCompactionEnd(info pebble.CompactionInfo) { func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) { d.writeDelayStartTime = time.Now() + d.writeDelayCount.Add(1) + d.writeStalled.Store(true) } func (d *Database) onWriteStallEnd() { d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime))) + d.writeStalled.Store(false) } // panicLogger is just a noop logger to disable Pebble's internal logger. @@ -450,13 +459,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) { // Create storage and warning log tracer for write delay. var ( - compTimes [2]int64 - writeDelayTimes [2]int64 - writeDelayCounts [2]int64 - compWrites [2]int64 - compReads [2]int64 + compTimes [2]int64 + compWrites [2]int64 + compReads [2]int64 nWrites [2]int64 + + writeDelayTimes [2]int64 + writeDelayCounts [2]int64 + lastWriteStallReport time.Time ) // Iterate ad infinitum and collect the stats @@ -496,6 +507,13 @@ func (d *Database) meter(refresh time.Duration, namespace string) { if d.writeDelayMeter != nil { d.writeDelayMeter.Mark(writeDelayTimes[i%2] - writeDelayTimes[(i-1)%2]) } + // Print a warning log if writing has been stalled for a while. The log will + // be printed per minute to avoid overwhelming users. + if d.writeStalled.Load() && writeDelayCounts[i%2] == writeDelayCounts[(i-1)%2] && + time.Now().After(lastWriteStallReport.Add(degradationWarnInterval)) { + d.log.Warn("Database compacting, degraded performance") + lastWriteStallReport = time.Now() + } if d.compTimeMeter != nil { d.compTimeMeter.Mark(compTimes[i%2] - compTimes[(i-1)%2]) }