From 3b86d7adabb82cbe5939a8c62ace9f8fb247f666 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Thu, 6 Feb 2025 07:51:33 -0800 Subject: [PATCH] interop: Add metrics tracking Cross-validated blocks. (#14211) --- op-service/metrics/ref_metrics.go | 92 +++++++++------ op-supervisor/metrics/metrics.go | 12 ++ op-supervisor/metrics/noop.go | 3 + op-supervisor/supervisor/backend/backend.go | 2 +- .../supervisor/backend/backend_test.go | 108 ++++++++++++++++++ .../supervisor/backend/chain_metrics.go | 11 ++ op-supervisor/supervisor/backend/db/db.go | 15 ++- .../supervisor/backend/db/query_test.go | 5 +- op-supervisor/supervisor/backend/db/update.go | 6 + .../backend/rewinder/rewinder_test.go | 3 +- 10 files changed, 218 insertions(+), 39 deletions(-) diff --git a/op-service/metrics/ref_metrics.go b/op-service/metrics/ref_metrics.go index 19d8badb9ab73..df5b97aa8d3a3 100644 --- a/op-service/metrics/ref_metrics.go +++ b/op-service/metrics/ref_metrics.go @@ -40,73 +40,97 @@ var _ RefMetricer = (*RefMetrics)(nil) // // ns is the fully qualified namespace, e.g. "op_node_default". func MakeRefMetrics(ns string, factory Factory) RefMetrics { + return makeRefMetrics(ns, factory) +} + +func (m *RefMetrics) RecordRef(layer string, name string, num uint64, timestamp uint64, h common.Hash) { + recordRefWithLabels(m, name, num, timestamp, h, []string{layer, name}) +} + +func (m *RefMetrics) RecordL1Ref(name string, ref eth.L1BlockRef) { + m.RecordRef("l1", name, ref.Number, ref.Time, ref.Hash) +} + +func (m *RefMetrics) RecordL2Ref(name string, ref eth.L2BlockRef) { + m.RecordRef("l2", name, ref.Number, ref.Time, ref.Hash) + m.RecordRef("l1_origin", name, ref.L1Origin.Number, 0, ref.L1Origin.Hash) + m.RefsSeqNr.WithLabelValues(name).Set(float64(ref.SequenceNumber)) +} + +// RefMetricsWithChainID is a RefMetrics that includes a chain ID label. 
+type RefMetricsWithChainID struct { + RefMetrics +} + +func MakeRefMetricsWithChainID(ns string, factory Factory) RefMetricsWithChainID { + return RefMetricsWithChainID{ + RefMetrics: makeRefMetrics(ns, factory, "chain"), + } +} + +func (m *RefMetricsWithChainID) RecordRef(layer string, name string, num uint64, timestamp uint64, h common.Hash, chainID eth.ChainID) { + recordRefWithLabels(&m.RefMetrics, name, num, timestamp, h, []string{layer, name, chainID.String()}) +} + +func (m *RefMetricsWithChainID) RecordL1Ref(name string, ref eth.L1BlockRef, chainID eth.ChainID) { + m.RecordRef("l1", name, ref.Number, ref.Time, ref.Hash, chainID) +} + +func (m *RefMetricsWithChainID) RecordL2Ref(name string, ref eth.L2BlockRef, chainID eth.ChainID) { + m.RecordRef("l2", name, ref.Number, ref.Time, ref.Hash, chainID) + m.RecordRef("l1_origin", name, ref.L1Origin.Number, 0, ref.L1Origin.Hash, chainID) + m.RefsSeqNr.WithLabelValues(name, chainID.String()).Set(float64(ref.SequenceNumber)) +} + +// makeRefMetrics creates a new RefMetrics with the given namespace, factory, and labels. +func makeRefMetrics(ns string, factory Factory, extraLabels ...string) RefMetrics { + labels := append([]string{"layer", "type"}, extraLabels...) + seqLabels := append([]string{"type"}, extraLabels...) 
return RefMetrics{ RefsNumber: factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Name: "refs_number", Help: "Gauge representing the different L1/L2 reference block numbers", - }, []string{ - "layer", - "type", - }), + }, labels), RefsTime: factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Name: "refs_time", Help: "Gauge representing the different L1/L2 reference block timestamps", - }, []string{ - "layer", - "type", - }), + }, labels), RefsHash: factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Name: "refs_hash", Help: "Gauge representing the different L1/L2 reference block hashes truncated to float values", - }, []string{ - "layer", - "type", - }), + }, labels), RefsSeqNr: factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Name: "refs_seqnr", Help: "Gauge representing the different L2 reference sequence numbers", - }, []string{ - "type", - }), + }, seqLabels), RefsLatency: factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, Name: "refs_latency", Help: "Gauge representing the different L1/L2 reference block timestamps minus current time, in seconds", - }, []string{ - "layer", - "type", - }), + }, labels), LatencySeen: make(map[string]common.Hash), } } -func (m *RefMetrics) RecordRef(layer string, name string, num uint64, timestamp uint64, h common.Hash) { - m.RefsNumber.WithLabelValues(layer, name).Set(float64(num)) +// recordRefWithLabels implements the core logic of emitting block ref metrics. +// It's abstracted over labels to enable re-use in different contexts. 
+func recordRefWithLabels(m *RefMetrics, name string, num uint64, timestamp uint64, h common.Hash, labels []string) { + m.RefsNumber.WithLabelValues(labels...).Set(float64(num)) if timestamp != 0 { - m.RefsTime.WithLabelValues(layer, name).Set(float64(timestamp)) + m.RefsTime.WithLabelValues(labels...).Set(float64(timestamp)) // only meter the latency when we first see this hash for the given label name if m.LatencySeen[name] != h { m.LatencySeen[name] = h - m.RefsLatency.WithLabelValues(layer, name).Set(float64(timestamp) - (float64(time.Now().UnixNano()) / 1e9)) + m.RefsLatency.WithLabelValues(labels...).Set(float64(timestamp) - (float64(time.Now().UnixNano()) / 1e9)) } } // we map the first 8 bytes to a float64, so we can graph changes of the hash to find divergences visually. // We don't do math.Float64frombits, just a regular conversion, to keep the value within a manageable range. - m.RefsHash.WithLabelValues(layer, name).Set(float64(binary.LittleEndian.Uint64(h[:]))) -} - -func (m *RefMetrics) RecordL1Ref(name string, ref eth.L1BlockRef) { - m.RecordRef("l1", name, ref.Number, ref.Time, ref.Hash) -} - -func (m *RefMetrics) RecordL2Ref(name string, ref eth.L2BlockRef) { - m.RecordRef("l2", name, ref.Number, ref.Time, ref.Hash) - m.RecordRef("l1_origin", name, ref.L1Origin.Number, 0, ref.L1Origin.Hash) - m.RefsSeqNr.WithLabelValues(name).Set(float64(ref.SequenceNumber)) + m.RefsHash.WithLabelValues(labels...).Set(float64(binary.LittleEndian.Uint64(h[:]))) } // NoopRefMetrics can be embedded in a noop version of a metric implementation diff --git a/op-supervisor/metrics/metrics.go b/op-supervisor/metrics/metrics.go index 2b7c3990dae92..7653fe9cb757f 100644 --- a/op-supervisor/metrics/metrics.go +++ b/op-supervisor/metrics/metrics.go @@ -14,6 +14,8 @@ type Metricer interface { RecordUp() opmetrics.RPCMetricer + RecordCrossUnsafeRef(chainID eth.ChainID, r eth.BlockRef) + RecordCrossSafeRef(chainID eth.ChainID, r eth.BlockRef) CacheAdd(chainID eth.ChainID, label 
string, cacheSize int, evicted bool) CacheGet(chainID eth.ChainID, label string, hit bool) @@ -30,6 +32,7 @@ type Metrics struct { factory opmetrics.Factory opmetrics.RPCMetrics + RefMetrics opmetrics.RefMetricsWithChainID CacheSizeVec *prometheus.GaugeVec CacheGetVec *prometheus.CounterVec @@ -62,6 +65,7 @@ func NewMetrics(procName string) *Metrics { factory: factory, RPCMetrics: opmetrics.MakeRPCMetrics(ns, factory), + RefMetrics: opmetrics.MakeRefMetricsWithChainID(ns, factory), info: *factory.NewGaugeVec(prometheus.GaugeOpts{ Namespace: ns, @@ -140,6 +144,14 @@ func (m *Metrics) RecordUp() { m.up.Set(1) } +func (m *Metrics) RecordCrossUnsafeRef(chainID eth.ChainID, ref eth.BlockRef) { + m.RefMetrics.RecordRef("l2", "cross_unsafe", ref.Number, ref.Time, ref.Hash, chainID) +} + +func (m *Metrics) RecordCrossSafeRef(chainID eth.ChainID, ref eth.BlockRef) { + m.RefMetrics.RecordRef("l2", "cross_safe", ref.Number, ref.Time, ref.Hash, chainID) +} + func (m *Metrics) CacheAdd(chainID eth.ChainID, label string, cacheSize int, evicted bool) { chain := chainIDLabel(chainID) m.CacheSizeVec.WithLabelValues(chain, label).Set(float64(cacheSize)) diff --git a/op-supervisor/metrics/noop.go b/op-supervisor/metrics/noop.go index e126f554e8f1e..42b658ab61f4d 100644 --- a/op-supervisor/metrics/noop.go +++ b/op-supervisor/metrics/noop.go @@ -16,6 +16,9 @@ func (*noopMetrics) Document() []opmetrics.DocumentedMetric { return nil } func (*noopMetrics) RecordInfo(version string) {} func (*noopMetrics) RecordUp() {} +func (m *noopMetrics) RecordCrossUnsafeRef(_ eth.ChainID, _ eth.BlockRef) {} +func (m *noopMetrics) RecordCrossSafeRef(_ eth.ChainID, _ eth.BlockRef) {} + func (m *noopMetrics) CacheAdd(_ eth.ChainID, _ string, _ int, _ bool) {} func (m *noopMetrics) CacheGet(_ eth.ChainID, _ string, _ bool) {} diff --git a/op-supervisor/supervisor/backend/backend.go b/op-supervisor/supervisor/backend/backend.go index 3f85f9539a83a..245adb42a3573 100644 --- 
a/op-supervisor/supervisor/backend/backend.go +++ b/op-supervisor/supervisor/backend/backend.go @@ -112,7 +112,7 @@ func NewSupervisorBackend(ctx context.Context, logger log.Logger, sysCtx, sysCancel := context.WithCancel(ctx) // create initial per-chain resources - chainsDBs := db.NewChainsDB(logger, depSet) + chainsDBs := db.NewChainsDB(logger, depSet, m) eventSys.Register("chainsDBs", chainsDBs, event.DefaultRegisterOpts()) l1Accessor := l1access.NewL1Accessor(sysCtx, logger, nil) diff --git a/op-supervisor/supervisor/backend/backend_test.go b/op-supervisor/supervisor/backend/backend_test.go index 68a41a69dc26f..45ea92ce2cf43 100644 --- a/op-supervisor/supervisor/backend/backend_test.go +++ b/op-supervisor/supervisor/backend/backend_test.go @@ -136,6 +136,114 @@ func TestBackendLifetime(t *testing.T) { t.Log("stopped!") } +func TestBackendCallsMetrics(t *testing.T) { + logger := testlog.Logger(t, log.LvlInfo) + mockMetrics := &MockMetrics{} + dataDir := t.TempDir() + chainA := eth.ChainIDFromUInt64(900) + + // Set up mock metrics + mockMetrics.Mock.On("RecordDBEntryCount", chainA, mock.AnythingOfType("string"), mock.AnythingOfType("int64")).Return() + mockMetrics.Mock.On("RecordCrossUnsafeRef", chainA, mock.MatchedBy(func(_ eth.BlockRef) bool { return true })).Return() + mockMetrics.Mock.On("RecordCrossSafeRef", chainA, mock.MatchedBy(func(_ eth.BlockRef) bool { return true })).Return() + + depSet, err := depset.NewStaticConfigDependencySet( + map[eth.ChainID]*depset.StaticConfigDependency{ + chainA: { + ChainIndex: 900, + ActivationTime: 42, + HistoryMinTime: 100, + }, + }) + require.NoError(t, err) + + cfg := &config.Config{ + Version: "test", + LogConfig: oplog.CLIConfig{}, + MetricsConfig: opmetrics.CLIConfig{}, + PprofConfig: oppprof.CLIConfig{}, + RPC: oprpc.CLIConfig{}, + DependencySetSource: depSet, + SynchronousProcessors: true, + MockRun: false, + SyncSources: &syncnode.CLISyncNodes{}, + Datadir: dataDir, + } + + ex := 
event.NewGlobalSynchronous(context.Background()) + b, err := NewSupervisorBackend(context.Background(), logger, mockMetrics, cfg, ex) + require.NoError(t, err) + + // Assert that the metrics are called at initialization + mockMetrics.Mock.AssertCalled(t, "RecordDBEntryCount", chainA, "log", int64(0)) + mockMetrics.Mock.AssertCalled(t, "RecordDBEntryCount", chainA, "local_derived", int64(0)) + mockMetrics.Mock.AssertCalled(t, "RecordDBEntryCount", chainA, "cross_derived", int64(0)) + + // Start the backend + err = b.Start(context.Background()) + require.NoError(t, err) + + // Create a test block + block := eth.BlockRef{ + Hash: common.Hash{0xaa}, + Number: 42, + ParentHash: common.Hash{0xbb}, + Time: 10000, + } + + // Assert that metrics are called on safety level updates + err = b.chainDBs.UpdateCrossUnsafe(chainA, types.BlockSeal{ + Hash: block.Hash, + Number: block.Number, + Timestamp: block.Time, + }) + require.NoError(t, err) + mockMetrics.Mock.AssertCalled(t, "RecordCrossUnsafeRef", chainA, mock.MatchedBy(func(ref eth.BlockRef) bool { + return ref.Hash == block.Hash && ref.Number == block.Number && ref.Time == block.Time + })) + + err = b.chainDBs.UpdateCrossSafe(chainA, block, block) + require.NoError(t, err) + mockMetrics.Mock.AssertCalled(t, "RecordDBEntryCount", chainA, "cross_derived", int64(1)) + mockMetrics.Mock.AssertCalled(t, "RecordCrossSafeRef", chainA, mock.MatchedBy(func(ref eth.BlockRef) bool { + return ref.Hash == block.Hash && ref.Number == block.Number && ref.Time == block.Time + })) + + // Stop the backend + err = b.Stop(context.Background()) + require.NoError(t, err) +} + +type MockMetrics struct { + mock.Mock +} + +var _ Metrics = (*MockMetrics)(nil) + +func (m *MockMetrics) CacheAdd(chainID eth.ChainID, label string, cacheSize int, evicted bool) { + m.Mock.Called(chainID, label, cacheSize, evicted) +} + +func (m *MockMetrics) CacheGet(chainID eth.ChainID, label string, hit bool) { + m.Mock.Called(chainID, label, hit) +} + +func (m 
*MockMetrics) RecordCrossUnsafeRef(chainID eth.ChainID, ref eth.BlockRef) { + m.Mock.Called(chainID, ref) +} + +func (m *MockMetrics) RecordCrossSafeRef(chainID eth.ChainID, ref eth.BlockRef) { + m.Mock.Called(chainID, ref) +} + +func (m *MockMetrics) RecordDBEntryCount(chainID eth.ChainID, kind string, count int64) { + m.Mock.Called(chainID, kind, count) +} + +func (m *MockMetrics) RecordDBSearchEntriesRead(chainID eth.ChainID, count int64) { + m.Mock.Called(chainID, count) +} + type MockProcessorSource struct { mock.Mock } diff --git a/op-supervisor/supervisor/backend/chain_metrics.go b/op-supervisor/supervisor/backend/chain_metrics.go index 8a2aec7409a6e..22d7e1bfde3ef 100644 --- a/op-supervisor/supervisor/backend/chain_metrics.go +++ b/op-supervisor/supervisor/backend/chain_metrics.go @@ -10,6 +10,9 @@ type Metrics interface { CacheAdd(chainID eth.ChainID, label string, cacheSize int, evicted bool) CacheGet(chainID eth.ChainID, label string, hit bool) + RecordCrossUnsafeRef(chainID eth.ChainID, ref eth.BlockRef) + RecordCrossSafeRef(chainID eth.ChainID, ref eth.BlockRef) + RecordDBEntryCount(chainID eth.ChainID, kind string, count int64) RecordDBSearchEntriesRead(chainID eth.ChainID, count int64) } @@ -28,6 +31,14 @@ func newChainMetrics(chainID eth.ChainID, delegate Metrics) *chainMetrics { } } +func (c *chainMetrics) RecordCrossUnsafeRef(ref eth.BlockRef) { + c.delegate.RecordCrossUnsafeRef(c.chainID, ref) +} + +func (c *chainMetrics) RecordCrossSafeRef(ref eth.BlockRef) { + c.delegate.RecordCrossSafeRef(c.chainID, ref) +} + func (c *chainMetrics) CacheAdd(label string, cacheSize int, evicted bool) { c.delegate.CacheAdd(c.chainID, label, cacheSize, evicted) } diff --git a/op-supervisor/supervisor/backend/db/db.go b/op-supervisor/supervisor/backend/db/db.go index f62025ecec83d..2e844e0fcc037 100644 --- a/op-supervisor/supervisor/backend/db/db.go +++ b/op-supervisor/supervisor/backend/db/db.go @@ -11,6 +11,7 @@ import ( 
"github.com/ethereum-optimism/optimism/op-node/rollup/event" "github.com/ethereum-optimism/optimism/op-service/eth" "github.com/ethereum-optimism/optimism/op-service/locks" + "github.com/ethereum-optimism/optimism/op-supervisor/metrics" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/db/fromda" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/db/logs" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/depset" @@ -84,6 +85,11 @@ var _ DerivationStorage = (*fromda.DB)(nil) var _ LogStorage = (*logs.DB)(nil) +type Metrics interface { + RecordCrossUnsafeRef(chainID eth.ChainID, ref eth.BlockRef) + RecordCrossSafeRef(chainID eth.ChainID, ref eth.BlockRef) +} + // ChainsDB is a database that stores logs and derived-from data for multiple chains. // it implements the LogStorage interface, as well as several DB interfaces needed by the cross package. type ChainsDB struct { @@ -113,14 +119,21 @@ type ChainsDB struct { // emitter used to signal when the DB changes, for other modules to react to emitter event.Emitter + + m Metrics } var _ event.AttachEmitter = (*ChainsDB)(nil) -func NewChainsDB(l log.Logger, depSet depset.DependencySet) *ChainsDB { +func NewChainsDB(l log.Logger, depSet depset.DependencySet, m Metrics) *ChainsDB { + if m == nil { + m = metrics.NoopMetrics + } + return &ChainsDB{ logger: l, depSet: depSet, + m: m, } } diff --git a/op-supervisor/supervisor/backend/db/query_test.go b/op-supervisor/supervisor/backend/db/query_test.go index fbfb37697f3e2..f640ec8bc154d 100644 --- a/op-supervisor/supervisor/backend/db/query_test.go +++ b/op-supervisor/supervisor/backend/db/query_test.go @@ -6,6 +6,7 @@ import ( "github.com/ethereum-optimism/optimism/op-service/eth" "github.com/ethereum-optimism/optimism/op-service/testlog" + "github.com/ethereum-optimism/optimism/op-supervisor/metrics" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/depset" 
"github.com/ethereum-optimism/optimism/op-supervisor/supervisor/types" "github.com/ethereum/go-ethereum/common" @@ -96,7 +97,7 @@ func TestCommonL1UnknownChain(t *testing.T) { m1 := &mockDerivationStorage{} m2 := &mockDerivationStorage{} logger := testlog.Logger(t, log.LevelDebug) - chainDB := NewChainsDB(logger, sampleDepSet(t)) + chainDB := NewChainsDB(logger, sampleDepSet(t), metrics.NoopMetrics) // add a mock local derived-from storage to drive the test chainDB.AddLocalDerivationDB(eth.ChainIDFromUInt64(900), m1) @@ -112,7 +113,7 @@ func TestCommonL1(t *testing.T) { m2 := &mockDerivationStorage{} m3 := &mockDerivationStorage{} logger := testlog.Logger(t, log.LevelDebug) - chainDB := NewChainsDB(logger, sampleDepSet(t)) + chainDB := NewChainsDB(logger, sampleDepSet(t), metrics.NoopMetrics) // add a mock local derived-from storage to drive the test chainDB.AddLocalDerivationDB(eth.ChainIDFromUInt64(900), m1) diff --git a/op-supervisor/supervisor/backend/db/update.go b/op-supervisor/supervisor/backend/db/update.go index 72d4aad160cf5..d3eb7b0f57e2a 100644 --- a/op-supervisor/supervisor/backend/db/update.go +++ b/op-supervisor/supervisor/backend/db/update.go @@ -108,6 +108,11 @@ func (db *ChainsDB) UpdateCrossUnsafe(chain eth.ChainID, crossUnsafe types.Block ChainID: chain, NewCrossUnsafe: crossUnsafe, }) + db.m.RecordCrossUnsafeRef(chain, eth.BlockRef{ + Number: crossUnsafe.Number, + Time: crossUnsafe.Timestamp, + Hash: crossUnsafe.Hash, + }) return nil } @@ -127,6 +132,7 @@ func (db *ChainsDB) UpdateCrossSafe(chain eth.ChainID, l1View eth.BlockRef, last Derived: types.BlockSealFromRef(lastCrossDerived), }, }) + db.m.RecordCrossSafeRef(chain, lastCrossDerived) return nil } diff --git a/op-supervisor/supervisor/backend/rewinder/rewinder_test.go b/op-supervisor/supervisor/backend/rewinder/rewinder_test.go index 2b09c50c48ecb..f9bd2bfda2c38 100644 --- a/op-supervisor/supervisor/backend/rewinder/rewinder_test.go +++ 
b/op-supervisor/supervisor/backend/rewinder/rewinder_test.go @@ -14,6 +14,7 @@ import ( "github.com/ethereum-optimism/optimism/op-node/rollup/event" "github.com/ethereum-optimism/optimism/op-service/eth" "github.com/ethereum-optimism/optimism/op-service/testlog" + "github.com/ethereum-optimism/optimism/op-supervisor/metrics" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/db" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/db/fromda" "github.com/ethereum-optimism/optimism/op-supervisor/supervisor/backend/db/logs" @@ -745,7 +746,7 @@ func setupTestChains(t *testing.T, chainIDs ...eth.ChainID) *testSetup { require.NoError(t, err) // Create ChainsDB with mock emitter - chainsDB := db.NewChainsDB(logger, depSet) + chainsDB := db.NewChainsDB(logger, depSet, metrics.NoopMetrics) chainsDB.AttachEmitter(&mockEmitter{}) setup := &testSetup{