Skip to content

Commit f9c3e64

Browse files
author
Woosang Son
authored
feat: add prometheus metrics for caches reverting telemetry metrics (#184)
1 parent 9717c49 commit f9c3e64

15 files changed

+300
-40
lines changed

baseapp/options.go

+11-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"io"
66

7+
"github.com/line/lbm-sdk/v2/store/cache"
78
"github.com/line/lbm-sdk/v2/store/iavl"
89
tmdb "github.com/line/tm-db/v2"
910

@@ -65,12 +66,12 @@ func SetInterBlockCache(cache sdk.MultiStorePersistentCache) func(*BaseApp) {
6566
}
6667

6768
// SetIAVLCacheManager provides a BaseApp option function that sets the iavl CacheManager
68-
func SetIAVLCacheManager(size int) func(*BaseApp) {
69+
func SetIAVLCacheManager(size int, provider iavl.MetricsProvider) func(*BaseApp) {
6970
return func(app *BaseApp) {
7071
if size == 0 {
7172
app.cms.SetIAVLCacheManager(iavl.NewCacheManagerNoCache())
7273
} else {
73-
app.cms.SetIAVLCacheManager(iavl.NewCacheManagerSingleton(size))
74+
app.cms.SetIAVLCacheManager(iavl.NewCacheManagerSingleton(size, provider))
7475
}
7576
}
7677
}
@@ -245,3 +246,11 @@ func (app *BaseApp) SetInterfaceRegistry(registry types.InterfaceRegistry) {
245246
app.grpcQueryRouter.SetInterfaceRegistry(registry)
246247
app.msgServiceRouter.SetInterfaceRegistry(registry)
247248
}
249+
250+
func MetricsProvider(prometheus bool) (cache.MetricsProvider, iavl.MetricsProvider) {
251+
namespace := "app"
252+
if prometheus {
253+
return cache.PrometheusMetricsProvider(namespace), iavl.PrometheusMetricsProvider(namespace)
254+
}
255+
return cache.NopMetricsProvider(), iavl.NopMetricsProvider()
256+
}

baseapp/options_test.go

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package baseapp
2+
3+
import (
4+
"testing"
5+
6+
"github.com/go-kit/kit/metrics/discard"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestMetricsProvider(t *testing.T) {
11+
p1, p2 := MetricsProvider(true)
12+
c1 := p1()
13+
c2 := p2()
14+
require.NotEqual(t, c1.InterBlockCacheHits, discard.NewCounter())
15+
require.NotEqual(t, c2.IAVLCacheHits, discard.NewGauge())
16+
17+
p1, p2 = MetricsProvider(false)
18+
c1 = p1()
19+
c2 = p2()
20+
require.Equal(t, c1.InterBlockCacheHits, discard.NewCounter())
21+
require.Equal(t, c2.IAVLCacheHits, discard.NewGauge())
22+
}

server/config/config.go

+4
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ type BaseConfig struct {
7373

7474
// IAVL cache size, in bytes.
7575
IAVLCacheSize int `mapstructure:"iavl-cache-size"`
76+
77+
// When true, Prometheus metrics are served under /metrics on prometheus_listen_addr in config.toml.
78+
// This setting only takes effect when Tendermint's prometheus option (config.toml) is also set to true.
79+
Prometheus bool `mapstructure:"prometheus"`
7680
}
7781

7882
// APIConfig defines the API listener configuration.

server/config/toml.go

+4
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ iavl-cache-size = {{ .BaseConfig.IAVLCacheSize }}
7676
# ["message.sender", "message.recipient"]
7777
index-events = {{ .BaseConfig.IndexEvents }}
7878
79+
# When true, Prometheus metrics are served under /metrics on prometheus_listen_addr in config.toml.
80+
# It works when tendermint's prometheus option (config.toml) is set to true.
81+
prometheus = {{ .BaseConfig.Prometheus }}
82+
7983
###############################################################################
8084
### Telemetry Configuration ###
8185
###############################################################################

server/start.go

+2
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ which accepts a path for the resulting pprof file.
160160
cmd.Flags().Uint64(FlagStateSyncSnapshotInterval, 0, "State sync snapshot interval")
161161
cmd.Flags().Uint32(FlagStateSyncSnapshotKeepRecent, 2, "State sync snapshot to keep")
162162

163+
cmd.Flags().Bool(FlagPrometheus, false, "Enable prometheus metric for app")
164+
163165
// add support for all Ostracon-specific command line options
164166
ostcmd.AddNodeFlags(cmd)
165167
return cmd

simapp/sim_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ func fauxMerkleModeOpt(bapp *baseapp.BaseApp) {
5454
// interBlockCacheOpt returns a BaseApp option function that sets the persistent
5555
// inter-block write-through cache.
5656
func interBlockCacheOpt() func(*baseapp.BaseApp) {
57-
return baseapp.SetInterBlockCache(store.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize))
57+
return baseapp.SetInterBlockCache(store.NewCommitKVStoreCacheManager(
58+
cache.DefaultCommitKVStoreCacheSize, cache.NopMetricsProvider()))
5859
}
5960

6061
func TestFullAppSimulation(t *testing.T) {

simapp/simd/cmd/root.go

+12-8
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ import (
66
"os"
77
"path/filepath"
88

9+
ostcli "github.com/line/ostracon/libs/cli"
10+
"github.com/line/ostracon/libs/log"
11+
tmdb "github.com/line/tm-db/v2"
12+
"github.com/spf13/cast"
13+
"github.com/spf13/cobra"
14+
"github.com/spf13/viper"
15+
916
"github.com/line/lbm-sdk/v2/baseapp"
1017
"github.com/line/lbm-sdk/v2/client"
1118
"github.com/line/lbm-sdk/v2/client/debug"
@@ -26,11 +33,6 @@ import (
2633
banktypes "github.com/line/lbm-sdk/v2/x/bank/types"
2734
"github.com/line/lbm-sdk/v2/x/crisis"
2835
genutilcli "github.com/line/lbm-sdk/v2/x/genutil/client/cli"
29-
ostcli "github.com/line/ostracon/libs/cli"
30-
"github.com/line/ostracon/libs/log"
31-
tmdb "github.com/line/tm-db/v2"
32-
"github.com/spf13/cast"
33-
"github.com/spf13/cobra"
3436
)
3537

3638
// NewRootCmd creates a new root command for simd. It is called once in the
@@ -156,9 +158,11 @@ type appCreator struct {
156158
func (a appCreator) newApp(logger log.Logger, db tmdb.DB, traceStore io.Writer, appOpts servertypes.AppOptions) servertypes.Application {
157159
var cache sdk.MultiStorePersistentCache
158160

161+
ibCacheMetricsProvider, iavlCacheMetricsProvider :=
162+
baseapp.MetricsProvider(cast.ToBool(viper.GetBool(server.FlagPrometheus)))
159163
if cast.ToBool(appOpts.Get(server.FlagInterBlockCache)) {
160164
cache = store.NewCommitKVStoreCacheManager(
161-
cast.ToInt(appOpts.Get(server.FlagInterBlockCacheSize)))
165+
cast.ToInt(appOpts.Get(server.FlagInterBlockCacheSize)), ibCacheMetricsProvider)
162166
}
163167

164168
skipUpgradeHeights := make(map[int64]bool)
@@ -192,10 +196,10 @@ func (a appCreator) newApp(logger log.Logger, db tmdb.DB, traceStore io.Writer,
192196
baseapp.SetHaltHeight(cast.ToUint64(appOpts.Get(server.FlagHaltHeight))),
193197
baseapp.SetHaltTime(cast.ToUint64(appOpts.Get(server.FlagHaltTime))),
194198
baseapp.SetMinRetainBlocks(cast.ToUint64(appOpts.Get(server.FlagMinRetainBlocks))),
195-
baseapp.SetIAVLCacheManager(cast.ToInt(appOpts.Get(server.FlagIAVLCacheSize))),
196199
baseapp.SetInterBlockCache(cache),
197-
baseapp.SetIndexEvents(cast.ToStringSlice(appOpts.Get(server.FlagIndexEvents))),
200+
baseapp.SetIAVLCacheManager(cast.ToInt(appOpts.Get(server.FlagIAVLCacheSize)), iavlCacheMetricsProvider),
198201
baseapp.SetTrace(cast.ToBool(appOpts.Get(server.FlagTrace))),
202+
baseapp.SetIndexEvents(cast.ToStringSlice(appOpts.Get(server.FlagIndexEvents))),
199203
baseapp.SetSnapshotStore(snapshotStore),
200204
baseapp.SetSnapshotInterval(cast.ToUint64(appOpts.Get(server.FlagStateSyncSnapshotInterval))),
201205
baseapp.SetSnapshotKeepRecent(cast.ToUint32(appOpts.Get(server.FlagStateSyncSnapshotKeepRecent))),

store/cache/cache.go

+17-13
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import (
66
"github.com/VictoriaMetrics/fastcache"
77
"github.com/line/lbm-sdk/v2/store/cachekv"
88
"github.com/line/lbm-sdk/v2/store/types"
9-
"github.com/line/lbm-sdk/v2/telemetry"
109
)
1110

1211
const (
@@ -27,18 +26,20 @@ type (
2726
// CommitKVStore and below is completely irrelevant to this layer.
2827
CommitKVStoreCache struct {
2928
types.CommitKVStore
30-
cache *fastcache.Cache
31-
prefix []byte
29+
cache *fastcache.Cache
30+
prefix []byte
31+
metrics *Metrics
3232
}
3333

3434
// CommitKVStoreCacheManager maintains a mapping from a StoreKey to a
3535
// CommitKVStoreCache. Each CommitKVStore, per StoreKey, is meant to be used
3636
// in an inter-block (persistent) manner and typically provided by a
3737
// CommitMultiStore.
3838
CommitKVStoreCacheManager struct {
39-
mutex sync.Mutex
40-
cache *fastcache.Cache
41-
caches map[string]types.CommitKVStore
39+
mutex sync.Mutex
40+
cache *fastcache.Cache
41+
caches map[string]types.CommitKVStore
42+
metrics *Metrics
4243

4344
// All cache stores use the unique prefix that has one byte length
4445
// Contract: The number of all cache stores cannot exceed 127(max byte)
@@ -47,22 +48,25 @@ type (
4748
}
4849
)
4950

50-
func NewCommitKVStoreCache(store types.CommitKVStore, prefix []byte, cache *fastcache.Cache) *CommitKVStoreCache {
51+
func NewCommitKVStoreCache(store types.CommitKVStore, prefix []byte, cache *fastcache.Cache,
52+
metrics *Metrics) *CommitKVStoreCache {
5153
return &CommitKVStoreCache{
5254
CommitKVStore: store,
5355
prefix: prefix,
5456
cache: cache,
57+
metrics: metrics,
5558
}
5659
}
5760

58-
func NewCommitKVStoreCacheManager(cacheSize int) *CommitKVStoreCacheManager {
61+
func NewCommitKVStoreCacheManager(cacheSize int, provider MetricsProvider) *CommitKVStoreCacheManager {
5962
if cacheSize <= 0 {
6063
// A caller of this function intends to use the inter-block cache, so a non-positive size falls back to the default (minimal) cache size.
6164
cacheSize = DefaultCommitKVStoreCacheSize
6265
}
6366
return &CommitKVStoreCacheManager{
6467
cache: fastcache.New(cacheSize),
6568
caches: make(map[string]types.CommitKVStore),
69+
metrics: provider(),
6670
prefixMap: make(map[string][]byte),
6771
prefixOrder: 0,
6872
}
@@ -81,7 +85,7 @@ func (cmgr *CommitKVStoreCacheManager) GetStoreCache(key types.StoreKey, store t
8185
if cmgr.prefixOrder <= 0 {
8286
panic("The number of cache stores exceed the maximum(127)")
8387
}
84-
cmgr.caches[key.Name()] = NewCommitKVStoreCache(store, cmgr.prefixMap[key.Name()], cmgr.cache)
88+
cmgr.caches[key.Name()] = NewCommitKVStoreCache(store, cmgr.prefixMap[key.Name()], cmgr.cache, cmgr.metrics)
8589
}
8690
cmgr.mutex.Unlock()
8791
}
@@ -123,18 +127,18 @@ func (ckv *CommitKVStoreCache) Get(key []byte) []byte {
123127
valueI := ckv.cache.Get(nil, prefixedKey)
124128
if valueI != nil {
125129
// cache hit
126-
telemetry.IncrCounter(1, "store", "inter-block-cache", "hits")
130+
ckv.metrics.InterBlockCacheHits.Add(1)
127131
return valueI
128132
}
129133

130134
// cache miss; write to cache
131-
telemetry.IncrCounter(1, "store", "inter-block-cache", "misses")
135+
ckv.metrics.InterBlockCacheMisses.Add(1)
132136
value := ckv.CommitKVStore.Get(key)
133137
ckv.cache.Set(prefixedKey, value)
134138
stats := fastcache.Stats{}
135139
ckv.cache.UpdateStats(&stats)
136-
telemetry.SetGauge(float32(stats.EntriesCount), "store", "inter-block-cache", "entries")
137-
telemetry.SetGauge(float32(stats.BytesSize), "store", "inter-block-cache", "bytes")
140+
ckv.metrics.InterBlockCacheEntries.Set(float64(stats.EntriesCount))
141+
ckv.metrics.InterBlockCacheBytes.Set(float64(stats.BytesSize))
138142
return value
139143
}
140144

store/cache/cache_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616

1717
func TestGetOrSetStoreCache(t *testing.T) {
1818
db := memdb.NewDB()
19-
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize)
19+
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize, cache.NopMetricsProvider())
2020

2121
sKey := types.NewKVStoreKey("test")
2222
tree, err := iavl.NewMutableTree(db, 100)
@@ -30,7 +30,7 @@ func TestGetOrSetStoreCache(t *testing.T) {
3030

3131
func TestUnwrap(t *testing.T) {
3232
db := memdb.NewDB()
33-
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize)
33+
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize, cache.NopMetricsProvider())
3434

3535
sKey := types.NewKVStoreKey("test")
3636
tree, err := iavl.NewMutableTree(db, 100)
@@ -44,7 +44,7 @@ func TestUnwrap(t *testing.T) {
4444

4545
func TestStoreCache(t *testing.T) {
4646
db := memdb.NewDB()
47-
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize)
47+
mngr := cache.NewCommitKVStoreCacheManager(cache.DefaultCommitKVStoreCacheSize, cache.NopMetricsProvider())
4848

4949
sKey := types.NewKVStoreKey("test")
5050
tree, err := iavl.NewMutableTree(db, 100)

store/cache/metrics.go

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
package cache
2+
3+
import (
4+
"github.com/go-kit/kit/metrics"
5+
"github.com/go-kit/kit/metrics/discard"
6+
"github.com/go-kit/kit/metrics/prometheus"
7+
stdprometheus "github.com/prometheus/client_golang/prometheus"
8+
)
9+
10+
const (
11+
// MetricsSubsystem is a subsystem shared by all metrics exposed by this
12+
// package.
13+
MetricsSubsystem = "inter_block_cache"
14+
)
15+
16+
// Metrics contains metrics exposed by this package.
17+
type Metrics struct {
18+
InterBlockCacheHits metrics.Counter
19+
InterBlockCacheMisses metrics.Counter
20+
InterBlockCacheEntries metrics.Gauge
21+
InterBlockCacheBytes metrics.Gauge
22+
}
23+
24+
// PrometheusMetrics returns Metrics built using the Prometheus client library.
25+
// Optionally, labels can be provided along with their values ("foo",
26+
// "fooValue").
27+
func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
28+
labels := []string{}
29+
for i := 0; i < len(labelsAndValues); i += 2 {
30+
labels = append(labels, labelsAndValues[i])
31+
}
32+
return &Metrics{
33+
InterBlockCacheHits: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
34+
Namespace: namespace,
35+
Subsystem: MetricsSubsystem,
36+
Name: "hits",
37+
Help: "Cache hits of the inter block cache",
38+
}, labels).With(labelsAndValues...),
39+
InterBlockCacheMisses: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
40+
Namespace: namespace,
41+
Subsystem: MetricsSubsystem,
42+
Name: "misses",
43+
Help: "Cache misses of the inter block cache",
44+
}, labels).With(labelsAndValues...),
45+
InterBlockCacheEntries: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
46+
Namespace: namespace,
47+
Subsystem: MetricsSubsystem,
48+
Name: "entries",
49+
Help: "Cache entry count of the inter block cache",
50+
}, labels).With(labelsAndValues...),
51+
InterBlockCacheBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
52+
Namespace: namespace,
53+
Subsystem: MetricsSubsystem,
54+
Name: "bytes_size",
55+
Help: "Cache bytes size of the inter block cache",
56+
}, labels).With(labelsAndValues...),
57+
}
58+
}
59+
60+
// NopMetrics returns no-op Metrics.
61+
func NopMetrics() *Metrics {
62+
return &Metrics{
63+
InterBlockCacheHits: discard.NewCounter(),
64+
InterBlockCacheMisses: discard.NewCounter(),
65+
InterBlockCacheEntries: discard.NewGauge(),
66+
InterBlockCacheBytes: discard.NewGauge(),
67+
}
68+
}
69+
70+
type MetricsProvider func() *Metrics
71+
72+
// PrometheusMetricsProvider returns a provider function that builds Prometheus-backed Metrics for the given namespace and labels.
73+
func PrometheusMetricsProvider(namespace string, labelsAndValues ...string) func() *Metrics {
74+
return func() *Metrics {
75+
return PrometheusMetrics(namespace, labelsAndValues...)
76+
}
77+
}
78+
79+
// NopMetricsProvider returns a provider function that yields no-op Metrics.
80+
func NopMetricsProvider() func() *Metrics {
81+
return func() *Metrics {
82+
return NopMetrics()
83+
}
84+
}

store/cache/metrics_test.go

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package cache
2+
3+
import (
4+
"testing"
5+
6+
"github.com/go-kit/kit/metrics/discard"
7+
"github.com/stretchr/testify/require"
8+
)
9+
10+
func TestPrometheusMetrics(t *testing.T) {
11+
metrics := PrometheusMetrics("test")
12+
require.NotEqual(t, metrics.InterBlockCacheHits, discard.NewCounter())
13+
require.NotEqual(t, metrics.InterBlockCacheMisses, discard.NewCounter())
14+
require.NotEqual(t, metrics.InterBlockCacheEntries, discard.NewGauge())
15+
require.NotEqual(t, metrics.InterBlockCacheBytes, discard.NewGauge())
16+
}

0 commit comments

Comments
 (0)