diff --git a/plugin/storage/badger/factory.go b/plugin/storage/badger/factory.go index 758a94404af..3bd5e29927b 100644 --- a/plugin/storage/badger/factory.go +++ b/plugin/storage/badger/factory.go @@ -15,12 +15,15 @@ package badger import ( + "expvar" "flag" "io/ioutil" "os" + "strings" "time" "github.com/dgraph-io/badger" + "github.com/dgraph-io/badger/options" "github.com/spf13/viper" "github.com/uber/jaeger-lib/metrics" "go.uber.org/zap" @@ -58,6 +61,9 @@ type Factory struct { LastMaintenanceRun metrics.Gauge // LastValueLogCleaned stores the timestamp (UnixNano) of the previous ValueLogGC run LastValueLogCleaned metrics.Gauge + + // Expose badger's internal expvar metrics, which are all gauge's at this point + badgerMetrics map[string]metrics.Gauge } } @@ -84,6 +90,7 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) f.logger = logger opts := badger.DefaultOptions + opts.TableLoadingMode = options.MemoryMap if f.Options.primary.Ephemeral { opts.SyncWrites = false @@ -118,7 +125,10 @@ func (f *Factory) Initialize(metricsFactory metrics.Factory, logger *zap.Logger) f.metrics.LastMaintenanceRun = metricsFactory.Gauge(metrics.Options{Name: lastMaintenanceRunName}) f.metrics.LastValueLogCleaned = metricsFactory.Gauge(metrics.Options{Name: lastValueLogCleanedName}) + f.registerBadgerExpvarMetrics(metricsFactory) + go f.maintenance() + go f.metricsCopier() logger.Info("Badger storage configuration", zap.Any("configuration", opts)) @@ -150,8 +160,7 @@ func (f *Factory) CreateDependencyReader() (dependencystore.Reader, error) { // Close Implements io.Closer and closes the underlying storage func (f *Factory) Close() error { - f.maintenanceDone <- true - + close(f.maintenanceDone) err := f.store.Close() // Remove tmp files if this was ephemeral storage @@ -175,6 +184,7 @@ func (f *Factory) maintenance() { return case t := <-maintenanceTicker.C: var err error + // After there's nothing to clean, the err is raised for err == nil { err = f.store.RunValueLogGC(0.5) // 0.5 is selected to rewrite a file if half of it can be discarded @@ -190,3 +200,56 @@ func (f *Factory) maintenance() { } } } + +func (f *Factory) metricsCopier() { + metricsTicker := time.NewTicker(f.Options.primary.MetricsUpdateInterval) + defer metricsTicker.Stop() + for { + select { + case <-f.maintenanceDone: + return + case <-metricsTicker.C: + expvar.Do(func(kv expvar.KeyValue) { + if strings.HasPrefix(kv.Key, "badger") { + if intVal, ok := kv.Value.(*expvar.Int); ok { + if g, found := f.metrics.badgerMetrics[kv.Key]; found { + g.Update(intVal.Value()) + } + } else if mapVal, ok := kv.Value.(*expvar.Map); ok { + mapVal.Do(func(innerKv expvar.KeyValue) { + // The metrics we're interested in have only a single inner key (dynamic name) + // and we're only interested in its value + if intVal, ok := innerKv.Value.(*expvar.Int); ok { + if g, found := f.metrics.badgerMetrics[kv.Key]; found { + g.Update(intVal.Value()) + } + } + }) + } + } + }) + } + } +} + +func (f *Factory) registerBadgerExpvarMetrics(metricsFactory metrics.Factory) { + f.metrics.badgerMetrics = make(map[string]metrics.Gauge) + + expvar.Do(func(kv expvar.KeyValue) { + if strings.HasPrefix(kv.Key, "badger") { + if _, ok := kv.Value.(*expvar.Int); ok { + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) + f.metrics.badgerMetrics[kv.Key] = g + } else if mapVal, ok := kv.Value.(*expvar.Map); ok { + mapVal.Do(func(innerKv expvar.KeyValue) { + // The metrics we're interested in have only a single inner key (dynamic name) + // and we're only interested in its value + if _, ok = innerKv.Value.(*expvar.Int); ok { + g := metricsFactory.Gauge(metrics.Options{Name: kv.Key}) + f.metrics.badgerMetrics[kv.Key] = g + } + }) + } + } + }) +} diff --git a/plugin/storage/badger/factory_test.go b/plugin/storage/badger/factory_test.go index 4261292505e..1917d017624 100644 --- a/plugin/storage/badger/factory_test.go +++ b/plugin/storage/badger/factory_test.go @@ -141,6 +141,49 @@ func TestMaintenanceCodecov(t *testing.T) { } } - _ = f.store.Close() + err := f.store.Close() + assert.NoError(t, err) waiter() // This should trigger the logging of error } + +func TestBadgerMetrics(t *testing.T) { + // The expvar is leaking keyparams between tests. We need to clean up a bit.. + eMap := expvar.Get("badger_lsm_size_bytes").(*expvar.Map) + eMap.Init() + + f := NewFactory() + v, command := config.Viperize(f.AddFlags) + command.ParseFlags([]string{ + "--badger.metrics-update-interval=10ms", + }) + f.InitFromViper(v) + mFactory := metricstest.NewFactory(0) + f.Initialize(mFactory, zap.NewNop()) + assert.NotNil(t, f.metrics.badgerMetrics) + _, found := f.metrics.badgerMetrics["badger_memtable_gets_total"] + assert.True(t, found) + + waiter := func(previousValue int64) int64 { + sleeps := 0 + _, gs := mFactory.Snapshot() + for gs["badger_memtable_gets_total"] == previousValue && sleeps < 8 { + // Wait for the scheduler + time.Sleep(time.Duration(50) * time.Millisecond) + sleeps++ + _, gs = mFactory.Snapshot() + } + assert.True(t, gs["badger_memtable_gets_total"] > previousValue) + return gs["badger_memtable_gets_total"] + } + + vlogSize := waiter(0) + _, gs := mFactory.Snapshot() + assert.True(t, vlogSize > 0) + assert.True(t, gs["badger_memtable_gets_total"] > 0) // IntVal metric + + _, found = gs["badger_lsm_size_bytes"] // Map metric + assert.True(t, found) + + err := f.Close() + assert.NoError(t, err) +} diff --git a/plugin/storage/badger/options.go b/plugin/storage/badger/options.go index db3c957ca36..8775578c431 100644 --- a/plugin/storage/badger/options.go +++ b/plugin/storage/badger/options.go @@ -31,18 +31,20 @@ type Options struct { // NamespaceConfig is badger's internal configuration data type NamespaceConfig struct { - namespace string - SpanStoreTTL time.Duration - ValueDirectory string - KeyDirectory string - Ephemeral bool // Setting this to true will ignore ValueDirectory and KeyDirectory - SyncWrites bool - MaintenanceInterval time.Duration + namespace string + SpanStoreTTL time.Duration + ValueDirectory string + KeyDirectory string + Ephemeral bool // Setting this to true will ignore ValueDirectory and KeyDirectory + SyncWrites bool + MaintenanceInterval time.Duration + MetricsUpdateInterval time.Duration } const ( - defaultMaintenanceInterval time.Duration = 5 * time.Minute - defaultTTL time.Duration = time.Hour * 72 + defaultMaintenanceInterval time.Duration = 5 * time.Minute + defaultMetricsUpdateInterval time.Duration = 10 * time.Second + defaultTTL time.Duration = time.Hour * 72 ) const ( @@ -52,6 +54,7 @@ const ( suffixSpanstoreTTL = ".span-store-ttl" suffixSyncWrite = ".consistency" suffixMaintenanceInterval = ".maintenance-interval" + suffixMetricsInterval = ".metrics-update-interval" // Intended only for testing purposes defaultDataDir = string(os.PathSeparator) + "data" defaultValueDir = defaultDataDir + string(os.PathSeparator) + "values" defaultKeysDir = defaultDataDir + string(os.PathSeparator) + "keys" @@ -64,13 +67,14 @@ func NewOptions(primaryNamespace string, otherNamespaces ...string) *Options { options := &Options{ primary: &NamespaceConfig{ - namespace: primaryNamespace, - SpanStoreTTL: defaultTTL, - SyncWrites: false, // Performance over durability - Ephemeral: true, // Default is ephemeral storage - ValueDirectory: defaultBadgerDataDir + defaultValueDir, - KeyDirectory: defaultBadgerDataDir + defaultKeysDir, - MaintenanceInterval: defaultMaintenanceInterval, + namespace: primaryNamespace, + SpanStoreTTL: defaultTTL, + SyncWrites: false, // Performance over durability + Ephemeral: true, // Default is ephemeral storage + ValueDirectory: defaultBadgerDataDir + defaultValueDir, + KeyDirectory: defaultBadgerDataDir + defaultKeysDir, + MaintenanceInterval: defaultMaintenanceInterval, + MetricsUpdateInterval: defaultMetricsUpdateInterval, }, } @@ -112,13 +116,18 @@ func addFlags(flagSet *flag.FlagSet, nsConfig *NamespaceConfig) { flagSet.Bool( nsConfig.namespace+suffixSyncWrite, nsConfig.SyncWrites, - "If all writes should be synced immediately. This will greatly reduce write performance.", + "If all writes should be synced immediately. This can impact write performance.", ) flagSet.Duration( nsConfig.namespace+suffixMaintenanceInterval, nsConfig.MaintenanceInterval, "How often the maintenance thread for values is ran. Format is time.Duration (https://golang.org/pkg/time/#Duration)", ) + flagSet.Duration( + nsConfig.namespace+suffixMetricsInterval, + nsConfig.MetricsUpdateInterval, + "How often the badger metrics are collected by Jaeger. Format is time.Duration (https://golang.org/pkg/time/#Duration)", + ) } // InitFromViper initializes Options with properties from viper @@ -133,6 +142,7 @@ func initFromViper(cfg *NamespaceConfig, v *viper.Viper) { cfg.SyncWrites = v.GetBool(cfg.namespace + suffixSyncWrite) cfg.SpanStoreTTL = v.GetDuration(cfg.namespace + suffixSpanstoreTTL) cfg.MaintenanceInterval = v.GetDuration(cfg.namespace + suffixMaintenanceInterval) + cfg.MetricsUpdateInterval = v.GetDuration(cfg.namespace + suffixMetricsInterval) } // GetPrimary returns the primary namespace configuration