diff --git a/.changelog/4877.feature.md b/.changelog/4877.feature.md new file mode 100644 index 00000000000..2b4cadb18e3 --- /dev/null +++ b/.changelog/4877.feature.md @@ -0,0 +1,9 @@ +go/worker/keymanager: Add key manager worker metrics + +The following metrics were added: + +- oasis_worker_keymanager_compute_runtime_count + +- oasis_worker_keymanager_enclave_rpc_count + +- oasis_worker_keymanager_policy_update_count diff --git a/docs/oasis-node/metrics.md b/docs/oasis-node/metrics.md index 8234ade2ce9..74242915ca7 100644 --- a/docs/oasis-node/metrics.md +++ b/docs/oasis-node/metrics.md @@ -96,6 +96,9 @@ oasis_worker_executor_liveness_live_ratio | Gauge | Ratio between live and total oasis_worker_executor_liveness_live_rounds | Gauge | Number of live rounds in last epoch. | runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) oasis_worker_executor_liveness_total_rounds | Gauge | Number of total rounds in last epoch. | runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) oasis_worker_failed_round_count | Counter | Number of failed roothash rounds. | runtime | [worker/common/committee](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/common/committee/node.go) +oasis_worker_keymanager_compute_runtime_count | Counter | Number of compute runtimes using the key manager. | | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) +oasis_worker_keymanager_enclave_rpc_count | Counter | Number of remote Enclave RPC requests via P2P. | method | [worker/keymanager/p2p](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/p2p/metrics.go) +oasis_worker_keymanager_policy_update_count | Counter | Number of key manager policy updates. | | [worker/keymanager](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/keymanager/metrics.go) oasis_worker_node_registered | Gauge | Is oasis node registered (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) oasis_worker_node_registration_eligible | Gauge | Is oasis node eligible for registration (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) oasis_worker_node_status_frozen | Gauge | Is oasis node frozen (binary). | | [worker/registration](https://github.com/oasisprotocol/oasis-core/tree/master/go/worker/registration/worker.go) diff --git a/go/worker/keymanager/init.go b/go/worker/keymanager/init.go index c21191fc484..04f29d4ce7c 100644 --- a/go/worker/keymanager/init.go +++ b/go/worker/keymanager/init.go @@ -76,6 +76,8 @@ func New( return w, nil } + initMetrics() + for _, b64pk := range viper.GetStringSlice(CfgPrivatePeerPubKeys) { pkBytes, err := base64.StdEncoding.DecodeString(b64pk) if err != nil { diff --git a/go/worker/keymanager/metrics.go b/go/worker/keymanager/metrics.go new file mode 100644 index 00000000000..c2ee196a79e --- /dev/null +++ b/go/worker/keymanager/metrics.go @@ -0,0 +1,36 @@ +package keymanager + +import ( + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + computeRuntimeCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "oasis_worker_keymanager_compute_runtime_count", + Help: "Number of compute runtimes using the key manager.", + }, + ) + + policyUpdateCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "oasis_worker_keymanager_policy_update_count", + Help: "Number of key manager policy updates.", + }, + ) + + keymanagerWorkerCollectors = []prometheus.Collector{ + computeRuntimeCount, + policyUpdateCount, + } + + metricsOnce sync.Once +) + +func initMetrics() { + metricsOnce.Do(func() { + prometheus.MustRegister(keymanagerWorkerCollectors...) + }) +} diff --git a/go/worker/keymanager/p2p/metrics.go b/go/worker/keymanager/p2p/metrics.go new file mode 100644 index 00000000000..6d0e244ba68 --- /dev/null +++ b/go/worker/keymanager/p2p/metrics.go @@ -0,0 +1,29 @@ +package p2p + +import ( + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + enclaveRPCCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "oasis_worker_keymanager_enclave_rpc_count", + Help: "Number of remote Enclave RPC requests via P2P.", + }, + []string{"method"}, + ) + + keymanagerWorkerCollectors = []prometheus.Collector{ + enclaveRPCCount, + } + + metricsOnce sync.Once +) + +func initMetrics() { + metricsOnce.Do(func() { + prometheus.MustRegister(keymanagerWorkerCollectors...) + }) +} diff --git a/go/worker/keymanager/p2p/server.go b/go/worker/keymanager/p2p/server.go index e046c12476c..ecdb2a07dbe 100644 --- a/go/worker/keymanager/p2p/server.go +++ b/go/worker/keymanager/p2p/server.go @@ -3,6 +3,8 @@ package p2p import ( "context" + "github.com/prometheus/client_golang/prometheus" + "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" "github.com/oasisprotocol/oasis-core/go/worker/common/p2p/rpc" @@ -19,6 +21,8 @@ type service struct { } func (s *service) HandleRequest(ctx context.Context, method string, body cbor.RawMessage) (interface{}, error) { + enclaveRPCCount.With(prometheus.Labels{"method": method}).Inc() + switch method { case MethodCallEnclave: var rq CallEnclaveRequest @@ -44,5 +48,7 @@ func (s *service) handleCallEnclave(ctx context.Context, request *CallEnclaveReq // NewServer creates a new keymanager protocol server. func NewServer(runtimeID common.Namespace, km KeyManager) rpc.Server { + initMetrics() + return rpc.NewServer(runtimeID, KeyManagerProtocolID, KeyManagerProtocolVersion, &service{km}) } diff --git a/go/worker/keymanager/worker.go b/go/worker/keymanager/worker.go index c784802dc7d..16c69d26cba 100644 --- a/go/worker/keymanager/worker.go +++ b/go/worker/keymanager/worker.go @@ -331,6 +331,8 @@ func (w *Worker) updateStatus(status *api.Status, runtimeStatus *runtimeStatus) w.initTicker = nil } + policyUpdateCount.Inc() + // Register as we are now ready to handle requests. initOk = true w.roleProvider.SetAvailableWithCallback(func(n *node.Node) error { @@ -449,6 +451,8 @@ func (w *Worker) startClientRuntimeWatcher(rt *registry.Runtime, status *api.Sta w.clientRuntimes[rt.ID] = crw + computeRuntimeCount.Inc() + return nil }