Skip to content

Commit

Permalink
feat: add metrics for trainer (#2293)
Browse files Browse the repository at this point in the history
Signed-off-by: MIchaelFU0403 <[email protected]>
  • Loading branch information
MIchaelFU0403 authored Apr 25, 2023
1 parent 4ab2b70 commit d657e1a
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 0 deletions.
6 changes: 6 additions & 0 deletions pkg/types/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ const (

// DfstoreName is dfstore name of dfdaemon.
DfstoreName = "dfstore"

// TrainerName is name of trainer.
TrainerName = "trainer"
)

const (
Expand All @@ -51,6 +54,9 @@ const (

// DfdaemonMetricsName is name of dfdaemon metrics.
DfdaemonMetricsName = "dfdaemon"

// TrainerMetricsName is name of trainer metrics.
TrainerMetricsName = "trainer"
)

const (
Expand Down
68 changes: 68 additions & 0 deletions trainer/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright 2020 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package metrics

import (
	"net/http"
	"time"

	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"google.golang.org/grpc"

	"d7y.io/dragonfly/v2/pkg/types"
	"d7y.io/dragonfly/v2/trainer/config"
	"d7y.io/dragonfly/v2/version"
)

// Variables declared for metrics.
var (
	// TrainCount is the counter exported as "training_total" under the
	// trainer metrics subsystem.
	TrainCount = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: types.MetricsNamespace,
		Subsystem: types.TrainerMetricsName,
		Name:      "training_total",
		Help:      "Counter of the number of the training.",
	})

	// TrainFailureCount is the counter exported as "training_failure_total"
	// under the trainer metrics subsystem.
	TrainFailureCount = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: types.MetricsNamespace,
		Subsystem: types.TrainerMetricsName,
		Name:      "training_failure_total",
		Help:      "Counter of the number of failed of the training.",
	})

	// VersionGauge is the gauge vector exported as "version"; its labels carry
	// the build information of the running binary.
	VersionGauge = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: types.MetricsNamespace,
			Subsystem: types.TrainerMetricsName,
			Name:      "version",
			Help:      "Version info of the service.",
		},
		[]string{
			"major",
			"minor",
			"git_version",
			"git_commit",
			"platform",
			"build_time",
			"go_version",
			"go_tags",
			"go_gcflags",
		},
	)
)

// New prepares the trainer metrics endpoint.
//
// It calls grpc_prometheus.Register on svr, sets VersionGauge to 1 for the
// label set describing the current build, and returns an HTTP server whose
// "/metrics" route is served by promhttp.Handler() and whose address is
// cfg.Addr.
//
// The returned server is only constructed, not started; the caller is
// responsible for running it and shutting it down.
func New(cfg *config.MetricsConfig, svr *grpc.Server) *http.Server {
	// Bound the time a client may take to send its request headers so that
	// stalled or deliberately slow connections (Slowloris) cannot hold the
	// metrics listener's resources indefinitely.
	const readHeaderTimeout = 10 * time.Second

	grpc_prometheus.Register(svr)

	// Publish build information as a constant-1 gauge keyed by its labels.
	VersionGauge.WithLabelValues(
		version.Major,
		version.Minor,
		version.GitVersion,
		version.GitCommit,
		version.Platform,
		version.BuildTime,
		version.GoVersion,
		version.Gotags,
		version.Gogcflags,
	).Set(1)

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())

	return &http.Server{
		Addr:              cfg.Addr,
		Handler:           mux,
		ReadHeaderTimeout: readHeaderTimeout,
	}
}
42 changes: 42 additions & 0 deletions trainer/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright 2023 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package metrics

import (
"net/http"
"testing"

"google.golang.org/grpc"

"d7y.io/dragonfly/v2/trainer/config"
)

// TestNew checks that the server returned by New carries the configured
// address and uses an *http.ServeMux as its handler.
func TestNew(t *testing.T) {
	metricsCfg := &config.MetricsConfig{Addr: "localhost:8080"}
	got := New(metricsCfg, grpc.NewServer())

	if want := metricsCfg.Addr; got.Addr != want {
		t.Errorf("expected server.Addr to be %s, but got %s", want, got.Addr)
	}

	switch got.Handler.(type) {
	case *http.ServeMux:
		// Expected handler type; nothing to report.
	default:
		t.Errorf("expected server.Handler to be a *http.ServeMux, but got %T", got.Handler)
	}
}

0 comments on commit d657e1a

Please sign in to comment.