Skip to content

Commit

Permalink
cluster/api: improve metrics and cluster status
Browse files (browse the repository at this point in the history)
  • Loading branch information
fabxc committed Feb 9, 2018
1 parent 247bfff commit 3f2e00f
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 47 deletions.
56 changes: 20 additions & 36 deletions api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
api.mtx.RLock()

var status = struct {
ConfigYAML string `json:"configYAML"`
ConfigJSON *config.Config `json:"configJSON"`
VersionInfo map[string]string `json:"versionInfo"`
Uptime time.Time `json:"uptime"`
MeshStatus *meshStatus `json:"meshStatus"`
ConfigYAML string `json:"configYAML"`
ConfigJSON *config.Config `json:"configJSON"`
VersionInfo map[string]string `json:"versionInfo"`
Uptime time.Time `json:"uptime"`
ClusterStatus *clusterStatus `json:"clusterStatus"`
}{
ConfigYAML: api.config.String(),
ConfigJSON: api.config,
Expand All @@ -205,54 +205,38 @@ func (api *API) status(w http.ResponseWriter, req *http.Request) {
"buildDate": version.BuildDate,
"goVersion": version.GoVersion,
},
Uptime: api.uptime,
MeshStatus: getMeshStatus(api),
Uptime: api.uptime,
ClusterStatus: getClusterStatus(api.peer),
}

api.mtx.RUnlock()

api.respond(w, status)
}

type meshStatus struct {
Name string `json:"name"`
NickName string `json:"nickName"`
Peers []peerStatus `json:"peers"`
Connections []connectionStatus `json:"connections"`
}

type peerStatus struct {
Name string `json:"name"` // e.g. "00:00:00:00:00:01"
NickName string `json:"nickName"` // e.g. "a"
UID uint64 `json:"uid"` // e.g. "14015114173033265000"
Name string `json:"name"`
Address string `json:"address"`
}

type connectionStatus struct {
Address string `json:"address"`
Outbound bool `json:"outbound"`
State string `json:"state"`
Info string `json:"info"`
type clusterStatus struct {
Name string `json:"name"`
Peers []peerStatus `json:"peers"`
}

func getMeshStatus(api *API) *meshStatus {
if api.peer == nil {
func getClusterStatus(p *cluster.Peer) *clusterStatus {
if p == nil {
return nil
}
s := &clusterStatus{Name: p.Name()}

strippedStatus := &meshStatus{
Name: api.peer.Name(),
NickName: "",
}

for _, p := range api.peer.Peers() {
strippedStatus.Peers = append(strippedStatus.Peers, peerStatus{
Name: p.Name,
NickName: "",
UID: 0,
for _, n := range p.Peers() {
s.Peers = append(s.Peers, peerStatus{
Name: n.Name,
Address: n.Address(),
})
}

return strippedStatus
return s
}

func (api *API) alertGroups(w http.ResponseWriter, r *http.Request) {
Expand Down
32 changes: 21 additions & 11 deletions cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,32 +228,38 @@ type delegate struct {
logger log.Logger
bcast *memberlist.TransmitLimitedQueue

gossipMsgsReceived prometheus.Counter
messagesReceived *prometheus.CounterVec
messagesReceivedSize *prometheus.CounterVec
}

func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer) *delegate {
bcast := &memberlist.TransmitLimitedQueue{
NumNodes: p.ClusterSize,
RetransmitMult: 3,
}
gossipMsgsReceived := prometheus.NewCounter(prometheus.CounterOpts{
Name: "alertmanager_gossip_messages_received_total",
Help: "Total gossip NotifyMsg calls.",
})
messagesReceived := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "alertmanager_cluster_messages_received_total",
Help: "Total number of cluster messages received.",
}, []string{"msg_type"})
messagesReceivedSize := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "alertmanager_cluster_messages_received_size_total",
Help: "Total size of cluster messages received.",
}, []string{"msg_type"})
gossipClusterMembers := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Name: "alertmanager_cluster_members",
Help: "Number indicating current number of members in cluster.",
}, func() float64 {
return float64(p.ClusterSize())
})

reg.MustRegister(gossipMsgsReceived, gossipClusterMembers)
reg.MustRegister(messagesReceived, messagesReceivedSize, gossipClusterMembers)

return &delegate{
logger: l,
Peer: p,
bcast: bcast,
gossipMsgsReceived: gossipMsgsReceived,
logger: l,
Peer: p,
bcast: bcast,
messagesReceived: messagesReceived,
messagesReceivedSize: messagesReceivedSize,
}
}

Expand All @@ -264,7 +270,8 @@ func (d *delegate) NodeMeta(limit int) []byte {

// NotifyMsg is the callback invoked when a user-level gossip message is received.
func (d *delegate) NotifyMsg(b []byte) {
d.gossipMsgsReceived.Inc()
d.messagesReceived.WithLabelValues("update").Inc()
d.messagesReceivedSize.WithLabelValues("update").Add(float64(len(b)))

var p clusterpb.Part
if err := proto.Unmarshal(b, &p); err != nil {
Expand Down Expand Up @@ -308,6 +315,9 @@ func (d *delegate) LocalState(_ bool) []byte {
}

func (d *delegate) MergeRemoteState(buf []byte, _ bool) {
d.messagesReceived.WithLabelValues("full_state").Inc()
d.messagesReceivedSize.WithLabelValues("full_state").Add(float64(len(buf)))

var fs clusterpb.FullState
if err := proto.Unmarshal(buf, &fs); err != nil {
level.Warn(d.logger).Log("msg", "merge remote state", "err", err)
Expand Down

0 comments on commit 3f2e00f

Please sign in to comment.