This repository has been archived by the owner on Dec 14, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgossip_checks.go
88 lines (74 loc) · 2.13 KB
/
gossip_checks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package main
import (
"fmt"
"math"
"time"
)
// getGossip requests "/gossip" through the client and converts the returned
// body with toGossipResponse.
//
// The call is tracked as the "collect_gossip" check on cs. If either the
// request or the conversion reports an error, that check is marked failed
// with a message containing the error, and the error is returned alongside a
// nil response.
func (client *esHTTPClient) getGossip(cs *checkSet) (*gossipResponse, error) {
	check := cs.createCheck("collect_gossip")
	defer cs.monitorCheck(time.Now(), check)

	body, err := client.get("/gossip")
	if err != nil {
		msg := fmt.Sprintf("An error occured fetching gossip. %s", err)
		check.fail(msg)
		return nil, err
	}

	gossip, err := toGossipResponse(body)
	if err != nil {
		msg := fmt.Sprintf("An error occured parsing gossip. %s", err)
		check.fail(msg)
		return nil, err
	}

	return gossip, nil
}
// doMasterCount records the "alive_master" check: it counts the members of r
// for which IsAliveMaster reports true, stores that count as the check's Data
// and Output, and fails the check unless the count is exactly one.
func (cs *checkSet) doMasterCount(r *gossipResponse) {
	check := cs.createCheck("alive_master")
	defer cs.monitorCheck(time.Now(), check)

	masters := 0
	for _, member := range r.Members {
		if !member.IsAliveMaster() {
			continue
		}
		masters++
	}

	check.Data = masters
	check.Output = fmt.Sprintf("%d master node(s)", masters)

	if masters == 1 {
		return
	}
	check.fail(fmt.Sprintf("Expected 1 master. Found %d.", masters))
}
// doSlaveCount records the "alive_slaves" check: it counts the members of r
// whose State is "Slave" and whose IsAlive flag is set, and stores that count
// as the check's Data and Output.
//
// Thresholds are derived from config.ClusterSize:
//   - fail when the count is below ceil(ClusterSize/2) - 1
//   - otherwise warn when the count is below ClusterSize - 1
func (cs *checkSet) doSlaveCount(r *gossipResponse) {
	check := cs.createCheck("alive_slaves")
	defer cs.monitorCheck(time.Now(), check)

	halfRoundedUp := math.Ceil(float64(config.ClusterSize) / 2)
	minSlaves := int(halfRoundedUp) - 1
	wantSlaves := config.ClusterSize - 1

	slaves := 0
	for _, member := range r.Members {
		if member.State != "Slave" || !member.IsAlive {
			continue
		}
		slaves++
	}

	check.Data = slaves
	check.Output = fmt.Sprintf("%d slave node(s)", slaves)

	switch {
	case slaves < minSlaves:
		check.fail(fmt.Sprintf("Expected at least %d slave(s). Found %d.", minSlaves, slaves))
	case slaves < wantSlaves:
		check.warn(fmt.Sprintf("Want %d or more slave(s). Found %d.", wantSlaves, slaves))
	}
}
// doAliveCount records the "alive_nodes" check: it counts the members of r
// whose IsAlive flag is set, regardless of state, and stores that count as
// the check's Data and Output.
//
// Thresholds are derived from config.ClusterSize:
//   - fail when the count is below ceil(ClusterSize/2)
//   - otherwise warn when the count is below ClusterSize
func (cs *checkSet) doAliveCount(r *gossipResponse) {
	check := cs.createCheck("alive_nodes")
	defer cs.monitorCheck(time.Now(), check)

	halfRoundedUp := math.Ceil(float64(config.ClusterSize) / 2)
	minAlive := int(halfRoundedUp)
	wantAlive := config.ClusterSize

	alive := 0
	for _, member := range r.Members {
		if !member.IsAlive {
			continue
		}
		alive++
	}

	check.Data = alive
	check.Output = fmt.Sprintf("%d alive node(s)", alive)

	switch {
	case alive < minAlive:
		check.fail(fmt.Sprintf("Expected at least %d alive node(s). Found %d.", minAlive, alive))
	case alive < wantAlive:
		check.warn(fmt.Sprintf("Want %d or more alive node(s). Found %d.", wantAlive, alive))
	}
}