Skip to content

Commit

Permalink
Backport of Adds support for failures_before_warning and adds failues…
Browse files Browse the repository at this point in the history
…_before options on Nomad client service

Co-authored-by: Mike Nomitch <[email protected]>
  • Loading branch information
hc-github-team-nomad-core and mikenomitch authored Dec 14, 2023
1 parent 0cb5754 commit 13850b1
Show file tree
Hide file tree
Showing 19 changed files with 362 additions and 142 deletions.
7 changes: 7 additions & 0 deletions .changelog/19336.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```release-note:improvement
consul: Added support for failures_before_warning and failures_before_critical in Nomad agent services
```

```release-note:improvement
consul: Added support for failures_before_warning in Consul service checks
```
5 changes: 5 additions & 0 deletions api/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ type ServiceCheck struct {
TaskName string `mapstructure:"task" hcl:"task,optional"`
SuccessBeforePassing int `mapstructure:"success_before_passing" hcl:"success_before_passing,optional"`
FailuresBeforeCritical int `mapstructure:"failures_before_critical" hcl:"failures_before_critical,optional"`
FailuresBeforeWarning int `mapstructure:"failures_before_warning" hcl:"failures_before_warning,optional"`
Body string `hcl:"body,optional"`
OnUpdate string `mapstructure:"on_update" hcl:"on_update,optional"`
}
Expand Down Expand Up @@ -320,6 +321,10 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
s.Checks[i].FailuresBeforeCritical = 0
}

if s.Checks[i].FailuresBeforeWarning < 0 {
s.Checks[i].FailuresBeforeWarning = 0
}

// Inhert Service
if s.Checks[i].OnUpdate == "" {
s.Checks[i].OnUpdate = s.OnUpdate
Expand Down
4 changes: 4 additions & 0 deletions api/services_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,25 +75,29 @@ func TestService_Check_PassFail(t *testing.T) {
Checks: []ServiceCheck{{
SuccessBeforePassing: -1,
FailuresBeforeCritical: -2,
FailuresBeforeWarning: -3,
}},
}

s.Canonicalize(task, tg, job)
must.Zero(t, s.Checks[0].SuccessBeforePassing)
must.Zero(t, s.Checks[0].FailuresBeforeCritical)
must.Zero(t, s.Checks[0].FailuresBeforeWarning)
})

t.Run("normal", func(t *testing.T) {
s := &Service{
Checks: []ServiceCheck{{
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}

s.Canonicalize(task, tg, job)
must.Eq(t, 3, s.Checks[0].SuccessBeforePassing)
must.Eq(t, 4, s.Checks[0].FailuresBeforeCritical)
must.Eq(t, 2, s.Checks[0].FailuresBeforeWarning)
})
}

Expand Down
21 changes: 14 additions & 7 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1172,19 +1172,25 @@ func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
httpCheckAddr = a.config.AdvertiseAddrs.HTTP
}
check := structs.ServiceCheck{
Name: defaultConsul.ClientHTTPCheckName,
Type: "http",
Path: "/v1/agent/health?type=client",
Protocol: "http",
Interval: agentHttpCheckInterval,
Timeout: agentHttpCheckTimeout,
PortLabel: httpCheckAddr,
Name: defaultConsul.ClientHTTPCheckName,
Type: "http",
Path: "/v1/agent/health?type=client",
Protocol: "http",
Interval: agentHttpCheckInterval,
Timeout: agentHttpCheckTimeout,
PortLabel: httpCheckAddr,
FailuresBeforeWarning: defaultConsul.ClientFailuresBeforeWarning,
FailuresBeforeCritical: defaultConsul.ClientFailuresBeforeCritical,
}
// Switch to endpoint that doesn't require a leader for servers
// and overwrite failures before x values
if server {
check.Name = defaultConsul.ServerHTTPCheckName
check.Path = "/v1/agent/health?type=server"
check.FailuresBeforeCritical = defaultConsul.ServerFailuresBeforeCritical
check.FailuresBeforeWarning = defaultConsul.ServerFailuresBeforeWarning
}

if !a.config.TLSConfig.EnableHTTP {
// No HTTPS, return a plain http check
return &check
Expand All @@ -1197,6 +1203,7 @@ func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
// HTTPS enabled; skip verification
check.Protocol = "https"
check.TLSSkipVerify = true

return &check
}

Expand Down
23 changes: 21 additions & 2 deletions command/agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -775,8 +775,10 @@ func TestAgent_HTTPCheck(t *testing.T) {
AdvertiseAddrs: &AdvertiseAddrs{HTTP: "advertise:4646"},
normalizedAddrs: &NormalizedAddrs{HTTP: []string{"normalized:4646"}},
Consuls: []*config.ConsulConfig{{
Name: "default",
ChecksUseAdvertise: pointer.Of(false),
Name: "default",
ChecksUseAdvertise: pointer.Of(false),
ClientFailuresBeforeCritical: 2,
ClientFailuresBeforeWarning: 1,
}},
TLSConfig: &config.TLSConfig{EnableHTTP: false},
},
Expand All @@ -801,6 +803,12 @@ func TestAgent_HTTPCheck(t *testing.T) {
if expected := a.config.normalizedAddrs.HTTP[0]; check.PortLabel != expected {
t.Errorf("expected normalized addr not %q", check.PortLabel)
}
if expected := 2; check.FailuresBeforeCritical != expected {
t.Errorf("expected failured before critical count not: %q", expected)
}
if expected := 1; check.FailuresBeforeWarning != expected {
t.Errorf("expected failured before warning count not: %q", expected)
}
})

t.Run("Plain HTTP + ChecksUseAdvertise", func(t *testing.T) {
Expand Down Expand Up @@ -851,6 +859,10 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
config: DevConfig(nil),
logger: testlog.HCLogger(t),
}
// setting to ensure this does not get set for the server
a.config.Consuls[0].ServerFailuresBeforeCritical = 4
a.config.Consuls[0].ServerFailuresBeforeWarning = 3

if err := a.config.normalizeAddrs(); err != nil {
t.Fatalf("error normalizing config: %v", err)
}
Expand All @@ -864,6 +876,13 @@ func TestAgent_HTTPCheckPath(t *testing.T) {
if expected := "/v1/agent/health?type=server"; check.Path != expected {
t.Errorf("expected server check path to be %q but found %q", expected, check.Path)
}
// ensure server failures before critical and warning are set
if expected := 4; check.FailuresBeforeCritical != expected {
t.Errorf("expected failured before critical count not: %q", expected)
}
if expected := 3; check.FailuresBeforeWarning != expected {
t.Errorf("expected failured before warning count not: %q", expected)
}

// Assert client check uses /v1/agent/health?type=client
isServer = false
Expand Down
138 changes: 81 additions & 57 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,14 @@ func (c *Command) readConfig() *Config {
}), "consul-client-auto-join", "")
flags.StringVar(&defaultConsul.ClientServiceName, "consul-client-service-name", "", "")
flags.StringVar(&defaultConsul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
flags.IntVar(&defaultConsul.ClientFailuresBeforeCritical, "consul-client-failures-before-critical", 0, "")
flags.IntVar(&defaultConsul.ClientFailuresBeforeWarning, "consul-client-failures-before-warning", 0, "")
flags.StringVar(&defaultConsul.ServerServiceName, "consul-server-service-name", "", "")
flags.StringVar(&defaultConsul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
flags.StringVar(&defaultConsul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
flags.StringVar(&defaultConsul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "")
flags.IntVar(&defaultConsul.ServerFailuresBeforeCritical, "consul-server-failures-before-critical", 0, "")
flags.IntVar(&defaultConsul.ServerFailuresBeforeWarning, "consul-server-failures-before-warning", 0, "")
flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
defaultConsul.ServerAutoJoin = &b
return nil
Expand Down Expand Up @@ -689,63 +693,67 @@ func (c *Command) AutocompleteFlags() complete.Flags {
complete.PredictFiles("*.hcl"))

return map[string]complete.Predictor{
"-dev": complete.PredictNothing,
"-dev-connect": complete.PredictNothing,
"-server": complete.PredictNothing,
"-client": complete.PredictNothing,
"-bootstrap-expect": complete.PredictAnything,
"-encrypt": complete.PredictAnything,
"-raft-protocol": complete.PredictAnything,
"-rejoin": complete.PredictNothing,
"-join": complete.PredictAnything,
"-retry-join": complete.PredictAnything,
"-retry-max": complete.PredictAnything,
"-state-dir": complete.PredictDirs("*"),
"-alloc-dir": complete.PredictDirs("*"),
"-node-class": complete.PredictAnything,
"-node-pool": complete.PredictAnything,
"-servers": complete.PredictAnything,
"-meta": complete.PredictAnything,
"-config": configFilePredictor,
"-bind": complete.PredictAnything,
"-region": complete.PredictAnything,
"-data-dir": complete.PredictDirs("*"),
"-plugin-dir": complete.PredictDirs("*"),
"-dc": complete.PredictAnything,
"-log-level": complete.PredictAnything,
"-json-logs": complete.PredictNothing,
"-node": complete.PredictAnything,
"-consul-auth": complete.PredictAnything,
"-consul-auto-advertise": complete.PredictNothing,
"-consul-ca-file": complete.PredictAnything,
"-consul-cert-file": complete.PredictAnything,
"-consul-key-file": complete.PredictAnything,
"-consul-checks-use-advertise": complete.PredictNothing,
"-consul-client-auto-join": complete.PredictNothing,
"-consul-client-service-name": complete.PredictAnything,
"-consul-client-http-check-name": complete.PredictAnything,
"-consul-server-service-name": complete.PredictAnything,
"-consul-server-http-check-name": complete.PredictAnything,
"-consul-server-serf-check-name": complete.PredictAnything,
"-consul-server-rpc-check-name": complete.PredictAnything,
"-consul-server-auto-join": complete.PredictNothing,
"-consul-ssl": complete.PredictNothing,
"-consul-verify-ssl": complete.PredictNothing,
"-consul-address": complete.PredictAnything,
"-consul-token": complete.PredictAnything,
"-vault-enabled": complete.PredictNothing,
"-vault-allow-unauthenticated": complete.PredictNothing,
"-vault-token": complete.PredictAnything,
"-vault-address": complete.PredictAnything,
"-vault-create-from-role": complete.PredictAnything,
"-vault-ca-file": complete.PredictAnything,
"-vault-ca-path": complete.PredictAnything,
"-vault-cert-file": complete.PredictAnything,
"-vault-key-file": complete.PredictAnything,
"-vault-tls-skip-verify": complete.PredictNothing,
"-vault-tls-server-name": complete.PredictAnything,
"-acl-enabled": complete.PredictNothing,
"-acl-replication-token": complete.PredictAnything,
"-dev": complete.PredictNothing,
"-dev-connect": complete.PredictNothing,
"-server": complete.PredictNothing,
"-client": complete.PredictNothing,
"-bootstrap-expect": complete.PredictAnything,
"-encrypt": complete.PredictAnything,
"-raft-protocol": complete.PredictAnything,
"-rejoin": complete.PredictNothing,
"-join": complete.PredictAnything,
"-retry-join": complete.PredictAnything,
"-retry-max": complete.PredictAnything,
"-state-dir": complete.PredictDirs("*"),
"-alloc-dir": complete.PredictDirs("*"),
"-node-class": complete.PredictAnything,
"-node-pool": complete.PredictAnything,
"-servers": complete.PredictAnything,
"-meta": complete.PredictAnything,
"-config": configFilePredictor,
"-bind": complete.PredictAnything,
"-region": complete.PredictAnything,
"-data-dir": complete.PredictDirs("*"),
"-plugin-dir": complete.PredictDirs("*"),
"-dc": complete.PredictAnything,
"-log-level": complete.PredictAnything,
"-json-logs": complete.PredictNothing,
"-node": complete.PredictAnything,
"-consul-auth": complete.PredictAnything,
"-consul-auto-advertise": complete.PredictNothing,
"-consul-ca-file": complete.PredictAnything,
"-consul-cert-file": complete.PredictAnything,
"-consul-key-file": complete.PredictAnything,
"-consul-checks-use-advertise": complete.PredictNothing,
"-consul-client-auto-join": complete.PredictNothing,
"-consul-client-service-name": complete.PredictAnything,
"-consul-client-failures-before-critical": complete.PredictAnything,
"-consul-client-failures-before-warning": complete.PredictAnything,
"-consul-client-http-check-name": complete.PredictAnything,
"-consul-server-service-name": complete.PredictAnything,
"-consul-server-http-check-name": complete.PredictAnything,
"-consul-server-serf-check-name": complete.PredictAnything,
"-consul-server-rpc-check-name": complete.PredictAnything,
"-consul-server-auto-join": complete.PredictNothing,
"-consul-server-failures-before-critical": complete.PredictAnything,
"-consul-server-failures-before-warning": complete.PredictAnything,
"-consul-ssl": complete.PredictNothing,
"-consul-verify-ssl": complete.PredictNothing,
"-consul-address": complete.PredictAnything,
"-consul-token": complete.PredictAnything,
"-vault-enabled": complete.PredictNothing,
"-vault-allow-unauthenticated": complete.PredictNothing,
"-vault-token": complete.PredictAnything,
"-vault-address": complete.PredictAnything,
"-vault-create-from-role": complete.PredictAnything,
"-vault-ca-file": complete.PredictAnything,
"-vault-ca-path": complete.PredictAnything,
"-vault-cert-file": complete.PredictAnything,
"-vault-key-file": complete.PredictAnything,
"-vault-tls-skip-verify": complete.PredictNothing,
"-vault-tls-server-name": complete.PredictAnything,
"-acl-enabled": complete.PredictNothing,
"-acl-replication-token": complete.PredictAnything,
}
}

Expand Down Expand Up @@ -1564,6 +1572,14 @@ Consul Options:
-consul-client-http-check-name=<name>
Specifies the HTTP health check name in Consul for the Nomad clients.
-consul-client-failures-before-critical
Specifies the number of consecutive failures before the Nomad client
Consul health check is critical. Defaults to 0.
-consul-client-failures-before-warning
Specifies the number of consecutive failures before the Nomad client
Consul health check shows a warning. Defaults to 0.
-consul-key-file=<path>
Specifies the path to the private key used for Consul communication. If this
is set then you need to also set cert_file.
Expand All @@ -1586,6 +1602,14 @@ Consul Options:
server_service_name option. This search only happens if the server does not
have a leader.
-consul-server-failures-before-critical
Specifies the number of consecutive failures before the Nomad server
Consul health check is critical. Defaults to 0.
-consul-server-failures-before-warning
Specifies the number of consecutive failures before the Nomad server
Consul health check shows a warning. Defaults to 0.
-consul-ssl
Specifies if the transport scheme should use HTTPS to communicate with the
Consul agent.
Expand Down
2 changes: 2 additions & 0 deletions command/agent/consul/service_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,7 @@ func apiCheckRegistrationToCheck(r *api.AgentCheckRegistration) *api.AgentServic
GRPCUseTLS: r.GRPCUseTLS,
SuccessBeforePassing: r.SuccessBeforePassing,
FailuresBeforeCritical: r.FailuresBeforeCritical,
FailuresBeforeWarning: r.FailuresBeforeWarning,
}
}

Expand Down Expand Up @@ -1969,6 +1970,7 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
chkReg.Interval = check.Interval.String()
chkReg.SuccessBeforePassing = check.SuccessBeforePassing
chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical
chkReg.FailuresBeforeWarning = check.FailuresBeforeWarning

// Require an address for http or tcp checks
if port == 0 && check.RequiresPort() {
Expand Down
1 change: 1 addition & 0 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1608,6 +1608,7 @@ func ApiServicesToStructs(in []*api.Service, group bool) []*structs.Service {
GRPCUseTLS: check.GRPCUseTLS,
SuccessBeforePassing: check.SuccessBeforePassing,
FailuresBeforeCritical: check.FailuresBeforeCritical,
FailuresBeforeWarning: check.FailuresBeforeWarning,
OnUpdate: onUpdate,
}

Expand Down
4 changes: 4 additions & 0 deletions command/agent/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2736,6 +2736,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
TaskName: "task1",
SuccessBeforePassing: 2,
FailuresBeforeCritical: 3,
FailuresBeforeWarning: 2,
},
},
Connect: &api.ConsulConnect{
Expand Down Expand Up @@ -2836,6 +2837,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
InitialStatus: "ok",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
CheckRestart: &api.CheckRestart{
Limit: 3,
IgnoreWarnings: true,
Expand Down Expand Up @@ -3167,6 +3169,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
OnUpdate: structs.OnUpdateRequireHealthy,
SuccessBeforePassing: 2,
FailuresBeforeCritical: 3,
FailuresBeforeWarning: 2,
},
},
Connect: &structs.ConsulConnect{
Expand Down Expand Up @@ -3267,6 +3270,7 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
GRPCUseTLS: true,
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
CheckRestart: &structs.CheckRestart{
Limit: 3,
Grace: 11 * time.Second,
Expand Down
1 change: 1 addition & 0 deletions jobspec/parse_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ func parseChecks(service *api.Service, checkObjs *ast.ObjectList) error {
"task",
"success_before_passing",
"failures_before_critical",
"failures_before_warning",
"on_update",
"body",
}
Expand Down
2 changes: 2 additions & 0 deletions jobspec/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ func TestParse(t *testing.T) {
Method: "POST",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}},
}},
Expand Down Expand Up @@ -789,6 +790,7 @@ func TestParse(t *testing.T) {
Method: "POST",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
FailuresBeforeWarning: 2,
}},
}},
}},
Expand Down
Loading

0 comments on commit 13850b1

Please sign in to comment.