Skip to content

Commit 3223686

Browse files
committed
connect: enable automatic expose paths for individual group service checks
Part of #6120. Building on the support for enabling connect proxy paths in #7323, this change adds the ability to configure the 'service.check.expose' flag on group-level service check definitions for services that are connect-enabled. This is a slight deviation from the "magic" that Consul provides. With Consul, the 'expose' flag exists on the connect.proxy stanza, which will then auto-generate expose paths for every HTTP and gRPC service check associated with that connect-enabled service. A first attempt at providing similar magic for Nomad's Consul Connect integration followed that pattern exactly, as seen in #7396. However, on reviewing the PR we realized having the `expose` flag on the proxy stanza inseparably ties together the automatic path generation with every HTTP/gRPC check defined on the service. This makes sense in Consul's context, because a service definition is reasonably associated with a single "task". With Nomad's group-level service definitions, however, there is a reasonable expectation that a service definition is more abstractly representative of multiple services within the task group. In this case, one would want to define checks of that service which concretely make HTTP or gRPC requests to different underlying tasks. Such a model is not possible with the coarse `proxy.expose` flag. Instead, we now have the flag made available within the check definitions themselves. By making the expose feature specific to each check, it is possible to have some HTTP/gRPC checks which make use of the envoy exposed paths, as well as some HTTP/gRPC checks which make use of some orthogonal port-mapping to do checks on some other task (or even some other bound port of the same task) within the task group. 
Given this example, group "server-group" { network { mode = "bridge" port "forchecks" { to = -1 } } service { name = "myserver" port = 2000 connect { sidecar_service { } } check { name = "mycheck-myserver" type = "http" port = "forchecks" interval = "3s" timeout = "2s" method = "GET" path = "/classic/responder/health" expose = true } } } Nomad will automatically inject (via job endpoint mutator) the extrapolated expose path configuration, i.e. expose { path { path = "/classic/responder/health" protocol = "http" local_path_port = 2000 listener_port = "forchecks" } } Documentation is coming in #7440 (needs updating, doing next) Modifications to the `countdash` examples in hashicorp/demo-consul-101#6 which will make the examples in the documentation actually runnable. Will add some e2e tests based on the above when it becomes available.
1 parent 4e5524e commit 3223686

12 files changed

+1114
-55
lines changed

api/services.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ type ServiceCheck struct {
8181
Path string
8282
Protocol string
8383
PortLabel string `mapstructure:"port"`
84+
Expose bool
8485
AddressMode string `mapstructure:"address_mode"`
8586
Interval time.Duration
8687
Timeout time.Duration
@@ -183,7 +184,6 @@ type ConsulUpstream struct {
183184

184185
type ConsulExposeConfig struct {
185186
Path []*ConsulExposePath `mapstructure:"path"`
186-
// todo(shoenig): add magic for 'checks' option
187187
}
188188

189189
type ConsulExposePath struct {

command/agent/job_endpoint.go

+1
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,7 @@ func ApiServicesToStructs(in []*api.Service) []*structs.Service {
11511151
Path: check.Path,
11521152
Protocol: check.Protocol,
11531153
PortLabel: check.PortLabel,
1154+
Expose: check.Expose,
11541155
AddressMode: check.AddressMode,
11551156
Interval: check.Interval,
11561157
Timeout: check.Timeout,

jobspec/parse_service.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,6 @@ func parseProxy(o *ast.ObjectItem) (*api.ConsulProxy, error) {
405405
func parseExpose(eo *ast.ObjectItem) (*api.ConsulExposeConfig, error) {
406406
valid := []string{
407407
"path", // an array of path blocks
408-
// todo(shoenig) checks boolean
409408
}
410409

411410
if err := helper.CheckHCLKeys(eo.Val, valid); err != nil {
@@ -514,6 +513,7 @@ func parseChecks(service *api.Service, checkObjs *ast.ObjectList) error {
514513
"path",
515514
"protocol",
516515
"port",
516+
"expose",
517517
"command",
518518
"args",
519519
"initial_status",

jobspec/parse_test.go

+26
Original file line numberDiff line numberDiff line change
@@ -1150,6 +1150,32 @@ func TestParse(t *testing.T) {
11501150
},
11511151
false,
11521152
},
1153+
{
1154+
"tg-service-check-expose.hcl",
1155+
&api.Job{
1156+
ID: helper.StringToPtr("group_service_proxy_expose"),
1157+
Name: helper.StringToPtr("group_service_proxy_expose"),
1158+
TaskGroups: []*api.TaskGroup{{
1159+
Name: helper.StringToPtr("group"),
1160+
Services: []*api.Service{{
1161+
Name: "example",
1162+
Connect: &api.ConsulConnect{
1163+
SidecarService: &api.ConsulSidecarService{
1164+
Proxy: &api.ConsulProxy{},
1165+
},
1166+
},
1167+
Checks: []api.ServiceCheck{{
1168+
Name: "example-check1",
1169+
Expose: true,
1170+
}, {
1171+
Name: "example-check2",
1172+
Expose: false,
1173+
}},
1174+
}},
1175+
}},
1176+
},
1177+
false,
1178+
},
11531179
{
11541180
"tg-service-enable-tag-override.hcl",
11551181
&api.Job{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
job "group_service_proxy_expose" {
2+
group "group" {
3+
service {
4+
name = "example"
5+
connect {
6+
sidecar_service {
7+
proxy {
8+
}
9+
}
10+
}
11+
12+
check {
13+
name = "example-check1"
14+
expose = true
15+
}
16+
17+
check {
18+
name = "example-check2"
19+
expose = false
20+
}
21+
}
22+
}
23+
}

nomad/job_endpoint.go

+2
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,12 @@ func NewJobEndpoints(s *Server) *Job {
6161
mutators: []jobMutator{
6262
jobCanonicalizer{},
6363
jobConnectHook{},
64+
jobExposeCheckHook{},
6465
jobImpliedConstraints{},
6566
},
6667
validators: []jobValidator{
6768
jobConnectHook{},
69+
jobExposeCheckHook{},
6870
jobValidate{},
6971
},
7072
}
+228
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
package nomad
2+
3+
import (
4+
"strconv"
5+
"strings"
6+
7+
"github.com/hashicorp/nomad/nomad/structs"
8+
"github.com/pkg/errors"
9+
)
10+
11+
// exposeCheckHookName is the identifier reported by jobExposeCheckHook.Name.
const exposeCheckHookName = "expose-check"

// jobExposeCheckHook is a stateless job admission hook which handles service
// checks that set the Expose field. It provides both Mutate and Validate.
type jobExposeCheckHook struct{}

// Name returns the identifier of this hook.
func (jobExposeCheckHook) Name() string {
	return exposeCheckHookName
}
16+
17+
// Mutate will scan every task group for group-services which have checks defined
18+
// that have the Expose field configured, and generate expose path configurations
19+
// extrapolated from those check definitions.
20+
func (jobExposeCheckHook) Mutate(job *structs.Job) (_ *structs.Job, warnings []error, err error) {
21+
for _, tg := range job.TaskGroups {
22+
for _, s := range tg.Services {
23+
for _, c := range s.Checks {
24+
if c.Expose {
25+
if exposePath, err := exposePathForCheck(tg, s, c); err != nil {
26+
return nil, nil, err
27+
} else if exposePath != nil {
28+
serviceExposeConfig := serviceExposeConfig(s)
29+
// insert only if not already present - required for job
30+
// updates which would otherwise create duplicates
31+
if !containsExposePath(serviceExposeConfig.Paths, *exposePath) {
32+
serviceExposeConfig.Paths = append(
33+
serviceExposeConfig.Paths, *exposePath,
34+
)
35+
}
36+
}
37+
}
38+
}
39+
}
40+
}
41+
return job, nil, nil
42+
}
43+
44+
// Validate will ensure:
45+
// - The job contains valid network configuration for each task group in which
46+
// an expose path is configured. The network must be of type bridge mode.
47+
// - The check Expose field is configured only for connect-enabled group-services.
48+
func (jobExposeCheckHook) Validate(job *structs.Job) (warnings []error, err error) {
49+
for _, tg := range job.TaskGroups {
50+
// Make sure any group that contains a group-service that enables expose
51+
// is configured with one network that is in "bridge" mode. This check
52+
// is being done independently of the preceding Connect task injection
53+
// hook, because at some point in the future Connect will not require the
54+
// use of network namespaces, whereas the use of "expose" does not make
55+
// sense without the use of network namespace.
56+
if err := tgValidateUseOfBridgeMode(tg); err != nil {
57+
return nil, err
58+
}
59+
// Make sure any group-service that contains a check that enables expose
60+
// is connect-enabled and does not specify a custom sidecar task. We only
61+
// support the expose feature when using the built-in Envoy integration.
62+
if err := tgValidateUseOfCheckExpose(tg); err != nil {
63+
return nil, err
64+
}
65+
}
66+
return nil, nil
67+
}
68+
69+
// serviceExposeConfig digs through s to extract the connect sidecar service proxy
70+
// expose configuration. It is not required of the user to provide this, so it
71+
// is created on demand here as needed in the case where any service check exposes
72+
// itself.
73+
//
74+
// The service, connect, and sidecar_service are assumed not to be nil, as they
75+
// are enforced in previous hooks / validation.
76+
func serviceExposeConfig(s *structs.Service) *structs.ConsulExposeConfig {
77+
if s.Connect.SidecarService.Proxy == nil {
78+
s.Connect.SidecarService.Proxy = new(structs.ConsulProxy)
79+
}
80+
if s.Connect.SidecarService.Proxy.Expose == nil {
81+
s.Connect.SidecarService.Proxy.Expose = new(structs.ConsulExposeConfig)
82+
}
83+
return s.Connect.SidecarService.Proxy.Expose
84+
}
85+
86+
// containsExposePath returns true if path is contained in paths.
87+
func containsExposePath(paths []structs.ConsulExposePath, path structs.ConsulExposePath) bool {
88+
for _, p := range paths {
89+
if p == path {
90+
return true
91+
}
92+
}
93+
return false
94+
}
95+
96+
// tgValidateUseOfCheckExpose ensures that any service check in tg making use
97+
// of the expose field is within an appropriate context to do so. The check must
98+
// be a group level check, and must use the builtin envoy proxy.
99+
func tgValidateUseOfCheckExpose(tg *structs.TaskGroup) error {
100+
// validation for group services (which must use built-in connect proxy)
101+
for _, s := range tg.Services {
102+
for _, check := range s.Checks {
103+
if check.Expose && !serviceUsesConnectEnvoy(s) {
104+
return errors.Errorf(
105+
"exposed service check %s->%s->%s requires use of Nomad's builtin Connect proxy",
106+
tg.Name, s.Name, check.Name,
107+
)
108+
}
109+
}
110+
}
111+
112+
// validation for task services (which must not be configured to use Expose)
113+
for _, t := range tg.Tasks {
114+
for _, s := range t.Services {
115+
for _, check := range s.Checks {
116+
if check.Expose {
117+
return errors.Errorf(
118+
"exposed service check %s[%s]->%s->%s is not a task-group service",
119+
tg.Name, t.Name, s.Name, check.Name,
120+
)
121+
}
122+
}
123+
}
124+
}
125+
return nil
126+
}
127+
128+
// tgValidateUseOfBridgeMode ensures there is exactly 1 network configured for
129+
// the task group, and that it makes use of "bridge" mode (i.e. enables network
130+
// namespaces).
131+
func tgValidateUseOfBridgeMode(tg *structs.TaskGroup) error {
132+
if tgUsesExposeCheck(tg) {
133+
if len(tg.Networks) != 1 {
134+
return errors.Errorf("group %q must specify one bridge network for exposing service check(s)", tg.Name)
135+
}
136+
if tg.Networks[0].Mode != "bridge" {
137+
return errors.Errorf("group %q must use bridge network for exposing service check(s)", tg.Name)
138+
}
139+
}
140+
return nil
141+
}
142+
143+
// tgUsesExposeCheck returns true if any group service in the task group makes
144+
// use of the expose field.
145+
func tgUsesExposeCheck(tg *structs.TaskGroup) bool {
146+
for _, s := range tg.Services {
147+
for _, check := range s.Checks {
148+
if check.Expose {
149+
return true
150+
}
151+
}
152+
}
153+
return false
154+
}
155+
156+
// serviceUsesConnectEnvoy returns true if the service is going to end up using
157+
// the built-in envoy proxy.
158+
//
159+
// This implementation is kind of reading tea leaves - firstly Connect
160+
// must be enabled, and second the sidecar_task must not be overridden. If these
161+
// conditions are met, the preceding connect hook will have injected a Connect
162+
// sidecar task, the configuration of which is interpolated at runtime.
163+
func serviceUsesConnectEnvoy(s *structs.Service) bool {
164+
// A non-nil connect stanza implies this service isn't connect enabled in
165+
// the first place.
166+
if s.Connect == nil {
167+
return false
168+
}
169+
170+
// A non-nil connect.sidecar_task stanza implies the sidecar task is being
171+
// overridden (i.e. the default Envoy is not being uesd).
172+
if s.Connect.SidecarTask != nil {
173+
return false
174+
}
175+
176+
return true
177+
}
178+
179+
// checkIsExposable returns true if check is qualified for automatic generation
180+
// of connect proxy expose path configuration based on configured consul checks.
181+
// To qualify, the check must be of type "http" or "grpc", and must have a Path
182+
// configured.
183+
func checkIsExposable(check *structs.ServiceCheck) bool {
184+
switch strings.ToLower(check.Type) {
185+
case "grpc", "http":
186+
return strings.HasPrefix(check.Path, "/")
187+
default:
188+
return false
189+
}
190+
}
191+
192+
// exposePathForCheck extrapolates the necessary expose path configuration for
193+
// the given consul service check. If the check is not compatible, nil is
194+
// returned.
195+
func exposePathForCheck(tg *structs.TaskGroup, s *structs.Service, check *structs.ServiceCheck) (*structs.ConsulExposePath, error) {
196+
if !checkIsExposable(check) {
197+
return nil, nil
198+
}
199+
200+
// Determine the local service port (i.e. what port the service is actually
201+
// listening to inside the network namespace).
202+
//
203+
// Similar logic exists in getAddress of client.go which is used for
204+
// creating check & service registration objects.
205+
//
206+
// The difference here is the address is predestined to be localhost since
207+
// it is binding inside the namespace.
208+
var port int
209+
if _, port = tg.Networks.Port(s.PortLabel); port <= 0 { // try looking up by port label
210+
if port, _ = strconv.Atoi(s.PortLabel); port <= 0 { // then try direct port value
211+
return nil, errors.Errorf(
212+
"unable to determine local service port for service check %s->%s->%s",
213+
tg.Name, s.Name, check.Name,
214+
)
215+
}
216+
}
217+
218+
// The Path, Protocol, and PortLabel are just copied over from the service
219+
// check definition. It is required that the user configure their own port
220+
// mapping for each check, including setting the 'to = -1' sentinel value
221+
// enabling the network namespace pass-through.
222+
return &structs.ConsulExposePath{
223+
Path: check.Path,
224+
Protocol: check.Protocol,
225+
LocalPathPort: port,
226+
ListenerPort: check.PortLabel,
227+
}, nil
228+
}

0 commit comments

Comments
 (0)