@@ -2,7 +2,10 @@ package consul
2
2
3
3
import (
4
4
"context"
5
+ "crypto/sha1"
6
+ "encoding/base32"
5
7
"fmt"
8
+ "io"
6
9
"log"
7
10
"net"
8
11
"net/url"
@@ -21,10 +24,14 @@ import (
21
24
)
22
25
23
26
const (
24
- // nomadServicePrefix is the first prefix that scopes all Nomad registered
25
- // services
27
+ // nomadServicePrefix is the prefix that scopes all Nomad registered
28
+ // services (both agent and task entries).
26
29
nomadServicePrefix = "_nomad"
27
30
31
+ // nomadTaskPrefix is the prefix that scopes Nomad registered services
32
+ // for tasks.
33
+ nomadTaskPrefix = nomadServicePrefix + "-task-"
34
+
28
35
// defaultRetryInterval is how quickly to retry syncing services and
29
36
// checks to Consul when an error occurs. Will backoff up to a max.
30
37
defaultRetryInterval = time .Second
@@ -288,8 +295,13 @@ func (c *ServiceClient) Run() {
288
295
289
296
if err := c .sync (); err != nil {
290
297
if failures == 0 {
298
+ // Log on the first failure
291
299
c .logger .Printf ("[WARN] consul.sync: failed to update services in Consul: %v" , err )
300
+ } else if failures % 10 == 0 {
301
+ // Log every 10th consecutive failure
302
+ c .logger .Printf ("[ERR] consul.sync: still unable to update services in Consul after %d failures; latest error: %v" , failures , err )
292
303
}
304
+
293
305
failures ++
294
306
if ! retryTimer .Stop () {
295
307
// Timer already expired, since the timer may
@@ -389,38 +401,31 @@ func (c *ServiceClient) sync() error {
389
401
// Not managed by Nomad, skip
390
402
continue
391
403
}
404
+
392
405
// Unknown Nomad managed service; kill
393
406
if err := c .client .ServiceDeregister (id ); err != nil {
407
+ if isOldNomadService (id ) {
408
+ // Don't hard-fail on old entries. See #3620
409
+ continue
410
+ }
411
+
394
412
metrics .IncrCounter ([]string {"client" , "consul" , "sync_failure" }, 1 )
395
413
return err
396
414
}
397
415
sdereg ++
398
416
metrics .IncrCounter ([]string {"client" , "consul" , "service_deregistrations" }, 1 )
399
417
}
400
418
401
- // Track services whose ports have changed as their checks may also
402
- // need updating
403
- portsChanged := make (map [string ]struct {}, len (c .services ))
404
-
405
419
// Add Nomad services missing from Consul
406
420
for id , locals := range c .services {
407
- if remotes , ok := consulServices [id ]; ok {
408
- // Make sure Port and Address are stable since
409
- // PortLabel and AddressMode aren't included in the
410
- // service ID.
411
- if locals .Port == remotes .Port && locals .Address == remotes .Address {
412
- // Already exists in Consul; skip
413
- continue
421
+ if _ , ok := consulServices [id ]; ! ok {
422
+ if err = c .client .ServiceRegister (locals ); err != nil {
423
+ metrics .IncrCounter ([]string {"client" , "consul" , "sync_failure" }, 1 )
424
+ return err
414
425
}
415
- // Port changed, reregister it and its checks
416
- portsChanged [id ] = struct {}{}
417
- }
418
- if err = c .client .ServiceRegister (locals ); err != nil {
419
- metrics .IncrCounter ([]string {"client" , "consul" , "sync_failure" }, 1 )
420
- return err
426
+ sreg ++
427
+ metrics .IncrCounter ([]string {"client" , "consul" , "service_registrations" }, 1 )
421
428
}
422
- sreg ++
423
- metrics .IncrCounter ([]string {"client" , "consul" , "service_registrations" }, 1 )
424
429
}
425
430
426
431
// Remove Nomad checks in Consul but unknown locally
@@ -433,8 +438,14 @@ func (c *ServiceClient) sync() error {
433
438
// Service not managed by Nomad, skip
434
439
continue
435
440
}
436
- // Unknown Nomad managed check; kill
441
+
442
+ // Unknown Nomad managed check; remove
437
443
if err := c .client .CheckDeregister (id ); err != nil {
444
+ if isOldNomadService (check .ServiceID ) {
445
+ // Don't hard-fail on old entries.
446
+ continue
447
+ }
448
+
438
449
metrics .IncrCounter ([]string {"client" , "consul" , "sync_failure" }, 1 )
439
450
return err
440
451
}
@@ -444,12 +455,11 @@ func (c *ServiceClient) sync() error {
444
455
445
456
// Add Nomad checks missing from Consul
446
457
for id , check := range c .checks {
447
- if check , ok := consulChecks [id ]; ok {
448
- if _ , changed := portsChanged [check .ServiceID ]; ! changed {
449
- // Already in Consul and ports didn't change; skipping
450
- continue
451
- }
458
+ if _ , ok := consulChecks [id ]; ok {
459
+ // Already in Consul; skipping
460
+ continue
452
461
}
462
+
453
463
if err := c .client .CheckRegister (check ); err != nil {
454
464
metrics .IncrCounter ([]string {"client" , "consul" , "sync_failure" }, 1 )
455
465
return err
@@ -751,22 +761,17 @@ func (c *ServiceClient) UpdateTask(allocID string, existing, newTask *structs.Ta
751
761
continue
752
762
}
753
763
764
+ // Service exists and hasn't changed, don't re-add it later
765
+ delete (newIDs , existingID )
766
+
754
767
// Service still exists so add it to the task's registration
755
768
sreg := & ServiceRegistration {
756
769
serviceID : existingID ,
757
770
checkIDs : make (map [string ]struct {}, len (newSvc .Checks )),
758
771
}
759
772
taskReg .Services [existingID ] = sreg
760
773
761
- // PortLabel and AddressMode aren't included in the ID, so we
762
- // have to compare manually.
763
- serviceUnchanged := newSvc .PortLabel == existingSvc .PortLabel && newSvc .AddressMode == existingSvc .AddressMode
764
- if serviceUnchanged {
765
- // Service exists and hasn't changed, don't add it later
766
- delete (newIDs , existingID )
767
- }
768
-
769
- // See what checks were updated
774
+ // See if any checks were updated
770
775
existingChecks := make (map [string ]* structs.ServiceCheck , len (existingSvc .Checks ))
771
776
for _ , check := range existingSvc .Checks {
772
777
existingChecks [makeCheckID (existingID , check )] = check
@@ -779,17 +784,16 @@ func (c *ServiceClient) UpdateTask(allocID string, existing, newTask *structs.Ta
779
784
// Check exists, so don't remove it
780
785
delete (existingChecks , checkID )
781
786
sreg .checkIDs [checkID ] = struct {}{}
782
- } else if serviceUnchanged {
783
- // New check on an unchanged service; add them now
784
- newCheckIDs , err := c .checkRegs (ops , allocID , existingID , newSvc , newTask , exec , net )
785
- if err != nil {
786
- return err
787
- }
787
+ }
788
788
789
- for _ , checkID := range newCheckIDs {
790
- sreg .checkIDs [checkID ] = struct {}{}
789
+ // New check on an unchanged service; add them now
790
+ newCheckIDs , err := c .checkRegs (ops , allocID , existingID , newSvc , newTask , exec , net )
791
+ if err != nil {
792
+ return err
793
+ }
791
794
792
- }
795
+ for _ , checkID := range newCheckIDs {
796
+ sreg .checkIDs [checkID ] = struct {}{}
793
797
794
798
}
795
799
@@ -999,36 +1003,40 @@ func (c *ServiceClient) removeTaskRegistration(allocID, taskName string) {
999
1003
//
1000
1004
// Agent service IDs are of the form:
1001
1005
//
1002
- // {nomadServicePrefix}-{ROLE}-{Service.Name}-{Service.Tags...}
1003
- // Example Server ID: _nomad-server-nomad-serf
1004
- // Example Client ID: _nomad-client-nomad-client-http
1006
+ // {nomadServicePrefix}-{ROLE}-b32(sha1( {Service.Name}-{Service.Tags...})
1007
+ // Example Server ID: _nomad-server-FBBK265QN4TMT25ND4EP42TJVMYJ3HR4
1008
+ // Example Client ID: _nomad-client-GGNJPGL7YN7RGMVXZILMPVRZZVRSZC7L
1005
1009
//
1006
1010
func makeAgentServiceID (role string , service * structs.Service ) string {
1007
- parts := make ([]string , len (service .Tags )+ 3 )
1008
- parts [0 ] = nomadServicePrefix
1009
- parts [1 ] = role
1010
- parts [2 ] = service .Name
1011
- copy (parts [3 :], service .Tags )
1012
- return strings .Join (parts , "-" )
1011
+ h := sha1 .New ()
1012
+ io .WriteString (h , service .Name )
1013
+ for _ , tag := range service .Tags {
1014
+ io .WriteString (h , tag )
1015
+ }
1016
+ b32 := base32 .StdEncoding .EncodeToString (h .Sum (nil ))
1017
+ return fmt .Sprintf ("%s-%s-%s" , nomadServicePrefix , role , b32 )
1013
1018
}
1014
1019
1015
1020
// makeTaskServiceID creates a unique ID for identifying a task service in
1016
- // Consul.
1017
- //
1018
- // Task service IDs are of the form:
1019
- //
1020
- // {nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
1021
- // Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
1021
+ // Consul. All structs.Service fields are included in the ID's hash except
1022
+ // Checks. This allows updates to merely compare IDs.
1022
1023
//
1024
+ // Example Service ID: _nomad-task-TNM333JKJPM5AK4FAS3VXQLXFDWOF4VH
1023
1025
func makeTaskServiceID (allocID , taskName string , service * structs.Service ) string {
1024
- parts := make ([]string , len (service .Tags )+ 5 )
1025
- parts [0 ] = nomadServicePrefix
1026
- parts [1 ] = "executor"
1027
- parts [2 ] = allocID
1028
- parts [3 ] = taskName
1029
- parts [4 ] = service .Name
1030
- copy (parts [5 :], service .Tags )
1031
- return strings .Join (parts , "-" )
1026
+ h := sha1 .New ()
1027
+ io .WriteString (h , allocID )
1028
+ io .WriteString (h , taskName )
1029
+ io .WriteString (h , service .Name )
1030
+ io .WriteString (h , service .PortLabel )
1031
+ io .WriteString (h , service .AddressMode )
1032
+ for _ , tag := range service .Tags {
1033
+ io .WriteString (h , tag )
1034
+ }
1035
+
1036
+ // Base32 is used for encoding the hash as sha1 hashes can always be
1037
+ // encoded without padding, only 4 bytes larger than base64, and saves
1038
+ // 8 bytes vs hex.
1039
+ return nomadTaskPrefix + base32 .StdEncoding .EncodeToString (h .Sum (nil ))
1032
1040
}
1033
1041
1034
1042
// makeCheckID creates a unique ID for a check.
@@ -1084,9 +1092,21 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
1084
1092
}
1085
1093
1086
1094
// isNomadService returns true if the ID matches the pattern of a Nomad managed
1087
- // service. Agent services return false as independent client and server agents
1088
- // may be running on the same machine. #2827
1095
+ // service (new or old formats) . Agent services return false as independent
1096
+ // client and server agents may be running on the same machine. #2827
1089
1097
func isNomadService (id string ) bool {
1098
+ return strings .HasPrefix (id , nomadTaskPrefix ) || isOldNomadService (id )
1099
+ }
1100
+
1101
+ // isOldNomadService returns true if the ID matches an old pattern managed by
1102
+ // Nomad.
1103
+ //
1104
+ // Pre-0.7.1 task service IDs are of the form:
1105
+ //
1106
+ // {nomadServicePrefix}-executor-{ALLOC_ID}-{Service.Name}-{Service.Tags...}
1107
+ // Example Service ID: _nomad-executor-1234-echo-http-tag1-tag2-tag3
1108
+ //
1109
+ func isOldNomadService (id string ) bool {
1090
1110
const prefix = nomadServicePrefix + "-executor"
1091
1111
return strings .HasPrefix (id , prefix )
1092
1112
}
0 commit comments