@@ -1943,7 +1943,7 @@ func (c *Client) retryRegisterNode() {
1943
1943
}
1944
1944
1945
1945
retryIntv := registerRetryIntv
1946
- if err == noServersErr {
1946
+ if err == noServersErr || structs . IsErrNoRegionPath ( err ) {
1947
1947
c .logger .Debug ("registration waiting on servers" )
1948
1948
c .triggerDiscovery ()
1949
1949
retryIntv = noServerRetryIntv
@@ -1970,6 +1970,11 @@ func (c *Client) registerNode() error {
1970
1970
return err
1971
1971
}
1972
1972
1973
+ err := c .handleNodeUpdateResponse (resp )
1974
+ if err != nil {
1975
+ return err
1976
+ }
1977
+
1973
1978
// Update the node status to ready after we register.
1974
1979
c .UpdateConfig (func (c * config.Config ) {
1975
1980
c .Node .Status = structs .NodeStatusReady
@@ -1984,6 +1989,7 @@ func (c *Client) registerNode() error {
1984
1989
defer c .heartbeatLock .Unlock ()
1985
1990
c .heartbeatStop .setLastOk (time .Now ())
1986
1991
c .heartbeatTTL = resp .HeartbeatTTL
1992
+
1987
1993
return nil
1988
1994
}
1989
1995
@@ -2035,6 +2041,22 @@ func (c *Client) updateNodeStatus() error {
2035
2041
}
2036
2042
})
2037
2043
2044
+ err := c .handleNodeUpdateResponse (resp )
2045
+ if err != nil {
2046
+ return fmt .Errorf ("heartbeat response returned no valid servers" )
2047
+ }
2048
+
2049
+ // If there's no Leader in the response we may be talking to a partitioned
2050
+ // server. Redo discovery to ensure our server list is up to date.
2051
+ if resp .LeaderRPCAddr == "" {
2052
+ c .triggerDiscovery ()
2053
+ }
2054
+
2055
+ c .EnterpriseClient .SetFeatures (resp .Features )
2056
+ return nil
2057
+ }
2058
+
2059
+ func (c * Client ) handleNodeUpdateResponse (resp structs.NodeUpdateResponse ) error {
2038
2060
// Update the number of nodes in the cluster so we can adjust our server
2039
2061
// rebalance rate.
2040
2062
c .servers .SetNumNodes (resp .NumNodes )
@@ -2051,20 +2073,9 @@ func (c *Client) updateNodeStatus() error {
2051
2073
nomadServers = append (nomadServers , e )
2052
2074
}
2053
2075
if len (nomadServers ) == 0 {
2054
- return fmt . Errorf ( "heartbeat response returned no valid servers" )
2076
+ return noServersErr
2055
2077
}
2056
2078
c .servers .SetServers (nomadServers )
2057
-
2058
- // Begin polling Consul if there is no Nomad leader. We could be
2059
- // heartbeating to a Nomad server that is in the minority of a
2060
- // partition of the Nomad server quorum, but this Nomad Agent still
2061
- // has connectivity to the existing majority of Nomad Servers, but
2062
- // only if it queries Consul.
2063
- if resp .LeaderRPCAddr == "" {
2064
- c .triggerDiscovery ()
2065
- }
2066
-
2067
- c .EnterpriseClient .SetFeatures (resp .Features )
2068
2079
return nil
2069
2080
}
2070
2081
@@ -2906,14 +2917,6 @@ func (c *Client) consulDiscoveryImpl() error {
2906
2917
dcs = dcs [0 :helper .Min (len (dcs ), datacenterQueryLimit )]
2907
2918
}
2908
2919
2909
- // Query for servers in this client's region only
2910
- region := c .Region ()
2911
- rpcargs := structs.GenericRequest {
2912
- QueryOptions : structs.QueryOptions {
2913
- Region : region ,
2914
- },
2915
- }
2916
-
2917
2920
serviceName := c .GetConfig ().ConsulConfig .ServerServiceName
2918
2921
var mErr multierror.Error
2919
2922
var nomadServers servers.Servers
@@ -2944,32 +2947,14 @@ DISCOLOOP:
2944
2947
continue
2945
2948
}
2946
2949
2947
- // Query the members from the region that Consul gave us, and
2948
- // extract the client-advertise RPC address from each member
2949
- var membersResp structs.ServerMembersResponse
2950
- if err := c .connPool .RPC (region , addr , "Status.Members" , rpcargs , & membersResp ); err != nil {
2951
- mErr .Errors = append (mErr .Errors , err )
2952
- continue
2953
- }
2954
- for _ , member := range membersResp .Members {
2955
- if addrTag , ok := member .Tags ["rpc_addr" ]; ok {
2956
- if portTag , ok := member .Tags ["port" ]; ok {
2957
- addr , err := net .ResolveTCPAddr ("tcp" ,
2958
- fmt .Sprintf ("%s:%s" , addrTag , portTag ))
2959
- if err != nil {
2960
- mErr .Errors = append (mErr .Errors , err )
2961
- continue
2962
- }
2963
- srv := & servers.Server {Addr : addr }
2964
- nomadServers = append (nomadServers , srv )
2965
- }
2966
- }
2967
- }
2950
+ srv := & servers.Server {Addr : addr }
2951
+ nomadServers = append (nomadServers , srv )
2952
+ }
2968
2953
2969
- if len (nomadServers ) > 0 {
2970
- break DISCOLOOP
2971
- }
2954
+ if len (nomadServers ) > 0 {
2955
+ break DISCOLOOP
2972
2956
}
2957
+
2973
2958
}
2974
2959
if len (nomadServers ) == 0 {
2975
2960
if len (mErr .Errors ) > 0 {
0 commit comments