Skip to content

Commit 3c26238

Browse files
Backport of client: use Status.RPCServers RPC for Consul discovery into release/1.5.x (#16529)
This pull request was automerged via backport-assistant.
1 parent 6286af0 commit 3c26238

File tree

2 files changed

+33
-45
lines changed

2 files changed

+33
-45
lines changed

.changelog/16490.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
```release-note:bug
2+
client: Fixed a bug where clients using Consul discovery to join the cluster would get permission denied errors
3+
```

client/client.go

+30-45
Original file line numberDiff line numberDiff line change
@@ -1943,7 +1943,7 @@ func (c *Client) retryRegisterNode() {
19431943
}
19441944

19451945
retryIntv := registerRetryIntv
1946-
if err == noServersErr {
1946+
if err == noServersErr || structs.IsErrNoRegionPath(err) {
19471947
c.logger.Debug("registration waiting on servers")
19481948
c.triggerDiscovery()
19491949
retryIntv = noServerRetryIntv
@@ -1970,6 +1970,11 @@ func (c *Client) registerNode() error {
19701970
return err
19711971
}
19721972

1973+
err := c.handleNodeUpdateResponse(resp)
1974+
if err != nil {
1975+
return err
1976+
}
1977+
19731978
// Update the node status to ready after we register.
19741979
c.UpdateConfig(func(c *config.Config) {
19751980
c.Node.Status = structs.NodeStatusReady
@@ -1984,6 +1989,7 @@ func (c *Client) registerNode() error {
19841989
defer c.heartbeatLock.Unlock()
19851990
c.heartbeatStop.setLastOk(time.Now())
19861991
c.heartbeatTTL = resp.HeartbeatTTL
1992+
19871993
return nil
19881994
}
19891995

@@ -2035,6 +2041,22 @@ func (c *Client) updateNodeStatus() error {
20352041
}
20362042
})
20372043

2044+
err := c.handleNodeUpdateResponse(resp)
2045+
if err != nil {
2046+
return fmt.Errorf("heartbeat response returned no valid servers")
2047+
}
2048+
2049+
// If there's no Leader in the response we may be talking to a partitioned
2050+
// server. Redo discovery to ensure our server list is up to date.
2051+
if resp.LeaderRPCAddr == "" {
2052+
c.triggerDiscovery()
2053+
}
2054+
2055+
c.EnterpriseClient.SetFeatures(resp.Features)
2056+
return nil
2057+
}
2058+
2059+
func (c *Client) handleNodeUpdateResponse(resp structs.NodeUpdateResponse) error {
20382060
// Update the number of nodes in the cluster so we can adjust our server
20392061
// rebalance rate.
20402062
c.servers.SetNumNodes(resp.NumNodes)
@@ -2051,20 +2073,9 @@ func (c *Client) updateNodeStatus() error {
20512073
nomadServers = append(nomadServers, e)
20522074
}
20532075
if len(nomadServers) == 0 {
2054-
return fmt.Errorf("heartbeat response returned no valid servers")
2076+
return noServersErr
20552077
}
20562078
c.servers.SetServers(nomadServers)
2057-
2058-
// Begin polling Consul if there is no Nomad leader. We could be
2059-
// heartbeating to a Nomad server that is in the minority of a
2060-
// partition of the Nomad server quorum, but this Nomad Agent still
2061-
// has connectivity to the existing majority of Nomad Servers, but
2062-
// only if it queries Consul.
2063-
if resp.LeaderRPCAddr == "" {
2064-
c.triggerDiscovery()
2065-
}
2066-
2067-
c.EnterpriseClient.SetFeatures(resp.Features)
20682079
return nil
20692080
}
20702081

@@ -2906,14 +2917,6 @@ func (c *Client) consulDiscoveryImpl() error {
29062917
dcs = dcs[0:helper.Min(len(dcs), datacenterQueryLimit)]
29072918
}
29082919

2909-
// Query for servers in this client's region only
2910-
region := c.Region()
2911-
rpcargs := structs.GenericRequest{
2912-
QueryOptions: structs.QueryOptions{
2913-
Region: region,
2914-
},
2915-
}
2916-
29172920
serviceName := c.GetConfig().ConsulConfig.ServerServiceName
29182921
var mErr multierror.Error
29192922
var nomadServers servers.Servers
@@ -2944,32 +2947,14 @@ DISCOLOOP:
29442947
continue
29452948
}
29462949

2947-
// Query the members from the region that Consul gave us, and
2948-
// extract the client-advertise RPC address from each member
2949-
var membersResp structs.ServerMembersResponse
2950-
if err := c.connPool.RPC(region, addr, "Status.Members", rpcargs, &membersResp); err != nil {
2951-
mErr.Errors = append(mErr.Errors, err)
2952-
continue
2953-
}
2954-
for _, member := range membersResp.Members {
2955-
if addrTag, ok := member.Tags["rpc_addr"]; ok {
2956-
if portTag, ok := member.Tags["port"]; ok {
2957-
addr, err := net.ResolveTCPAddr("tcp",
2958-
fmt.Sprintf("%s:%s", addrTag, portTag))
2959-
if err != nil {
2960-
mErr.Errors = append(mErr.Errors, err)
2961-
continue
2962-
}
2963-
srv := &servers.Server{Addr: addr}
2964-
nomadServers = append(nomadServers, srv)
2965-
}
2966-
}
2967-
}
2950+
srv := &servers.Server{Addr: addr}
2951+
nomadServers = append(nomadServers, srv)
2952+
}
29682953

2969-
if len(nomadServers) > 0 {
2970-
break DISCOLOOP
2971-
}
2954+
if len(nomadServers) > 0 {
2955+
break DISCOLOOP
29722956
}
2957+
29732958
}
29742959
if len(nomadServers) == 0 {
29752960
if len(mErr.Errors) > 0 {

0 commit comments

Comments
 (0)