-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Only synthesize anonymous token in primary DC #17231
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:bug | ||
acl: Fix an issue where the anonymous token was synthesized in non-primary datacenters which could cause permission errors when federating clusters with ACL replication enabled. | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
// Copyright (c) HashiCorp, Inc. | ||
// SPDX-License-Identifier: MPL-2.0 | ||
|
||
package wanfed | ||
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/hashicorp/consul/api" | ||
"github.com/hashicorp/consul/sdk/testutil/retry" | ||
libcluster "github.com/hashicorp/consul/test/integration/consul-container/libs/cluster" | ||
) | ||
|
||
const replicationPolicyRules = ` | ||
acl = "write" | ||
operator = "write" | ||
|
||
service_prefix "" { | ||
policy = "read" | ||
intentions = "read" | ||
} | ||
` | ||
|
||
var retryFuncTimer = &retry.Timer{ | ||
Timeout: 60 * time.Second, | ||
Wait: 2 * time.Second, | ||
} | ||
|
||
// retryFunc is a helper to retry the given function until it returns a nil | ||
// error. It returns the value from the function on success. | ||
func retryFunc[T any](t *testing.T, f func() (T, error)) T { | ||
var result T | ||
retry.RunWith(retryFuncTimer, t, func(r *retry.R) { | ||
val, err := f() | ||
require.NoError(r, err) | ||
result = val | ||
}) | ||
return result | ||
} | ||
Comment on lines
+34
to
+42
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added this to try to declutter the retry loops in the test case a little. One gotcha is that the |
||
|
||
// TestWanFed_ReplicationBootstrap checks the setup procedure of two federated | ||
// datacenters with a manual ACL bootstrap step and ACL token replication enabled. | ||
// | ||
// - WAN join two clusters that are not ACL bootstrapped | ||
// and have ACL replication enabled | ||
// - ACL Bootstrap the primary datacenter | ||
// - Configure the replication token in each datacenter | ||
// - Validate ACL replication by creating a token | ||
// and checking it is found in each datacenter. | ||
// | ||
// Added to validate this issue: https://github.com/hashicorp/consul/issues/16620 | ||
func TestWanFed_ReplicationBootstrap(t *testing.T) { | ||
cfgFunc := func(c *libcluster.ConfigBuilder) { | ||
c.Set("primary_datacenter", "primary") | ||
c.Set("acl.enabled", true) | ||
c.Set("acl.default_policy", "deny") | ||
c.Set("acl.enable_token_persistence", true) | ||
c.Set("acl.enable_token_replication", true) | ||
} | ||
|
||
_, primary := createNonACLBootstrappedCluster(t, "primary", cfgFunc) | ||
_, secondary := createNonACLBootstrappedCluster(t, "secondary", func(c *libcluster.ConfigBuilder) { | ||
cfgFunc(c) | ||
c.Set("retry_join_wan", []string{primary.GetIP()}) | ||
}) | ||
|
||
// ACL bootstrap the primary | ||
rootToken := retryFunc(t, func() (*api.ACLToken, error) { | ||
t.Logf("ACL bootstrap the primary datacenter") | ||
tok, _, err := primary.GetClient().ACL().Bootstrap() | ||
return tok, err | ||
}) | ||
|
||
agents := []libcluster.Agent{primary, secondary} | ||
|
||
for _, agent := range agents { | ||
// Make a new client with the bootstrap token. | ||
_, err := agent.NewClient(rootToken.SecretID, true) | ||
require.NoError(t, err) | ||
|
||
t.Logf("Wait for members in %s datacenter", agent.GetDatacenter()) | ||
libcluster.WaitForMembers(t, agent.GetClient(), 1) | ||
} | ||
|
||
// Create and set the replication token | ||
replicationPolicy := retryFunc(t, func() (*api.ACLPolicy, error) { | ||
t.Logf("Create the replication policy") | ||
p, _, err := primary.GetClient().ACL().PolicyCreate( | ||
&api.ACLPolicy{ | ||
Name: "consul-server-replication", | ||
Rules: replicationPolicyRules, | ||
}, | ||
nil, | ||
) | ||
return p, err | ||
}) | ||
Comment on lines
+89
to
+99
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I saw there was a helper to create an agent token. I could move this to a helper alongside that? |
||
replicationToken := retryFunc(t, func() (*api.ACLToken, error) { | ||
t.Logf("Create the replication policy") | ||
tok, _, err := primary.GetClient().ACL().TokenCreate( | ||
&api.ACLToken{ | ||
Description: "Consul server replication token", | ||
Policies: []*api.ACLLink{{ID: replicationPolicy.ID}}, | ||
}, | ||
nil, | ||
) | ||
return tok, err | ||
}) | ||
|
||
// Set the replication token in the secondary | ||
retryFunc(t, func() (any, error) { | ||
t.Logf("Set the replication token in the %s datacenter", secondary.GetDatacenter()) | ||
return secondary.GetClient().Agent().UpdateReplicationACLToken(replicationToken.SecretID, nil) | ||
}) | ||
|
||
// Double check that replication happens after setting the replication token. | ||
// - Create a token | ||
// - Check the token is found in each datacenter | ||
createdToken := retryFunc(t, func() (*api.ACLToken, error) { | ||
t.Logf("Create a test token to validate ACL replication in the secondary datacenter") | ||
tok, _, err := secondary.GetClient().ACL().TokenCreate(&api.ACLToken{ | ||
ServiceIdentities: []*api.ACLServiceIdentity{{ | ||
ServiceName: "test-svc", | ||
}}, | ||
}, nil) | ||
return tok, err | ||
}) | ||
for _, agent := range agents { | ||
tok := retryFunc(t, func() (*api.ACLToken, error) { | ||
t.Logf("Check the test token is found in the %s datacenter", agent.GetDatacenter()) | ||
tok, _, err := agent.GetClient().ACL().TokenRead(createdToken.AccessorID, nil) | ||
return tok, err | ||
}) | ||
require.NotNil(t, tok) | ||
require.Equal(t, tok.AccessorID, createdToken.AccessorID) | ||
require.Equal(t, tok.SecretID, createdToken.SecretID) | ||
} | ||
} | ||
|
||
func createNonACLBootstrappedCluster(t *testing.T, dc string, f func(c *libcluster.ConfigBuilder)) (*libcluster.Cluster, libcluster.Agent) { | ||
ctx := libcluster.NewBuildContext(t, libcluster.BuildOptions{Datacenter: dc}) | ||
conf := libcluster.NewConfigBuilder(ctx).Advanced(f) | ||
|
||
cluster, err := libcluster.New(t, []libcluster.Config{*conf.ToAgentConfig(t)}) | ||
require.NoError(t, err) | ||
|
||
client := cluster.Agents[0].GetClient() | ||
|
||
libcluster.WaitForLeader(t, cluster, client) | ||
// Note: Do not wait for members yet (not ACL bootstrapped so no perms with a default deny) | ||
|
||
agent, err := cluster.Leader() | ||
require.NoError(t, err) | ||
return cluster, agent | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
// Copyright (c) HashiCorp, Inc. | ||
// SPDX-License-Identifier: MPL-2.0 | ||
|
||
package peering | ||
package wanfed | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I noticed another package is called |
||
|
||
import ( | ||
"context" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I ended up adding an integration test for this.
I wanted to test the scenario from the github issue, but I thought that scenario was perhaps slightly too complex to put in the
api/
package, where we are really just testing the api client. I did try incorporating a test into theagent/consul/
package where we have some other acl replication tests, but I had some issues reliably reproducing the issue there (and I opted to not spend more cycles on it).I feel like this is an important scenario for WAN fed that makes sense for an integration test, and the integration tests are pretty straightforward to write 🙂