Skip to content

Commit 9d006ec

Browse files
authored
Merge pull request #3492 from hashicorp/f-client-tls-reload
Client/Server TLS dynamic reload
2 parents 6e96c81 + c6eee01 commit 9d006ec

31 files changed

+1213
-156
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ IMPROVEMENTS:
1414

1515
BUG FIXES:
1616
* core: Fix search endpoint forwarding for multi-region clusters [[GH-3680](https://github.com/hashicorp/nomad/issues/3680)]
17+
* core: Allow upgrading/downgrading TLS via SIGHUP on both servers and clients [[GH-3492](https://github.com/hashicorp/nomad/issues/3492)]
1718
* core: Fix an issue in which batch jobs with queued placements and lost
1819
allocations could result in improper placement counts [[GH-3717](https://github.com/hashicorp/nomad/issues/3717)]
1920
* client: Migrated ephemeral_disk's maintain directory permissions [[GH-3723](https://github.com/hashicorp/nomad/issues/3723)]

client/client.go

+29
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/hashicorp/nomad/helper/uuid"
3131
"github.com/hashicorp/nomad/nomad"
3232
"github.com/hashicorp/nomad/nomad/structs"
33+
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
3334
vaultapi "github.com/hashicorp/vault/api"
3435
"github.com/mitchellh/hashstructure"
3536
"github.com/shirou/gopsutil/host"
@@ -364,6 +365,34 @@ func (c *Client) init() error {
364365
return nil
365366
}
366367

368+
// reloadTLSConnections allows a client to reload its TLS configuration on the
369+
// fly
370+
func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error {
371+
var tlsWrap tlsutil.RegionWrapper
372+
if newConfig != nil && newConfig.EnableRPC {
373+
tw, err := tlsutil.NewTLSConfiguration(newConfig).OutgoingTLSWrapper()
374+
if err != nil {
375+
return err
376+
}
377+
tlsWrap = tw
378+
}
379+
380+
// Keep the client configuration up to date as we use configuration values to
381+
// decide on what type of connections to accept
382+
c.configLock.Lock()
383+
c.config.TLSConfig = newConfig
384+
c.configLock.Unlock()
385+
386+
c.connPool.ReloadTLS(tlsWrap)
387+
388+
return nil
389+
}
390+
391+
// Reload allows a client to reload its configuration on the fly
392+
func (c *Client) Reload(newConfig *config.Config) error {
393+
return c.reloadTLSConnections(newConfig.TLSConfig)
394+
}
395+
367396
// Leave is used to prepare the client to leave the cluster
368397
func (c *Client) Leave() error {
369398
// TODO

client/client_test.go

+153
Original file line numberDiff line numberDiff line change
@@ -1001,3 +1001,156 @@ func TestClient_ValidateMigrateToken_ACLDisabled(t *testing.T) {
10011001

10021002
assert.Equal(c.ValidateMigrateToken("", ""), true)
10031003
}
1004+
1005+
func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) {
1006+
t.Parallel()
1007+
assert := assert.New(t)
1008+
1009+
s1, addr := testServer(t, func(c *nomad.Config) {
1010+
c.Region = "regionFoo"
1011+
})
1012+
defer s1.Shutdown()
1013+
testutil.WaitForLeader(t, s1.RPC)
1014+
1015+
const (
1016+
cafile = "../helper/tlsutil/testdata/ca.pem"
1017+
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
1018+
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
1019+
)
1020+
1021+
c1 := testClient(t, func(c *config.Config) {
1022+
c.Servers = []string{addr}
1023+
})
1024+
defer c1.Shutdown()
1025+
1026+
// Registering a node over plaintext should succeed
1027+
{
1028+
req := structs.NodeSpecificRequest{
1029+
NodeID: c1.Node().ID,
1030+
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
1031+
}
1032+
1033+
testutil.WaitForResult(func() (bool, error) {
1034+
var out structs.SingleNodeResponse
1035+
err := c1.RPC("Node.GetNode", &req, &out)
1036+
if err != nil {
1037+
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
1038+
}
1039+
return true, nil
1040+
},
1041+
func(err error) {
1042+
t.Fatalf(err.Error())
1043+
},
1044+
)
1045+
}
1046+
1047+
newConfig := &nconfig.TLSConfig{
1048+
EnableHTTP: true,
1049+
EnableRPC: true,
1050+
VerifyServerHostname: true,
1051+
CAFile: cafile,
1052+
CertFile: foocert,
1053+
KeyFile: fookey,
1054+
}
1055+
1056+
err := c1.reloadTLSConnections(newConfig)
1057+
assert.Nil(err)
1058+
1059+
// Registering a node over plaintext should fail after the node has upgraded
1060+
// to TLS
1061+
{
1062+
req := structs.NodeSpecificRequest{
1063+
NodeID: c1.Node().ID,
1064+
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
1065+
}
1066+
testutil.WaitForResult(func() (bool, error) {
1067+
var out structs.SingleNodeResponse
1068+
err := c1.RPC("Node.GetNode", &req, &out)
1069+
if err == nil {
1070+
return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err)
1071+
}
1072+
return true, nil
1073+
},
1074+
func(err error) {
1075+
t.Fatalf(err.Error())
1076+
},
1077+
)
1078+
}
1079+
}
1080+
1081+
func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) {
1082+
t.Parallel()
1083+
assert := assert.New(t)
1084+
1085+
s1, addr := testServer(t, func(c *nomad.Config) {
1086+
c.Region = "regionFoo"
1087+
})
1088+
defer s1.Shutdown()
1089+
testutil.WaitForLeader(t, s1.RPC)
1090+
1091+
const (
1092+
cafile = "../helper/tlsutil/testdata/ca.pem"
1093+
foocert = "../helper/tlsutil/testdata/nomad-foo.pem"
1094+
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem"
1095+
)
1096+
1097+
c1 := testClient(t, func(c *config.Config) {
1098+
c.Servers = []string{addr}
1099+
c.TLSConfig = &nconfig.TLSConfig{
1100+
EnableHTTP: true,
1101+
EnableRPC: true,
1102+
VerifyServerHostname: true,
1103+
CAFile: cafile,
1104+
CertFile: foocert,
1105+
KeyFile: fookey,
1106+
}
1107+
})
1108+
defer c1.Shutdown()
1109+
1110+
// assert that when one node is running in encrypted mode, a RPC request to a
1111+
// node running in plaintext mode should fail
1112+
{
1113+
req := structs.NodeSpecificRequest{
1114+
NodeID: c1.Node().ID,
1115+
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
1116+
}
1117+
testutil.WaitForResult(func() (bool, error) {
1118+
var out structs.SingleNodeResponse
1119+
err := c1.RPC("Node.GetNode", &req, &out)
1120+
if err == nil {
1121+
return false, fmt.Errorf("client RPC succeeded when it should have failed :\n%+v", err)
1122+
}
1123+
return true, nil
1124+
},
1125+
func(err error) {
1126+
t.Fatalf(err.Error())
1127+
},
1128+
)
1129+
}
1130+
1131+
newConfig := &nconfig.TLSConfig{}
1132+
1133+
err := c1.reloadTLSConnections(newConfig)
1134+
assert.Nil(err)
1135+
1136+
// assert that when both nodes are in plaintext mode, a RPC request should
1137+
// succeed
1138+
{
1139+
req := structs.NodeSpecificRequest{
1140+
NodeID: c1.Node().ID,
1141+
QueryOptions: structs.QueryOptions{Region: "regionFoo"},
1142+
}
1143+
testutil.WaitForResult(func() (bool, error) {
1144+
var out structs.SingleNodeResponse
1145+
err := c1.RPC("Node.GetNode", &req, &out)
1146+
if err != nil {
1147+
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err)
1148+
}
1149+
return true, nil
1150+
},
1151+
func(err error) {
1152+
t.Fatalf(err.Error())
1153+
},
1154+
)
1155+
}
1156+
}

client/config/config.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -347,9 +347,10 @@ func (c *Config) ReadStringListToMapDefault(key, defaultValue string) map[string
347347
return list
348348
}
349349

350-
// TLSConfig returns a TLSUtil Config based on the client configuration
350+
// TLSConfiguration returns a TLSUtil Config based on the existing client
351+
// configuration
351352
func (c *Config) TLSConfiguration() *tlsutil.Config {
352-
tlsConf := &tlsutil.Config{
353+
return &tlsutil.Config{
353354
VerifyIncoming: true,
354355
VerifyOutgoing: true,
355356
VerifyServerHostname: c.TLSConfig.VerifyServerHostname,
@@ -358,5 +359,4 @@ func (c *Config) TLSConfiguration() *tlsutil.Config {
358359
KeyFile: c.TLSConfig.KeyFile,
359360
KeyLoader: c.TLSConfig.GetKeyLoader(),
360361
}
361-
return tlsConf
362362
}

command/agent/agent.go

+47-18
Original file line numberDiff line numberDiff line change
@@ -756,37 +756,66 @@ func (a *Agent) Stats() map[string]map[string]string {
756756
return stats
757757
}
758758

759+
// ShouldReload determines if we should reload the configuration and agent
760+
// connections. If the TLS Configuration has not changed, we shouldn't reload.
761+
func (a *Agent) ShouldReload(newConfig *Config) (bool, bool) {
762+
a.configLock.Lock()
763+
defer a.configLock.Unlock()
764+
if a.config.TLSConfig.Equals(newConfig.TLSConfig) {
765+
return false, false
766+
}
767+
768+
return true, true // requires a reload of both agent and http server
769+
}
770+
759771
// Reload handles configuration changes for the agent. Provides a method that
760772
// is easier to unit test, as this action is invoked via SIGHUP.
761773
func (a *Agent) Reload(newConfig *Config) error {
762774
a.configLock.Lock()
763775
defer a.configLock.Unlock()
764776

765-
if newConfig.TLSConfig != nil {
766-
767-
// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload
768-
// TLS configuration if the agent is not running with TLS enabled.
769-
if a.config.TLSConfig != nil {
770-
// Reload the certificates on the keyloader and on success store the
771-
// updated TLS config. It is important to reuse the same keyloader
772-
// as this allows us to dynamically reload configurations not only
773-
// on the Agent but on the Server and Client too (they are
774-
// referencing the same keyloader).
775-
keyloader := a.config.TLSConfig.GetKeyLoader()
776-
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
777-
if err != nil {
778-
return err
779-
}
780-
a.config.TLSConfig = newConfig.TLSConfig
781-
a.config.TLSConfig.KeyLoader = keyloader
777+
if newConfig == nil || newConfig.TLSConfig == nil {
778+
return fmt.Errorf("cannot reload agent with nil configuration")
779+
}
780+
781+
// This is just a TLS configuration reload, we don't need to refresh
782+
// existing network connections
783+
if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
784+
785+
// Reload the certificates on the keyloader and on success store the
786+
// updated TLS config. It is important to reuse the same keyloader
787+
// as this allows us to dynamically reload configurations not only
788+
// on the Agent but on the Server and Client too (they are
789+
// referencing the same keyloader).
790+
keyloader := a.config.TLSConfig.GetKeyLoader()
791+
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
792+
if err != nil {
793+
return err
782794
}
795+
a.config.TLSConfig = newConfig.TLSConfig
796+
a.config.TLSConfig.KeyLoader = keyloader
797+
return nil
798+
}
799+
800+
// Completely reload the agent's TLS configuration (moving from non-TLS to
801+
// TLS, or vice versa)
802+
// This does not handle errors in loading the new TLS configuration
803+
a.config.TLSConfig = newConfig.TLSConfig.Copy()
804+
805+
if newConfig.TLSConfig.IsEmpty() {
806+
a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext")
807+
} else {
808+
a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS")
783809
}
784810

785811
return nil
786812
}
787813

788-
// GetConfigCopy creates a replica of the agent's config, excluding locks
814+
// GetConfig creates a locked reference to the agent's config
789815
func (a *Agent) GetConfig() *Config {
816+
a.configLock.Lock()
817+
defer a.configLock.Unlock()
818+
790819
return a.config
791820
}
792821

0 commit comments

Comments
 (0)