-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Client/Server TLS dynamic reload #3492
Changes from 23 commits
7b74957
b1f8772
156297c
d4754d5
184fbfe
c65857c
bee883c
18fdd31
359358d
9ba0e6a
a4af400
e3cc098
c70702e
11089b2
21c1c1d
bbc5686
8de260f
d97c91c
0805c41
028ba9f
d443098
5170301
cab0830
c6eee01
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1001,3 +1001,156 @@ func TestClient_ValidateMigrateToken_ACLDisabled(t *testing.T) { | |
|
||
assert.Equal(c.ValidateMigrateToken("", ""), true) | ||
} | ||
|
||
func TestClient_ReloadTLS_UpgradePlaintextToTLS(t *testing.T) { | ||
t.Parallel() | ||
assert := assert.New(t) | ||
|
||
s1, addr := testServer(t, func(c *nomad.Config) { | ||
c.Region = "regionFoo" | ||
}) | ||
defer s1.Shutdown() | ||
testutil.WaitForLeader(t, s1.RPC) | ||
|
||
const ( | ||
cafile = "../helper/tlsutil/testdata/ca.pem" | ||
foocert = "../helper/tlsutil/testdata/nomad-foo.pem" | ||
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem" | ||
) | ||
|
||
c1 := testClient(t, func(c *config.Config) { | ||
c.Servers = []string{addr} | ||
}) | ||
defer c1.Shutdown() | ||
|
||
// Registering a node over plaintext should succeed | ||
{ | ||
req := structs.NodeSpecificRequest{ | ||
NodeID: c1.Node().ID, | ||
QueryOptions: structs.QueryOptions{Region: "regionFoo"}, | ||
} | ||
|
||
testutil.WaitForResult(func() (bool, error) { | ||
var out structs.SingleNodeResponse | ||
err := c1.RPC("Node.GetNode", &req, &out) | ||
if err != nil { | ||
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err) | ||
} | ||
return true, nil | ||
}, | ||
func(err error) { | ||
t.Fatalf(err.Error()) | ||
}, | ||
) | ||
} | ||
|
||
newConfig := &nconfig.TLSConfig{ | ||
EnableHTTP: true, | ||
EnableRPC: true, | ||
VerifyServerHostname: true, | ||
CAFile: cafile, | ||
CertFile: foocert, | ||
KeyFile: fookey, | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't you want to assert the node registered using plaintext? |
||
err := c1.reloadTLSConnections(newConfig) | ||
assert.Nil(err) | ||
|
||
// Registering a node over plaintext should fail after the node has upgraded | ||
// to TLS | ||
{ | ||
req := structs.NodeSpecificRequest{ | ||
NodeID: c1.Node().ID, | ||
QueryOptions: structs.QueryOptions{Region: "regionFoo"}, | ||
} | ||
testutil.WaitForResult(func() (bool, error) { | ||
var out structs.SingleNodeResponse | ||
err := c1.RPC("Node.GetNode", &req, &out) | ||
if err == nil { | ||
return false, fmt.Errorf("client RPC succeeded when it should have failed:\n%+v", err) | ||
} | ||
return true, nil | ||
}, | ||
func(err error) { | ||
t.Fatalf(err.Error()) | ||
}, | ||
) | ||
} | ||
} | ||
|
||
func TestClient_ReloadTLS_DowngradeTLSToPlaintext(t *testing.T) { | ||
t.Parallel() | ||
assert := assert.New(t) | ||
|
||
s1, addr := testServer(t, func(c *nomad.Config) { | ||
c.Region = "regionFoo" | ||
}) | ||
defer s1.Shutdown() | ||
testutil.WaitForLeader(t, s1.RPC) | ||
|
||
const ( | ||
cafile = "../helper/tlsutil/testdata/ca.pem" | ||
foocert = "../helper/tlsutil/testdata/nomad-foo.pem" | ||
fookey = "../helper/tlsutil/testdata/nomad-foo-key.pem" | ||
) | ||
|
||
c1 := testClient(t, func(c *config.Config) { | ||
c.Servers = []string{addr} | ||
c.TLSConfig = &nconfig.TLSConfig{ | ||
EnableHTTP: true, | ||
EnableRPC: true, | ||
VerifyServerHostname: true, | ||
CAFile: cafile, | ||
CertFile: foocert, | ||
KeyFile: fookey, | ||
} | ||
}) | ||
defer c1.Shutdown() | ||
|
||
// assert that when one node is running in encrypted mode, a RPC request to a | ||
// node running in plaintext mode should fail | ||
{ | ||
req := structs.NodeSpecificRequest{ | ||
NodeID: c1.Node().ID, | ||
QueryOptions: structs.QueryOptions{Region: "regionFoo"}, | ||
} | ||
testutil.WaitForResult(func() (bool, error) { | ||
var out structs.SingleNodeResponse | ||
err := c1.RPC("Node.GetNode", &req, &out) | ||
if err == nil { | ||
return false, fmt.Errorf("client RPC succeeded when it should have failed :\n%+v", err) | ||
} | ||
return true, nil | ||
}, | ||
func(err error) { | ||
t.Fatalf(err.Error()) | ||
}, | ||
) | ||
} | ||
|
||
newConfig := &nconfig.TLSConfig{} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Assert it isn't registered |
||
err := c1.reloadTLSConnections(newConfig) | ||
assert.Nil(err) | ||
|
||
// assert that when both nodes are in plaintext mode, a RPC request should | ||
// succeed | ||
{ | ||
req := structs.NodeSpecificRequest{ | ||
NodeID: c1.Node().ID, | ||
QueryOptions: structs.QueryOptions{Region: "regionFoo"}, | ||
} | ||
testutil.WaitForResult(func() (bool, error) { | ||
var out structs.SingleNodeResponse | ||
err := c1.RPC("Node.GetNode", &req, &out) | ||
if err != nil { | ||
return false, fmt.Errorf("client RPC failed when it should have succeeded:\n%+v", err) | ||
} | ||
return true, nil | ||
}, | ||
func(err error) { | ||
t.Fatalf(err.Error()) | ||
}, | ||
) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -730,37 +730,66 @@ func (a *Agent) Stats() map[string]map[string]string { | |
return stats | ||
} | ||
|
||
// ShouldReload determines if we should reload the configuration and agent | ||
// connections. If the TLS Configuration has not changed, we shouldn't reload. | ||
func (a *Agent) ShouldReload(newConfig *Config) (bool, bool) { | ||
a.configLock.Lock() | ||
defer a.configLock.Unlock() | ||
if a.config.TLSConfig.Equals(newConfig.TLSConfig) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you need to acquire |
||
return false, false | ||
} | ||
|
||
return true, true // requires a reload of both agent and http server | ||
} | ||
|
||
// Reload handles configuration changes for the agent. Provides a method that | ||
// is easier to unit test, as this action is invoked via SIGHUP. | ||
func (a *Agent) Reload(newConfig *Config) error { | ||
a.configLock.Lock() | ||
defer a.configLock.Unlock() | ||
|
||
if newConfig.TLSConfig != nil { | ||
|
||
// TODO(chelseakomlo) In a later PR, we will introduce the ability to reload | ||
// TLS configuration if the agent is not running with TLS enabled. | ||
if a.config.TLSConfig != nil { | ||
// Reload the certificates on the keyloader and on success store the | ||
// updated TLS config. It is important to reuse the same keyloader | ||
// as this allows us to dynamically reload configurations not only | ||
// on the Agent but on the Server and Client too (they are | ||
// referencing the same keyloader). | ||
keyloader := a.config.TLSConfig.GetKeyLoader() | ||
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) | ||
if err != nil { | ||
return err | ||
} | ||
a.config.TLSConfig = newConfig.TLSConfig | ||
a.config.TLSConfig.KeyLoader = keyloader | ||
if newConfig == nil || newConfig.TLSConfig == nil { | ||
return fmt.Errorf("cannot reload agent with nil configuration") | ||
} | ||
|
||
// This is just a TLS configuration reload, we don't need to refresh | ||
// existing network connections | ||
if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { | ||
|
||
// Reload the certificates on the keyloader and on success store the | ||
// updated TLS config. It is important to reuse the same keyloader | ||
// as this allows us to dynamically reload configurations not only | ||
// on the Agent but on the Server and Client too (they are | ||
// referencing the same keyloader). | ||
keyloader := a.config.TLSConfig.GetKeyLoader() | ||
_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) | ||
if err != nil { | ||
return err | ||
} | ||
a.config.TLSConfig = newConfig.TLSConfig | ||
a.config.TLSConfig.KeyLoader = keyloader | ||
return nil | ||
} | ||
|
||
// Completely reload the agent's TLS configuration (moving from non-TLS to | ||
// TLS, or vice versa) | ||
// This does not handle errors in loading the new TLS configuration | ||
a.config.TLSConfig = newConfig.TLSConfig.Copy() | ||
|
||
if newConfig.TLSConfig.IsEmpty() { | ||
a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext") | ||
} else { | ||
a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// GetConfigCopy creates a replica of the agent's config, excluding locks | ||
// GetConfig creates a locked reference to the agent's config | ||
func (a *Agent) GetConfig() *Config { | ||
a.configLock.Lock() | ||
defer a.configLock.Unlock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Uhoh... the comment says excluding locks. ...but it also says "replica" and we don't do a copy. I don't know what the right behavior for this method is, but we should probably quick audit callers and figure it out. :( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm. Great point. It should either return a copy or a locked config. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think GetConfig is now a different function than its comment (#3492 (diff)). If we need a GetConfigCopy, we can create it. After doing a quick look at the callers, it seems GetConfig is used in read-only mode instead of creating an actual copy. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please update the comment to clarify it simply returns a config. I'm not sure it's safe to return a reference instead of a copy as callers of this method have no way to acquire the configLock to guard accesses... Diving into that seems out of scope for this PR though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See rename here: 675311f#diff-c2b91142a0ac15d7fb511420cafb2f61R802 |
||
|
||
return a.config | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the only place that can throw any errors here the OutgoingTLSWrapper method?