-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
support for Nomad - Consul Connect integration with Consul ACLs #6905
Changes from 14 commits
fa00fbf
ad3afe5
f70998e
2e2811f
6a0e31d
369520e
a264779
869942c
53e3d8e
7416a4f
00bfedd
2116d35
28e8963
d88f25f
ee47a27
6f1503a
d3ba869
50cc565
86219d1
5b2bb63
8062cde
814b50e
968f040
676af6a
02aeb17
e4d7b5f
45920e9
0d6254e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -4,20 +4,28 @@ import ( | |||||
"bytes" | ||||||
"context" | ||||||
"fmt" | ||||||
"io/ioutil" | ||||||
"os" | ||||||
"os/exec" | ||||||
"path/filepath" | ||||||
"time" | ||||||
|
||||||
log "github.com/hashicorp/go-hclog" | ||||||
"github.com/hashicorp/go-hclog" | ||||||
"github.com/hashicorp/nomad/client/allocdir" | ||||||
"github.com/hashicorp/nomad/client/allocrunner/interfaces" | ||||||
agentconsul "github.com/hashicorp/nomad/command/agent/consul" | ||||||
"github.com/hashicorp/nomad/helper" | ||||||
"github.com/hashicorp/nomad/nomad/structs" | ||||||
"github.com/pkg/errors" | ||||||
) | ||||||
|
||||||
var _ interfaces.TaskPrestartHook = &envoyBootstrapHook{} | ||||||
const envoyBootstrapHookName = "envoy_bootstrap" | ||||||
|
||||||
type envoyBootstrapHookConfig struct { | ||||||
alloc *structs.Allocation | ||||||
consulHTTPAddr string | ||||||
logger hclog.Logger | ||||||
} | ||||||
|
||||||
const ( | ||||||
envoyBaseAdminPort = 19000 | ||||||
|
@@ -27,27 +35,28 @@ const ( | |||||
// envoyBootstrapHook writes the bootstrap config for the Connect Envoy proxy | ||||||
// sidecar. | ||||||
type envoyBootstrapHook struct { | ||||||
// alloc is the allocation with the envoy task being bootstrapped. | ||||||
alloc *structs.Allocation | ||||||
|
||||||
// Bootstrapping Envoy requires talking directly to Consul to generate | ||||||
// the bootstrap.json config. Runtime Envoy configuration is done via | ||||||
// Consul's gRPC endpoint. | ||||||
consulHTTPAddr string | ||||||
|
||||||
logger log.Logger | ||||||
// logger is used to log things | ||||||
logger hclog.Logger | ||||||
} | ||||||
|
||||||
func newEnvoyBootstrapHook(alloc *structs.Allocation, consulHTTPAddr string, logger log.Logger) *envoyBootstrapHook { | ||||||
h := &envoyBootstrapHook{ | ||||||
alloc: alloc, | ||||||
consulHTTPAddr: consulHTTPAddr, | ||||||
func newEnvoyBootstrapHook(c *envoyBootstrapHookConfig) *envoyBootstrapHook { | ||||||
return &envoyBootstrapHook{ | ||||||
alloc: c.alloc, | ||||||
consulHTTPAddr: c.consulHTTPAddr, | ||||||
logger: c.logger.Named(envoyBootstrapHookName), | ||||||
} | ||||||
h.logger = logger.Named(h.Name()) | ||||||
return h | ||||||
} | ||||||
|
||||||
func (envoyBootstrapHook) Name() string { | ||||||
return "envoy_bootstrap" | ||||||
return envoyBootstrapHookName | ||||||
} | ||||||
|
||||||
func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { | ||||||
|
@@ -59,7 +68,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP | |||||
|
||||||
serviceName := req.Task.Kind.Value() | ||||||
if serviceName == "" { | ||||||
return fmt.Errorf("Connect proxy sidecar does not specify service name") | ||||||
return errors.New("connect proxy sidecar does not specify service name") | ||||||
} | ||||||
|
||||||
tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) | ||||||
|
@@ -73,13 +82,12 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP | |||||
} | ||||||
|
||||||
if service == nil { | ||||||
return fmt.Errorf("Connect proxy sidecar task exists but no services configured with a sidecar") | ||||||
return errors.New("connect proxy sidecar task exists but no services configured with a sidecar") | ||||||
} | ||||||
|
||||||
h.logger.Debug("bootstrapping Connect proxy sidecar", "task", req.Task.Name, "service", serviceName) | ||||||
|
||||||
//TODO Should connect directly to Consul if the sidecar is running on | ||||||
// the host netns. | ||||||
//TODO Should connect directly to Consul if the sidecar is running on the host netns. | ||||||
grpcAddr := "unix://" + allocdir.AllocGRPCSocket | ||||||
|
||||||
// Envoy runs an administrative API on the loopback interface. If multiple sidecars | ||||||
|
@@ -92,24 +100,35 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP | |||||
|
||||||
// Envoy bootstrap configuration may contain a Consul token, so write | ||||||
// it to the secrets directory like Vault tokens. | ||||||
fn := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json") | ||||||
bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json") | ||||||
|
||||||
id := agentconsul.MakeAllocServiceID(h.alloc.ID, "group-"+tg.Name, service) | ||||||
h.logger.Debug("bootstrapping envoy", "sidecar_for", service.Name, "boostrap_file", fn, "sidecar_for_id", id, "grpc_addr", grpcAddr, "admin_bind", envoyAdminBind) | ||||||
|
||||||
h.logger.Debug("bootstrapping envoy", "sidecar_for", service.Name, "bootstrap_file", bootstrapFilePath, "sidecar_for_id", id, "grpc_addr", grpcAddr, "admin_bind", envoyAdminBind) | ||||||
|
||||||
siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir) | ||||||
if err != nil { | ||||||
h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name) | ||||||
return errors.Wrap(err, "failed to generate envoy bootstrap config") | ||||||
} | ||||||
h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "") | ||||||
|
||||||
bootstrapArgs := envoyBootstrapArgs{ | ||||||
sidecarFor: id, | ||||||
grpcAddr: grpcAddr, | ||||||
consulHTTPAddr: h.consulHTTPAddr, | ||||||
envoyAdminBind: envoyAdminBind, | ||||||
siToken: siToken, | ||||||
}.args() | ||||||
|
||||||
// Since Consul services are registered asynchronously with this task | ||||||
// hook running, retry a small number of times with backoff. | ||||||
for tries := 3; ; tries-- { | ||||||
cmd := exec.CommandContext(ctx, "consul", "connect", "envoy", | ||||||
"-grpc-addr", grpcAddr, | ||||||
"-http-addr", h.consulHTTPAddr, | ||||||
"-admin-bind", envoyAdminBind, | ||||||
"-bootstrap", | ||||||
"-sidecar-for", id, | ||||||
) | ||||||
|
||||||
cmd := exec.CommandContext(ctx, "consul", bootstrapArgs...) | ||||||
|
||||||
// Redirect output to secrets/envoy_bootstrap.json | ||||||
fd, err := os.Create(fn) | ||||||
fd, err := os.Create(bootstrapFilePath) | ||||||
if err != nil { | ||||||
return fmt.Errorf("error creating secrets/envoy_bootstrap.json for envoy: %v", err) | ||||||
} | ||||||
|
@@ -138,7 +157,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP | |||||
// occurs, and (b) the file will either be rewritten on | ||||||
// retry or eventually garbage collected if the task | ||||||
// fails. | ||||||
os.Remove(fn) | ||||||
os.Remove(bootstrapFilePath) | ||||||
|
||||||
// ExitErrors are recoverable since they indicate the | ||||||
// command was runnable but exited with a unsuccessful | ||||||
|
@@ -174,3 +193,87 @@ func buildEnvoyAdminBind(alloc *structs.Allocation, taskName string) string { | |||||
} | ||||||
return fmt.Sprintf("localhost:%d", port) | ||||||
} | ||||||
|
||||||
func (h *envoyBootstrapHook) writeConfig(filename, config string) error { | ||||||
if err := ioutil.WriteFile(filename, []byte(config), 0440); err != nil { | ||||||
_ = os.Remove(filename) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
return err | ||||||
} | ||||||
return nil | ||||||
} | ||||||
|
||||||
func (_ *envoyBootstrapHook) retry(ctx context.Context) bool { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this function used anywhere? Using an uninitialized receiver is often a smell imo that you don't need it. This logic is simple enough that if you are using it I'd just inline the select and maybe make a const out of the timeout. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||||||
select { | ||||||
case <-ctx.Done(): | ||||||
return false | ||||||
case <-time.After(2 * time.Second): | ||||||
return true | ||||||
} | ||||||
} | ||||||
|
||||||
func (h *envoyBootstrapHook) execute(cmd *exec.Cmd) (string, error) { | ||||||
var ( | ||||||
stdout bytes.Buffer | ||||||
stderr bytes.Buffer | ||||||
) | ||||||
|
||||||
cmd.Stdout = &stdout | ||||||
cmd.Stderr = &stderr | ||||||
|
||||||
if err := cmd.Run(); err != nil { | ||||||
_, recoverable := err.(*exec.ExitError) | ||||||
// ExitErrors are recoverable since they indicate the | ||||||
// command was runnable but exited with a unsuccessful | ||||||
// error code. | ||||||
return stderr.String(), structs.NewRecoverableError(err, recoverable) | ||||||
} | ||||||
return stdout.String(), nil | ||||||
} | ||||||
|
||||||
// envoyBootstrapArgs is used to accumulate CLI arguments that will be passed | ||||||
// along to the exec invocation of consul which will then generate the bootstrap | ||||||
// configuration file for envoy. | ||||||
type envoyBootstrapArgs struct { | ||||||
sidecarFor string | ||||||
grpcAddr string | ||||||
envoyAdminBind string | ||||||
consulHTTPAddr string | ||||||
siToken string | ||||||
} | ||||||
|
||||||
// args returns the CLI arguments consul needs in the correct order, with the | ||||||
// -token argument present or not present depending on whether it is set. | ||||||
func (e envoyBootstrapArgs) args() []string { | ||||||
shoenig marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
arguments := []string{ | ||||||
"connect", | ||||||
"envoy", | ||||||
"-grpc-addr", e.grpcAddr, | ||||||
"-http-addr", e.consulHTTPAddr, | ||||||
"-admin-bind", e.envoyAdminBind, | ||||||
"-bootstrap", | ||||||
"-sidecar-for", e.sidecarFor, | ||||||
} | ||||||
if e.siToken != "" { | ||||||
arguments = append(arguments, "-token", e.siToken) | ||||||
} | ||||||
return arguments | ||||||
} | ||||||
|
||||||
// maybeLoadSIToken reads the SI token saved to disk in the secrets directory | ||||||
// by the service identities prestart hook. This envoy bootstrap hook blocks | ||||||
// until the sids hook completes, so if the SI token is required to exist (i.e. | ||||||
// Consul ACLs are enabled), it will be in place by the time we try to read it. | ||||||
func (h *envoyBootstrapHook) maybeLoadSIToken(task, dir string) (string, error) { | ||||||
tokenPath := filepath.Join(dir, sidsTokenFile) | ||||||
token, err := ioutil.ReadFile(tokenPath) | ||||||
if err != nil { | ||||||
if !os.IsNotExist(err) { | ||||||
h.logger.Error("failed to load SI token", "task", task, "error", err) | ||||||
return "", errors.Wrapf(err, "failed to load SI token for %s", task) | ||||||
} | ||||||
h.logger.Trace("no SI token to load", "task", task) | ||||||
return "", nil // token file does not exist | ||||||
} | ||||||
h.logger.Trace("recovered pre-existing SI token", "task", task) | ||||||
return string(token), nil | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I started thinking that maybe we want to use a different context here. This command should execute rather instantly. If it does block, blocking it will block prestart until the task is restarted. Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you suggesting we could wrap the context with a timeout, e.g.
That would prevent the exec from hanging forever and also let the TR cancel it. If so I'd like to pull the whole body of the retry loop into a function so we can make use of
defer
for both thectx
'scancel
and thefd.Close
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The loop is because this code depends-on and runs-asynchronously-with group service registration in the alloc runner hook. Since our Consul service client does not block until a service is successfully registered we have no way of knowing whether this will succeed on the first try.
I don't think this command blocks if the expected service doesn't exist. I think it just errors. Hence the retries.
A conservative timeout (3+ seconds) just to ensure it doesn't block forever for some unexpected reason doesn't seem like a bad idea though.
Ideally this code would die someday when Consul provides an API or library for generating the bootstrap config.