Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for Nomad - Consul Connect integration with Consul ACLs #6905

Merged
merged 28 commits into from
Jan 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fa00fbf
client: enable nomad client to request and set SI tokens for tasks
shoenig Nov 27, 2019
ad3afe5
nomad: proxy requests for Service Identity tokens between Clients and…
shoenig Dec 6, 2019
f70998e
client: enable envoy bootstrap hook to set SI token
shoenig Dec 18, 2019
2e2811f
nomad: fixup token policy validation
shoenig Dec 19, 2019
6a0e31d
nomad: handle SI token revocations concurrently
shoenig Jan 2, 2020
369520e
agent: re-enable the server in dev mode
shoenig Jan 7, 2020
a264779
client: remove unused indirection for referencing consul executable
shoenig Jan 7, 2020
869942c
client: skip task SI token file load failure if testing as root
shoenig Jan 7, 2020
53e3d8e
comments: cleanup some leftover debug comments and such
shoenig Jan 7, 2020
7416a4f
nomad,client: apply smaller PR suggestions
shoenig Jan 15, 2020
00bfedd
nomad,client: apply more comment/style PR tweaks
shoenig Jan 15, 2020
2116d35
client: set context timeout around SI token derivation
shoenig Jan 15, 2020
28e8963
client: manage TR kill from parent on SI token derivation failure
shoenig Jan 15, 2020
d88f25f
nomad: fix leftover missed refactoring in consul policy checking
shoenig Jan 15, 2020
ee47a27
nomad: make TaskGroup.UsesConnect helper a public helper
shoenig Jan 30, 2020
6f1503a
client: PR cleanup - shadow context variable
shoenig Jan 30, 2020
d3ba869
client: PR cleanup - improved logging around kill task in SIDS hook
shoenig Jan 30, 2020
50cc565
client: additional test cases around failures in SIDS hook
shoenig Jan 30, 2020
86219d1
tests: skip some SIDS hook tests if running tests as root
shoenig Jan 30, 2020
5b2bb63
e2e: e2e test for connect with consul acls
shoenig Jan 13, 2020
8062cde
e2e: remove forgotten unused field from new struct
shoenig Jan 24, 2020
814b50e
e2e: do not use eventually when waiting for allocs
shoenig Jan 24, 2020
968f040
e2e: uncomment test case that is not broken
shoenig Jan 24, 2020
676af6a
e2e: use hclfmt on consul acls policy config files
shoenig Jan 27, 2020
02aeb17
e2e: agent token was only being set for server0
shoenig Jan 27, 2020
e4d7b5f
e2e: remove redundant extra API call for getting allocs
shoenig Jan 27, 2020
45920e9
e2e: setup consul ACLs a little more correctly
shoenig Jan 28, 2020
0d6254e
Merge pull request #6982 from hashicorp/f-e2e-consul-acls
shoenig Jan 31, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions client/allocrunner/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ type allocRunner struct {
// registering services and checks
consulClient consul.ConsulServiceAPI

// sidsClient is the client used by the service identity hook for
// managing SI tokens
sidsClient consul.ServiceIdentityAPI

// vaultClient is the used to manage Vault tokens
vaultClient vaultclient.VaultClient

Expand Down Expand Up @@ -157,6 +161,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) {
alloc: alloc,
clientConfig: config.ClientConfig,
consulClient: config.Consul,
sidsClient: config.ConsulSI,
vaultClient: config.Vault,
tasks: make(map[string]*taskrunner.TaskRunner, len(tg.Tasks)),
waitCh: make(chan struct{}),
Expand Down Expand Up @@ -210,6 +215,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error {
StateDB: ar.stateDB,
StateUpdater: ar,
Consul: ar.consulClient,
ConsulSI: ar.sidsClient,
Vault: ar.vaultClient,
DeviceStatsReporter: ar.deviceStatsReporter,
DeviceManager: ar.devicemanager,
Expand Down
3 changes: 3 additions & 0 deletions client/allocrunner/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ type Config struct {
// Consul is the Consul client used to register task services and checks
Consul consul.ConsulServiceAPI

// ConsulSI is the Consul client used to manage service identity tokens.
ConsulSI consul.ServiceIdentityAPI

// Vault is the Vault client to use to retrieve Vault tokens
Vault vaultclient.VaultClient

Expand Down
144 changes: 119 additions & 25 deletions client/allocrunner/taskrunner/envoybootstrap_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,28 @@ import (
"bytes"
"context"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"time"

log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
agentconsul "github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/pkg/errors"
)

var _ interfaces.TaskPrestartHook = &envoyBootstrapHook{}
const envoyBootstrapHookName = "envoy_bootstrap"

type envoyBootstrapHookConfig struct {
alloc *structs.Allocation
consulHTTPAddr string
logger hclog.Logger
}

const (
envoyBaseAdminPort = 19000
Expand All @@ -27,27 +35,28 @@ const (
// envoyBootstrapHook writes the bootstrap config for the Connect Envoy proxy
// sidecar.
type envoyBootstrapHook struct {
// alloc is the allocation with the envoy task being bootstrapped.
alloc *structs.Allocation

// Bootstrapping Envoy requires talking directly to Consul to generate
// the bootstrap.json config. Runtime Envoy configuration is done via
// Consul's gRPC endpoint.
consulHTTPAddr string

logger log.Logger
// logger is used to log things
logger hclog.Logger
}

func newEnvoyBootstrapHook(alloc *structs.Allocation, consulHTTPAddr string, logger log.Logger) *envoyBootstrapHook {
h := &envoyBootstrapHook{
alloc: alloc,
consulHTTPAddr: consulHTTPAddr,
func newEnvoyBootstrapHook(c *envoyBootstrapHookConfig) *envoyBootstrapHook {
return &envoyBootstrapHook{
alloc: c.alloc,
consulHTTPAddr: c.consulHTTPAddr,
logger: c.logger.Named(envoyBootstrapHookName),
}
h.logger = logger.Named(h.Name())
return h
}

func (envoyBootstrapHook) Name() string {
return "envoy_bootstrap"
return envoyBootstrapHookName
}

func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
Expand All @@ -59,7 +68,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP

serviceName := req.Task.Kind.Value()
if serviceName == "" {
return fmt.Errorf("Connect proxy sidecar does not specify service name")
return errors.New("connect proxy sidecar does not specify service name")
}

tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
Expand All @@ -73,13 +82,12 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP
}

if service == nil {
return fmt.Errorf("Connect proxy sidecar task exists but no services configured with a sidecar")
return errors.New("connect proxy sidecar task exists but no services configured with a sidecar")
}

h.logger.Debug("bootstrapping Connect proxy sidecar", "task", req.Task.Name, "service", serviceName)

//TODO Should connect directly to Consul if the sidecar is running on
// the host netns.
//TODO Should connect directly to Consul if the sidecar is running on the host netns.
grpcAddr := "unix://" + allocdir.AllocGRPCSocket

// Envoy runs an administrative API on the loopback interface. If multiple sidecars
Expand All @@ -92,24 +100,35 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP

// Envoy bootstrap configuration may contain a Consul token, so write
// it to the secrets directory like Vault tokens.
fn := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json")
bootstrapFilePath := filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json")

id := agentconsul.MakeAllocServiceID(h.alloc.ID, "group-"+tg.Name, service)
h.logger.Debug("bootstrapping envoy", "sidecar_for", service.Name, "boostrap_file", fn, "sidecar_for_id", id, "grpc_addr", grpcAddr, "admin_bind", envoyAdminBind)

h.logger.Debug("bootstrapping envoy", "sidecar_for", service.Name, "bootstrap_file", bootstrapFilePath, "sidecar_for_id", id, "grpc_addr", grpcAddr, "admin_bind", envoyAdminBind)

siToken, err := h.maybeLoadSIToken(req.Task.Name, req.TaskDir.SecretsDir)
if err != nil {
h.logger.Error("failed to generate envoy bootstrap config", "sidecar_for", service.Name)
return errors.Wrap(err, "failed to generate envoy bootstrap config")
}
h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "")

bootstrapArgs := envoyBootstrapArgs{
sidecarFor: id,
grpcAddr: grpcAddr,
consulHTTPAddr: h.consulHTTPAddr,
envoyAdminBind: envoyAdminBind,
siToken: siToken,
}.args()

// Since Consul services are registered asynchronously with this task
// hook running, retry a small number of times with backoff.
for tries := 3; ; tries-- {
cmd := exec.CommandContext(ctx, "consul", "connect", "envoy",
"-grpc-addr", grpcAddr,
"-http-addr", h.consulHTTPAddr,
"-admin-bind", envoyAdminBind,
"-bootstrap",
"-sidecar-for", id,
)

cmd := exec.CommandContext(ctx, "consul", bootstrapArgs...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started thinking that maybe we want to use a different context here. This command should execute rather instantly. If it does block, blocking it will block prestart until the task is restarted. Thoughts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you suggesting we could wrap the context with a timeout, e.g.

ctx, cancel = context.WithTimeout(ctx, 3 * time.Second)
cmd := exec.CommandContext(ctx, "consul", bootstrapArgs...)

That would prevent the exec from hanging forever and also let the TR cancel it. If so I'd like to pull the whole body of the retry loop into a function so we can make use of defer for both the ctx's cancel and the fd.Close

Copy link
Member

@schmichael schmichael Jan 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The loop is because this code depends-on and runs-asynchronously-with group service registration in the alloc runner hook. Since our Consul service client does not block until a service is successfully registered we have no way of knowing whether this will succeed on the first try.

I don't think this command blocks if the expected service doesn't exist. I think it just errors. Hence the retries.

A conservative timeout (3+ seconds) just to ensure it doesn't block forever for some unexpected reason doesn't seem like a bad idea though.

Ideally this code would die someday when Consul provides an API or library for generating the bootstrap config.


// Redirect output to secrets/envoy_bootstrap.json
fd, err := os.Create(fn)
fd, err := os.Create(bootstrapFilePath)
if err != nil {
return fmt.Errorf("error creating secrets/envoy_bootstrap.json for envoy: %v", err)
}
Expand Down Expand Up @@ -138,7 +157,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskP
// occurs, and (b) the file will either be rewritten on
// retry or eventually garbage collected if the task
// fails.
os.Remove(fn)
os.Remove(bootstrapFilePath)

// ExitErrors are recoverable since they indicate the
// command was runnable but exited with a unsuccessful
Expand Down Expand Up @@ -174,3 +193,78 @@ func buildEnvoyAdminBind(alloc *structs.Allocation, taskName string) string {
}
return fmt.Sprintf("localhost:%d", port)
}

func (h *envoyBootstrapHook) writeConfig(filename, config string) error {
if err := ioutil.WriteFile(filename, []byte(config), 0440); err != nil {
_ = os.Remove(filename)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_ = os.Remove(filename)
os.Remove(filename)

return err
}
return nil
}

func (h *envoyBootstrapHook) execute(cmd *exec.Cmd) (string, error) {
var (
stdout bytes.Buffer
stderr bytes.Buffer
)

cmd.Stdout = &stdout
cmd.Stderr = &stderr

if err := cmd.Run(); err != nil {
_, recoverable := err.(*exec.ExitError)
// ExitErrors are recoverable since they indicate the
// command was runnable but exited with a unsuccessful
// error code.
return stderr.String(), structs.NewRecoverableError(err, recoverable)
}
return stdout.String(), nil
}

// envoyBootstrapArgs is used to accumulate CLI arguments that will be passed
// along to the exec invocation of consul which will then generate the bootstrap
// configuration file for envoy.
type envoyBootstrapArgs struct {
sidecarFor string
grpcAddr string
envoyAdminBind string
consulHTTPAddr string
siToken string
}

// args returns the CLI arguments consul needs in the correct order, with the
// -token argument present or not present depending on whether it is set.
func (e envoyBootstrapArgs) args() []string {
shoenig marked this conversation as resolved.
Show resolved Hide resolved
arguments := []string{
"connect",
"envoy",
"-grpc-addr", e.grpcAddr,
"-http-addr", e.consulHTTPAddr,
"-admin-bind", e.envoyAdminBind,
"-bootstrap",
"-sidecar-for", e.sidecarFor,
}
if e.siToken != "" {
arguments = append(arguments, "-token", e.siToken)
}
return arguments
}

// maybeLoadSIToken reads the SI token saved to disk in the secrets directory
// by the service identities prestart hook. This envoy bootstrap hook blocks
// until the sids hook completes, so if the SI token is required to exist (i.e.
// Consul ACLs are enabled), it will be in place by the time we try to read it.
func (h *envoyBootstrapHook) maybeLoadSIToken(task, dir string) (string, error) {
tokenPath := filepath.Join(dir, sidsTokenFile)
token, err := ioutil.ReadFile(tokenPath)
if err != nil {
if !os.IsNotExist(err) {
h.logger.Error("failed to load SI token", "task", task, "error", err)
return "", errors.Wrapf(err, "failed to load SI token for %s", task)
}
h.logger.Trace("no SI token to load", "task", task)
return "", nil // token file does not exist
}
h.logger.Trace("recovered pre-existing SI token", "task", task)
return string(token), nil
}
Loading