Skip to content

Commit

Permalink
docs: how to troubleshoot consul connect envoy
Browse files Browse the repository at this point in the history
* largely a doc-ification of this commit message:
  d476780
  this doesn't spell out all the possible failure modes,
  but should be a good starting point for folks.

* connect: add doc link to envoy bootstrap error

* add Unwrap() to RecoverableError
  mainly for easier testing
  • Loading branch information
gulducat committed Feb 2, 2023
1 parent d881b23 commit f5a34fc
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .changelog/15908.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
docs: link to an envoy troubleshooting doc when envoy bootstrap fails
```
9 changes: 8 additions & 1 deletion client/allocrunner/taskrunner/envoy_bootstrap_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ const (
envoyBootstrapMaxJitter = 500 * time.Millisecond
)

// errEnvoyBootstrapError is the sentinel error that Prestart wraps (via %w)
// when creating the Envoy bootstrap configuration fails, so callers and tests
// can match the failure with errors.Is instead of comparing message strings.
var errEnvoyBootstrapError = errors.New("error creating bootstrap configuration for Connect proxy sidecar")

type consulTransportConfig struct {
HTTPAddr string // required
Auth string // optional, env CONSUL_HTTP_AUTH
Expand Down Expand Up @@ -373,7 +377,10 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
// Wrap the last error from Consul and set that as our status.
_, recoverable := cmdErr.(*exec.ExitError)
return structs.NewRecoverableError(
fmt.Errorf("error creating bootstrap configuration for Connect proxy sidecar: %v", cmdErr),
fmt.Errorf("%w: %v; see: <https://www.nomadproject.io/s/envoy-bootstrap-error>",
errEnvoyBootstrapError,
cmdErr,
),
recoverable,
)
}
Expand Down
4 changes: 2 additions & 2 deletions client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ func TestTaskRunner_EnvoyBootstrapHook_RecoverableError(t *testing.T) {

// Run the hook
err := h.Prestart(context.Background(), req, resp)
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
require.ErrorIs(t, err, errEnvoyBootstrapError)
require.True(t, structs.IsRecoverable(err))

// Assert it is not Done
Expand Down Expand Up @@ -760,7 +760,7 @@ func TestTaskRunner_EnvoyBootstrapHook_retryTimeout(t *testing.T) {

// Run the hook and get the error
err := h.Prestart(context.Background(), req, &resp)
require.EqualError(t, err, "error creating bootstrap configuration for Connect proxy sidecar: exit status 1")
require.ErrorIs(t, err, errEnvoyBootstrapError)

// Current time should be at least start time + total wait time
minimum := begin.Add(h.envoyBootstrapWaitTime)
Expand Down
6 changes: 6 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11901,6 +11901,7 @@ type KeyringRequest struct {
type RecoverableError struct {
// Err is the message of the underlying error, captured with e.Error() when
// the value is built by NewRecoverableError.
Err string
// Recoverable is the flag supplied to NewRecoverableError; IsUnrecoverable
// reports its negation.
Recoverable bool
// wrapped keeps the original error value so Unwrap can expose it to
// errors.Is / errors.As.
// NOTE(review): the field is unexported, so it is presumably dropped if this
// struct is ever encoded and decoded; Unwrap would then return nil — confirm.
wrapped error
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
Expand All @@ -11913,6 +11914,7 @@ func NewRecoverableError(e error, recoverable bool) error {
return &RecoverableError{
Err: e.Error(),
Recoverable: recoverable,
wrapped: e,
}
}

Expand All @@ -11935,6 +11937,10 @@ func (r *RecoverableError) IsUnrecoverable() bool {
return !r.Recoverable
}

// Unwrap returns the error that was passed to NewRecoverableError, which lets
// errors.Is and errors.As match against the original error chain. The wrapped
// field is only set by NewRecoverableError in the code shown here, so a
// RecoverableError created any other way yields nil.
func (r *RecoverableError) Unwrap() error {
return r.wrapped
}

// Recoverable is an interface for errors to implement to indicate whether or
// not they are fatal or recoverable.
type Recoverable interface {
Expand Down
26 changes: 26 additions & 0 deletions website/content/docs/integrations/consul-connect.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,32 @@ dashes (`-`) are converted to underscores (`_`) in environment variables so
- Prior to Consul 1.9, the Envoy sidecar proxy will drop and stop accepting
connections while the Nomad agent is restarting.

## Troubleshooting

If the sidecar service is not running correctly, you can investigate
potential `envoy` failures by checking the following:

* Task logs in the associated `connect-*` task
* Task secrets (may contain sensitive information):
  * envoy CLI command: `secrets/.envoy_bootstrap.cmd`
  * environment variables: `secrets/.envoy_bootstrap.env`
* An extra Allocation log file: `alloc/logs/envoy_bootstrap.stderr.0`

For example, with an allocation ID starting with `b36a`:

```shell-session
nomad alloc status -short b36a # to get the connect-* task name
nomad alloc logs -task connect-proxy-count-api -stderr b36a
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.cmd
nomad alloc exec -task connect-proxy-count-api b36a cat secrets/.envoy_bootstrap.env
nomad alloc fs b36a alloc/logs/envoy_bootstrap.stderr.0
```

Note: If the alloc is unable to start successfully, these debugging files may
only be accessible from the host filesystem. Even then, the sidecar task's
secrets directory may not be available on systems where it is mounted in a
temporary filesystem.

[count-dashboard]: /img/count-dashboard.png
[consul_acl]: https://github.com/hashicorp/consul/issues/7414
[gh-9907]: https://github.com/hashicorp/nomad/issues/9907
Expand Down

0 comments on commit f5a34fc

Please sign in to comment.