Skip to content

Commit

Permalink
client: never embed alloc_dir in chroot
Browse files Browse the repository at this point in the history
Fixes #2522

Skip embedding client.alloc_dir when building chroot. If a user
configures a Nomad client agent so that the chroot_env will embed the
client.alloc_dir, Nomad will happily infinitely recurse while building
the chroot until something horrible happens. The best case scenario is
the filesystem's path length limit is hit. The worst case scenario is
disk space is exhausted.

A bad agent configuration will look something like this:

```hcl
data_dir = "/tmp/nomad-badagent"

client {
  enabled = true

  chroot_env {
    # Note that the source matches the data_dir
    "/tmp/nomad-badagent" = "/ohno"
    # ...
  }
}
```

Note that `/ohno/client` (the state_dir) will still be created but not
`/ohno/alloc` (the alloc_dir).
While I cannot think of a good reason why someone would want to embed
Nomad's client (and possibly server) directories in chroots, there
should be no cause for harm. chroots are only built when Nomad runs as
root, and Nomad disables running exec jobs as root by default. Therefore
even if client state is copied into chroots, it will be inaccessible to
tasks.

Skipping the `data_dir` and `{client,server}.state_dir` is possible, but
this PR attempts to implement the minimum viable solution to reduce risk
of unintended side effects or bugs.

When running tests as root in a vm without the fix, the following error
occurs:

```
=== RUN   TestAllocDir_SkipAllocDir
    alloc_dir_test.go:520:
                Error Trace:    alloc_dir_test.go:520
                Error:          Received unexpected error:
                                Couldn't create destination file /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/testtask/nomad/test/testtask/.../nomad/test/testtask/secrets/.nomad-mount: open /tmp/TestAllocDir_SkipAllocDir1457747331/001/nomad/test/.../testtask/secrets/.nomad-mount: file name too long
                Test:           TestAllocDir_SkipAllocDir
--- FAIL: TestAllocDir_SkipAllocDir (22.76s)
```

Also removed unused Copy methods on AllocDir and TaskDir structs.

Thanks to @eveld for not letting me forget about this!
  • Loading branch information
schmichael committed Oct 15, 2021
1 parent cc8d944 commit 8d77144
Show file tree
Hide file tree
Showing 14 changed files with 120 additions and 101 deletions.
40 changes: 12 additions & 28 deletions client/allocdir/alloc_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ type AllocDir struct {
// TaskDirs is a mapping of task names to their non-shared directory.
TaskDirs map[string]*TaskDir

// clientAllocDir is the client agent's root alloc directory. It must
// be excluded from chroots and is configured via client.alloc_dir.
clientAllocDir string

// built is true if Build has successfully run
built bool

Expand All @@ -104,44 +108,24 @@ type AllocDirFS interface {

// NewAllocDir initializes the AllocDir struct with allocDir as base path for
// the allocation directory.
func NewAllocDir(logger hclog.Logger, allocDir string) *AllocDir {
func NewAllocDir(logger hclog.Logger, clientAllocDir, allocID string) *AllocDir {
logger = logger.Named("alloc_dir")
allocDir := filepath.Join(clientAllocDir, allocID)
return &AllocDir{
AllocDir: allocDir,
SharedDir: filepath.Join(allocDir, SharedAllocName),
TaskDirs: make(map[string]*TaskDir),
logger: logger,
}
}

// Copy an AllocDir and all of its TaskDirs. Returns nil if AllocDir is
// nil.
func (d *AllocDir) Copy() *AllocDir {
if d == nil {
return nil
}

d.mu.RLock()
defer d.mu.RUnlock()

dcopy := &AllocDir{
AllocDir: d.AllocDir,
SharedDir: d.SharedDir,
TaskDirs: make(map[string]*TaskDir, len(d.TaskDirs)),
logger: d.logger,
}
for k, v := range d.TaskDirs {
dcopy.TaskDirs[k] = v.Copy()
clientAllocDir: clientAllocDir,
AllocDir: allocDir,
SharedDir: filepath.Join(allocDir, SharedAllocName),
TaskDirs: make(map[string]*TaskDir),
logger: logger,
}
return dcopy
}

// NewTaskDir creates a new TaskDir and adds it to the AllocDirs TaskDirs map.
func (d *AllocDir) NewTaskDir(name string) *TaskDir {
d.mu.Lock()
defer d.mu.Unlock()

td := newTaskDir(d.logger, d.AllocDir, name)
td := newTaskDir(d.logger, d.clientAllocDir, d.AllocDir, name)
d.TaskDirs[name] = td
return td
}
Expand Down
86 changes: 60 additions & 26 deletions client/allocdir/alloc_dir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"context"
"io"
"io/fs"
"io/ioutil"
"log"
"os"
Expand Down Expand Up @@ -53,7 +54,7 @@ func TestAllocDir_BuildAlloc(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
d.NewTaskDir(t1.Name)
d.NewTaskDir(t2.Name)
Expand Down Expand Up @@ -103,7 +104,7 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
if err := d.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
Expand Down Expand Up @@ -148,7 +149,7 @@ func TestAllocDir_Snapshot(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
if err := d.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
Expand Down Expand Up @@ -235,13 +236,13 @@ func TestAllocDir_Move(t *testing.T) {
defer os.RemoveAll(tmp2)

// Create two alloc dirs
d1 := NewAllocDir(testlog.HCLogger(t), tmp1)
d1 := NewAllocDir(testlog.HCLogger(t), tmp1, "test")
if err := d1.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
}
defer d1.Destroy()

d2 := NewAllocDir(testlog.HCLogger(t), tmp2)
d2 := NewAllocDir(testlog.HCLogger(t), tmp2, "test")
if err := d2.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
}
Expand Down Expand Up @@ -296,7 +297,7 @@ func TestAllocDir_EscapeChecking(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
if err := d.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
}
Expand Down Expand Up @@ -337,7 +338,7 @@ func TestAllocDir_ReadAt_SecretDir(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
if err := d.Build(); err != nil {
t.Fatalf("Build() failed: %v", err)
}
Expand Down Expand Up @@ -419,25 +420,6 @@ func TestAllocDir_CreateDir(t *testing.T) {
}
}

// TestAllocDir_Copy asserts that AllocDir.Copy does a deep copy of itself and
// all TaskDirs.
func TestAllocDir_Copy(t *testing.T) {
a := NewAllocDir(testlog.HCLogger(t), "foo")
a.NewTaskDir("bar")
a.NewTaskDir("baz")

b := a.Copy()

// Clear the logger
require.Equal(t, a, b)

// Make sure TaskDirs map is copied
a.NewTaskDir("new")
if b.TaskDirs["new"] != nil {
t.Errorf("TaskDirs map shared between copied")
}
}

func TestPathFuncs(t *testing.T) {
dir, err := ioutil.TempDir("", "nomadtest-pathfuncs")
if err != nil {
Expand Down Expand Up @@ -502,3 +484,55 @@ func TestAllocDir_DetectContentType(t *testing.T) {
require.Equal(expectedEncodings[file], res, "unexpected output for %v", file)
}
}

// TestAllocDir_SkipAllocDir asserts that building a chroot which contains
// itself will *not* infinitely recurse. AllocDirs should always skip embedding
// themselves into chroots.
//
// Warning: If this test fails it may fill your disk before failing, so be
// careful and/or confident.
func TestAllocDir_SkipAllocDir(t *testing.T) {
MountCompatible(t)

// Create root, alloc, and other dirs
rootDir := t.TempDir()

clientAllocDir := filepath.Join(rootDir, "nomad")
require.NoError(t, os.Mkdir(clientAllocDir, fs.ModeDir|0o777))

otherDir := filepath.Join(rootDir, "etc")
require.NoError(t, os.Mkdir(otherDir, fs.ModeDir|0o777))

// chroot contains client.alloc_dir! This could cause infinite
// recursion.
chroot := map[string]string{
rootDir: "/",
}

allocDir := NewAllocDir(testlog.HCLogger(t), clientAllocDir, "test")
taskDir := allocDir.NewTaskDir("testtask")

require.NoError(t, allocDir.Build())
defer allocDir.Destroy()

// Build chroot
err := taskDir.Build(true, chroot)
require.NoError(t, err)

// Assert other directory *was* embedded
embeddedOtherDir := filepath.Join(clientAllocDir, "test", "testtask", "etc")
if _, err := os.Stat(embeddedOtherDir); os.IsNotExist(err) {
t.Fatalf("expected other directory to exist at: %q", embeddedOtherDir)
}

// Assert client.alloc_dir was *not* embedded
embeddedChroot := filepath.Join(clientAllocDir, "test", "testtask", "nomad")
s, err := os.Stat(embeddedChroot)
if s != nil {
t.Logf("somehow you managed to embed the chroot without causing infinite recursion!")
t.Fatalf("expected chroot to not exist at: %q", embeddedChroot)
}
if !os.IsNotExist(err) {
t.Fatalf("expected chroot to not exist but error is: %v", err)
}
}
25 changes: 15 additions & 10 deletions client/allocdir/task_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,25 @@ type TaskDir struct {
// <task_dir>/secrets/
SecretsDir string

// skip embedding these paths in chroots. Used for avoiding embedding
// client.alloc_dir recursively.
skip map[string]struct{}

logger hclog.Logger
}

// newTaskDir creates a TaskDir struct with paths set. Call Build() to
// create paths on disk.
//
// Call AllocDir.NewTaskDir to create new TaskDirs
func newTaskDir(logger hclog.Logger, allocDir, taskName string) *TaskDir {
func newTaskDir(logger hclog.Logger, clientAllocDir, allocDir, taskName string) *TaskDir {
taskDir := filepath.Join(allocDir, taskName)

logger = logger.Named("task_dir").With("task_name", taskName)

// skip embedding client.alloc_dir in chroots
skip := map[string]struct{}{clientAllocDir: struct{}{}}

return &TaskDir{
AllocDir: allocDir,
Dir: taskDir,
Expand All @@ -59,21 +66,14 @@ func newTaskDir(logger hclog.Logger, allocDir, taskName string) *TaskDir {
SharedTaskDir: filepath.Join(taskDir, SharedAllocName),
LocalDir: filepath.Join(taskDir, TaskLocal),
SecretsDir: filepath.Join(taskDir, TaskSecrets),
skip: skip,
logger: logger,
}
}

// Copy a TaskDir. Panics if TaskDir is nil as TaskDirs should never be nil.
func (t *TaskDir) Copy() *TaskDir {
// No nested structures other than the logger which is safe to share,
// so just copy the struct
tcopy := *t
return &tcopy
}

// Build default directories and permissions in a task directory. chrootCreated
// allows skipping chroot creation if the caller knows it has already been
// done.
// done. client.alloc_dir will be skipped.
func (t *TaskDir) Build(createChroot bool, chroot map[string]string) error {
if err := os.MkdirAll(t.Dir, 0777); err != nil {
return err
Expand Down Expand Up @@ -149,6 +149,11 @@ func (t *TaskDir) buildChroot(entries map[string]string) error {
func (t *TaskDir) embedDirs(entries map[string]string) error {
subdirs := make(map[string]string)
for source, dest := range entries {
if _, ok := t.skip[source]; ok {
// source in skip list
continue
}

// Check to see if directory exists on host.
s, err := os.Stat(source)
if os.IsNotExist(err) {
Expand Down
8 changes: 4 additions & 4 deletions client/allocdir/task_dir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func TestTaskDir_EmbedNonexistent(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
td := d.NewTaskDir(t1.Name)
if err := d.Build(); err != nil {
Expand All @@ -39,7 +39,7 @@ func TestTaskDir_EmbedDirs(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
td := d.NewTaskDir(t1.Name)
if err := d.Build(); err != nil {
Expand Down Expand Up @@ -96,7 +96,7 @@ func TestTaskDir_NonRoot_Image(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
td := d.NewTaskDir(t1.Name)
if err := d.Build(); err != nil {
Expand All @@ -119,7 +119,7 @@ func TestTaskDir_NonRoot(t *testing.T) {
}
defer os.RemoveAll(tmp)

d := NewAllocDir(testlog.HCLogger(t), tmp)
d := NewAllocDir(testlog.HCLogger(t), tmp, "test")
defer d.Destroy()
td := d.NewTaskDir(t1.Name)
if err := d.Build(); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions client/allocdir/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ import (

// TestAllocDir returns a built alloc dir in a temporary directory and cleanup
// func.
func TestAllocDir(t testing.T, l hclog.Logger, prefix string) (*AllocDir, func()) {
func TestAllocDir(t testing.T, l hclog.Logger, prefix, id string) (*AllocDir, func()) {
dir, err := ioutil.TempDir("", prefix)
if err != nil {
t.Fatalf("Couldn't create temp dir: %v", err)
}

allocDir := NewAllocDir(l, dir)
allocDir := NewAllocDir(l, dir, id)

cleanup := func() {
if err := os.RemoveAll(dir); err != nil {
Expand Down
3 changes: 1 addition & 2 deletions client/allocrunner/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package allocrunner
import (
"context"
"fmt"
"path/filepath"
"sync"
"time"

Expand Down Expand Up @@ -227,7 +226,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) {
ar.allocBroadcaster = cstructs.NewAllocBroadcaster(ar.logger)

// Create alloc dir
ar.allocDir = allocdir.NewAllocDir(ar.logger, filepath.Join(config.ClientConfig.AllocDir, alloc.ID))
ar.allocDir = allocdir.NewAllocDir(ar.logger, config.ClientConfig.AllocDir, alloc.ID)

ar.taskHookCoordinator = newTaskHookCoordinator(ar.logger, tg.Tasks)

Expand Down
8 changes: 4 additions & 4 deletions client/allocrunner/consul_grpc_sock_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func TestConsulGRPCSocketHook_PrerunPostrun_Ok(t *testing.T) {

logger := testlog.HCLogger(t)

allocDir, cleanup := allocdir.TestAllocDir(t, logger, "EnvoyBootstrap")
allocDir, cleanup := allocdir.TestAllocDir(t, logger, "EnvoyBootstrap", alloc.ID)
defer cleanup()

// Start the unix socket proxy
Expand Down Expand Up @@ -105,15 +105,15 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) {

logger := testlog.HCLogger(t)

allocDir, cleanup := allocdir.TestAllocDir(t, logger, "EnvoyBootstrap")
defer cleanup()

// A config without an Addr or GRPCAddr is invalid.
consulConfig := &config.ConsulConfig{}

alloc := mock.Alloc()
connectAlloc := mock.ConnectAlloc()

allocDir, cleanup := allocdir.TestAllocDir(t, logger, "EnvoyBootstrap", alloc.ID)
defer cleanup()

{
// An alloc without a Connect proxy sidecar should not return
// an error.
Expand Down
4 changes: 2 additions & 2 deletions client/allocrunner/consul_http_sock_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestConsulSocketHook_PrerunPostrun_Ok(t *testing.T) {

logger := testlog.HCLogger(t)

allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask")
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask", alloc.ID)
defer cleanupDir()

// start unix socket proxy
Expand Down Expand Up @@ -93,7 +93,7 @@ func TestConsulHTTPSocketHook_Prerun_Error(t *testing.T) {

logger := testlog.HCLogger(t)

allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask")
allocDir, cleanupDir := allocdir.TestAllocDir(t, logger, "ConnectNativeTask", alloc.ID)
defer cleanupDir()

consulConfig := new(config.ConsulConfig)
Expand Down
Loading

0 comments on commit 8d77144

Please sign in to comment.