Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: periodically reconcile containers #6325

Merged
merged 9 commits into from
Oct 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions drivers/docker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,25 @@ var (
Name: pluginName,
}

danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_grace": hclspec.NewDefault(
hclspec.NewAttr("creation_grace", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"dry_run": hclspec.NewDefault(
hclspec.NewAttr("dry_run", "bool", false),
hclspec.NewLiteral(`false`),
),
})

// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// Example:
Expand Down Expand Up @@ -195,6 +214,10 @@ var (
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral("{}"),
),
})), hclspec.NewLiteral(`{
image = true
container = true
Expand Down Expand Up @@ -491,6 +514,28 @@ type DockerVolumeDriverConfig struct {
Options hclutils.MapStrStr `codec:"options"`
}

// ContainerGCConfig controls the behavior of the GC reconciler to detects
// dangling nomad containers that aren't tracked due to docker/nomad bugs
type ContainerGCConfig struct {
notnoop marked this conversation as resolved.
Show resolved Hide resolved
// Enabled controls whether container reconciler is enabled
Enabled bool `codec:"enabled"`

// DryRun indicates that reconciler should log unexpectedly running containers
// if found without actually killing them
DryRun bool `codec:"dry_run"`

// PeriodStr controls the frequency of scanning containers
PeriodStr string `codec:"period"`
period time.Duration `codec:"-"`

// CreationGraceStr is the duration allowed for a newly created container
// to live without being registered as a running task in nomad.
// A container is treated as leaked if it lived more than grace duration
// and haven't been registered in tasks.
CreationGraceStr string `codec:"creation_grace"`
CreationGrace time.Duration `codec:"-"`
}

type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
Expand Down Expand Up @@ -519,6 +564,8 @@ type GCConfig struct {
ImageDelay string `codec:"image_delay"`
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`

DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}

type VolumeConfig struct {
Expand All @@ -534,6 +581,8 @@ func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
return configSpec, nil
}

const danglingContainersCreationGraceMinimum = 1 * time.Minute

func (d *Driver) SetConfig(c *base.Config) error {
var config DriverConfig
if len(c.PluginConfig) != 0 {
Expand All @@ -551,6 +600,25 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.GC.imageDelayDuration = dur
}

if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
if err != nil {
return fmt.Errorf("failed to parse 'period' duration: %v", err)
}
d.config.GC.DanglingContainers.period = dur
notnoop marked this conversation as resolved.
Show resolved Hide resolved
}

if len(d.config.GC.DanglingContainers.CreationGraceStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
if err != nil {
return fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
}
if dur < danglingContainersCreationGraceMinimum {
return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
}
d.config.GC.DanglingContainers.CreationGrace = dur
}

if c.AgentConfig != nil {
d.clientConfig = c.AgentConfig.Driver
}
Expand All @@ -568,6 +636,8 @@ func (d *Driver) SetConfig(c *base.Config) error {

d.coordinator = newDockerCoordinator(coordinatorConfig)

d.reconciler = newReconciler(d)

return nil
}

Expand Down
19 changes: 18 additions & 1 deletion drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ var (
nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)

const (
dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
)

type Driver struct {
// eventer is used to handle multiplexing of TaskEvents calls such that an
// event can be broadcast to all callers
Expand Down Expand Up @@ -108,6 +112,8 @@ type Driver struct {
// for use during fingerprinting.
detected bool
detectedLock sync.RWMutex

reconciler *containerReconciler
}

// NewDockerDriver returns a docker implementation of a driver plugin
Expand Down Expand Up @@ -309,6 +315,10 @@ CREATE:
// the container is started
runningContainer, err := client.InspectContainer(container.ID)
if err != nil {
client.RemoveContainer(docker.RemoveContainerOptions{
schmichael marked this conversation as resolved.
Show resolved Hide resolved
ID: container.ID,
Force: true,
})
msg := "failed to inspect started container"
d.logger.Error(msg, "error", err)
client.RemoveContainer(docker.RemoveContainerOptions{
Expand Down Expand Up @@ -977,9 +987,16 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T

if len(driverConfig.Labels) > 0 {
config.Labels = driverConfig.Labels
logger.Debug("applied labels on the container", "labels", config.Labels)
}

labels := make(map[string]string, len(driverConfig.Labels)+1)
for k, v := range driverConfig.Labels {
labels[k] = v
}
labels[dockerLabelAllocID] = task.AllocID
config.Labels = labels
logger.Debug("applied labels on the container", "labels", config.Labels)

config.Env = task.EnvList()

containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
Expand Down
36 changes: 35 additions & 1 deletion drivers/docker/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,8 @@ func TestDockerDriver_Labels(t *testing.T) {
t.Fatalf("err: %v", err)
}

require.Equal(t, 2, len(container.Config.Labels))
// expect to see 1 additional standard labels
require.Equal(t, len(cfg.Labels)+1, len(container.Config.Labels))
for k, v := range cfg.Labels {
require.Equal(t, v, container.Config.Labels[k])
}
Expand Down Expand Up @@ -1008,6 +1009,39 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) {
require.Equal(t, containerName, c.Name)
}

func TestDockerDriver_CreateContainerConfig_Labels(t *testing.T) {
t.Parallel()

task, cfg, _ := dockerTask(t)
task.AllocID = uuid.Generate()
task.JobName = "redis-demo-job"

cfg.Labels = map[string]string{
"user_label": "user_value",

// com.hashicorp.nomad. labels are reserved and
// cannot be overridden
"com.hashicorp.nomad.alloc_id": "bad_value",
}

require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

dh := dockerDriverHarness(t, nil)
driver := dh.Impl().(*Driver)

c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)

expectedLabels := map[string]string{
// user provided labels
"user_label": "user_value",
// default labels
"com.hashicorp.nomad.alloc_id": task.AllocID,
}

require.Equal(t, expectedLabels, c.Config.Labels)
}

func TestDockerDriver_CreateContainerConfig_Logging(t *testing.T) {
t.Parallel()

Expand Down
4 changes: 4 additions & 0 deletions drivers/docker/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import (
)

func (d *Driver) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
// start reconciler when we start fingerprinting
// this is the only method called when driver is launched properly
d.reconciler.Start()

ch := make(chan *drivers.Fingerprint)
go d.handleFingerprint(ctx, ch)
return ch, nil
Expand Down
Loading