diff --git a/.changelog/24658.txt b/.changelog/24658.txt new file mode 100644 index 00000000000..f46da955490 --- /dev/null +++ b/.changelog/24658.txt @@ -0,0 +1,3 @@ +```release-note:bug +networking: check network namespaces on Linux during client restarts and fail the allocation if an existing namespace is invalid +``` diff --git a/client/allocrunner/network_hook.go b/client/allocrunner/network_hook.go index 570943b3795..bddcf1715c7 100644 --- a/client/allocrunner/network_hook.go +++ b/client/allocrunner/network_hook.go @@ -5,6 +5,7 @@ package allocrunner import ( "context" + "errors" "fmt" hclog "github.com/hashicorp/go-hclog" @@ -28,6 +29,8 @@ const ( dockerNetSpecHostnameKey = "docker_sandbox_hostname" ) +var ErrCNICheckFailed = errors.New("network namespace already exists but was misconfigured") + type networkIsolationSetter interface { SetNetworkIsolation(*drivers.NetworkIsolationSpec) } @@ -132,6 +135,9 @@ func (h *networkHook) Prerun() error { Hostname: interpolatedNetworks[0].Hostname, } + var checkedOnce bool + +CREATE: spec, created, err := h.manager.CreateNetwork(h.alloc.ID, &networkCreateReq) if err != nil { return fmt.Errorf("failed to create network for alloc: %v", err) @@ -142,11 +148,26 @@ func (h *networkHook) Prerun() error { h.isolationSetter.SetNetworkIsolation(spec) } - if created { - status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec) + if spec != nil { + status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec, created) if err != nil { + // if the netns already existed but is invalid, we get + // ErrCNICheckFailed. We'll try to recover from this one time by + // recreating the netns from scratch before giving up + if errors.Is(err, ErrCNICheckFailed) && !checkedOnce { + checkedOnce = true + destroyErr := h.manager.DestroyNetwork(h.alloc.ID, spec) + if destroyErr != nil { + return fmt.Errorf("%w: destroying network to retry failed: %v", err, destroyErr) + } + goto CREATE + } + return fmt.Errorf("failed to configure networking for alloc: %v", err) } + if status == nil { + return nil // netns already existed and was correctly configured + } // If the driver set the sandbox hostname label, then we will use that // to set the HostsConfig.Hostname. Otherwise, identify the sandbox diff --git a/client/allocrunner/network_hook_test.go b/client/allocrunner/network_hook_test.go index 99845f875bc..9e4be2da89a 100644 --- a/client/allocrunner/network_hook_test.go +++ b/client/allocrunner/network_hook_test.go @@ -4,6 +4,7 @@ package allocrunner import ( + "fmt" "testing" "github.com/hashicorp/nomad/ci" @@ -14,6 +15,7 @@ import ( "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" "github.com/hashicorp/nomad/plugins/drivers/testutils" + "github.com/hashicorp/nomad/testutil" "github.com/shoenig/test" "github.com/shoenig/test/must" ) @@ -65,7 +67,7 @@ func TestNetworkHook_Prerun_Postrun_group(t *testing.T) { MockNetworkManager: testutils.MockNetworkManager{ CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) { test.Eq(t, alloc.ID, allocID) - return spec, false, nil + return spec, true, nil }, DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error { @@ -137,3 +139,145 @@ func TestNetworkHook_Prerun_Postrun_host(t *testing.T) { must.NoError(t, hook.Postrun()) must.True(t, destroyCalled) } + +// TestNetworkHook_Prerun_Postrun_ExistingNetNS tests that the prerun and +// postrun hooks call the Setup and Destroy with the expected behaviors when the +// network namespace already exists (typical of agent restarts and host reboots) +func TestNetworkHook_Prerun_Postrun_ExistingNetNS(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + alloc.Job.TaskGroups[0].Networks = []*structs.NetworkResource{ + {Mode: "bridge"}, + } + + spec := &drivers.NetworkIsolationSpec{ + Mode: drivers.NetIsolationModeGroup, + Path: "test", + Labels: map[string]string{"abc": "123"}, + } + isolationSetter := &mockNetworkIsolationSetter{t: t, expectedSpec: spec} + statusSetter := &mockNetworkStatusSetter{t: t, expectedStatus: nil} + + callCounts := testutil.NewCallCounter() + + nm := &testutils.MockDriver{ + MockNetworkManager: testutils.MockNetworkManager{ + CreateNetworkF: func(allocID string, req *drivers.NetworkCreateRequest) (*drivers.NetworkIsolationSpec, bool, error) { + test.Eq(t, alloc.ID, allocID) + callCounts.Inc("CreateNetwork") + return spec, false, nil + }, + + DestroyNetworkF: func(allocID string, netSpec *drivers.NetworkIsolationSpec) error { + test.Eq(t, alloc.ID, allocID) + test.Eq(t, spec, netSpec) + callCounts.Inc("DestroyNetwork") + return nil + }, + }, + } + + fakePlugin := newMockCNIPlugin() + + configurator := &cniNetworkConfigurator{ + nodeAttrs: map[string]string{ + "plugins.cni.version.bridge": "1.6.1", + }, + nodeMeta: map[string]string{}, + logger: testlog.HCLogger(t), + cni: fakePlugin, + nsOpts: &nsOpts{}, + } + + envBuilder := taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region) + + testCases := []struct { + name string + cniVersion string + checkErrs []error + setupErrs []string + expectPrerunCreateNetworkCalls int + expectPrerunDestroyNetworkCalls int + expectCheckCalls int + expectSetupCalls int + expectPostrunDestroyNetworkCalls int + expectPrerunError string + }{ + { + name: "good check", + cniVersion: "1.6.1", + expectPrerunCreateNetworkCalls: 1, + expectPrerunDestroyNetworkCalls: 0, + expectCheckCalls: 1, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 1, + }, + { + name: "initial check fails", + cniVersion: "1.6.1", + checkErrs: []error{fmt.Errorf("whatever")}, + expectPrerunCreateNetworkCalls: 2, + expectPrerunDestroyNetworkCalls: 1, + expectCheckCalls: 2, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 2, + }, + { + name: "check fails twice", + cniVersion: "1.6.1", + checkErrs: []error{ + fmt.Errorf("whatever"), + fmt.Errorf("whatever"), + }, + expectPrerunCreateNetworkCalls: 2, + expectPrerunDestroyNetworkCalls: 1, + expectCheckCalls: 2, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 2, + expectPrerunError: "failed to configure networking for alloc: network namespace already exists but was misconfigured: whatever", + }, + { + name: "old CNI version skips check", + cniVersion: "1.2.0", + expectPrerunCreateNetworkCalls: 1, + expectPrerunDestroyNetworkCalls: 0, + expectCheckCalls: 0, + expectSetupCalls: 0, + expectPostrunDestroyNetworkCalls: 1, + }, + } + + for _, tc := range testCases { + + t.Run(tc.name, func(t *testing.T) { + callCounts.Reset() + fakePlugin.counter.Reset() + fakePlugin.checkErrors = tc.checkErrs + configurator.nodeAttrs["plugins.cni.version.bridge"] = tc.cniVersion + hook := newNetworkHook(testlog.HCLogger(t), isolationSetter, + alloc, nm, configurator, statusSetter, envBuilder.Build()) + + err := hook.Prerun() + if tc.expectPrerunError == "" { + must.NoError(t, err) + } else { + must.EqError(t, err, tc.expectPrerunError) + } + + test.Eq(t, tc.expectPrerunDestroyNetworkCalls, + callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after prerun")) + test.Eq(t, tc.expectPrerunCreateNetworkCalls, + callCounts.Get()["CreateNetwork"], test.Sprint("CreateNetwork calls after prerun")) + + test.Eq(t, tc.expectCheckCalls, fakePlugin.counter.Get()["Check"], test.Sprint("Check calls")) + test.Eq(t, tc.expectSetupCalls, fakePlugin.counter.Get()["Setup"], test.Sprint("Setup calls")) + + must.NoError(t, hook.Postrun()) + test.Eq(t, tc.expectPostrunDestroyNetworkCalls, + callCounts.Get()["DestroyNetwork"], test.Sprint("DestroyNetwork calls after postrun")) + + }) + + } +} diff --git a/client/allocrunner/networking.go b/client/allocrunner/networking.go index e98af0b4f89..07a98660795 100644 --- a/client/allocrunner/networking.go +++ b/client/allocrunner/networking.go @@ -14,7 +14,7 @@ import ( // NetworkConfigurator sets up and tears down the interfaces, routes, firewall // rules, etc for the configured networking mode of the allocation. type NetworkConfigurator interface { - Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) + Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error } @@ -23,7 +23,7 @@ type NetworkConfigurator interface { // require further configuration type hostNetworkConfigurator struct{} -func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec, bool) (*structs.AllocNetworkStatus, error) { return nil, nil } func (h *hostNetworkConfigurator) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error { @@ -41,10 +41,10 @@ type synchronizedNetworkConfigurator struct { nc NetworkConfigurator } -func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (s *synchronizedNetworkConfigurator) Setup(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { networkingGlobalMutex.Lock() defer networkingGlobalMutex.Unlock() - return s.nc.Setup(ctx, allocation, spec) + return s.nc.Setup(ctx, allocation, spec, created) } func (s *synchronizedNetworkConfigurator) Teardown(ctx context.Context, allocation *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { diff --git a/client/allocrunner/networking_bridge_linux.go b/client/allocrunner/networking_bridge_linux.go index 63a11c8f093..1204837b0dd 100644 --- a/client/allocrunner/networking_bridge_linux.go +++ b/client/allocrunner/networking_bridge_linux.go @@ -120,12 +120,12 @@ func (b *bridgeNetworkConfigurator) ensureForwardingRules() error { } // Setup calls the CNI plugins with the add action -func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { if err := b.ensureForwardingRules(); err != nil { return nil, fmt.Errorf("failed to initialize table forwarding rules: %v", err) } - return b.cni.Setup(ctx, alloc, spec) + return b.cni.Setup(ctx, alloc, spec, created) } // Teardown calls the CNI plugins with the delete action diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 1c877fbe2ff..b2edd58670f 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -28,6 +28,7 @@ import ( consulIPTables "github.com/hashicorp/consul/sdk/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-set/v3" + "github.com/hashicorp/go-version" "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/envoy" @@ -138,8 +139,19 @@ func addNomadWorkloadCNIArgs(logger log.Logger, alloc *structs.Allocation, cniAr } } +var supportsCNICheck = mustCNICheckConstraint() + +func mustCNICheckConstraint() version.Constraints { + version, err := version.NewConstraint(">= 1.3.0") + if err != nil { + panic(err) + } + return version +} + // Setup calls the CNI plugins with the add action -func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { +func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec, created bool) (*structs.AllocNetworkStatus, error) { + if err := c.ensureCNIInitialized(); err != nil { return nil, fmt.Errorf("cni not initialized: %w", err) } @@ -171,6 +183,31 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc cniArgs[ConsulIPTablesConfigEnvVar] = string(iptablesCfg) } + if !created { + // The netns will not be created if it already exists, typically on + // agent restart. If the configuration of a prexisting netns is wrong + // (ex. after a host reboot for docker created netns), networking will + // be broken. CNI's ADD command is not idempotent so we can't simply try + // again. Run CHECK to verify the network is still valid. Older plugins + // have a broken CHECK, so we have to allow the buggy behavior in the + // case of a host reboot with docker-created netns there. + cniVersion, err := version.NewSemver(c.nodeAttrs["plugins.cni.version.bridge"]) + if err == nil && supportsCNICheck.Check(cniVersion) { + err := c.cni.Check(ctx, alloc.ID, spec.Path, + c.nsOpts.withCapabilityPortMap(portMaps.ports), + c.nsOpts.withArgs(cniArgs), + ) + if err != nil { + return nil, fmt.Errorf("%w: %w", ErrCNICheckFailed, err) + } + } else { + c.logger.Trace("network namespace exists but could not check if networking is valid because bridge plugin version was <1.3.0: continuing anyways") + return nil, nil + } + c.logger.Trace("network namespace exists and passed check: skipping setup") + return nil, nil + } + // Depending on the version of bridge cni plugin used, a known race could occure // where two alloc attempt to create the nomad bridge at the same time, resulting // in one of them to fail. This rety attempts to overcome those erroneous failures. diff --git a/client/allocrunner/networking_cni_mock_test.go b/client/allocrunner/networking_cni_mock_test.go index 9d8e67e22a9..2bc06e06819 100644 --- a/client/allocrunner/networking_cni_mock_test.go +++ b/client/allocrunner/networking_cni_mock_test.go @@ -18,6 +18,7 @@ var _ cni.CNI = &mockCNIPlugin{} type mockCNIPlugin struct { counter *testutil.CallCounter setupErrors []string + checkErrors []error } func newMockCNIPlugin() *mockCNIPlugin { @@ -25,7 +26,9 @@ func newMockCNIPlugin() *mockCNIPlugin { callCounts := testutil.NewCallCounter() callCounts.Reset() return &mockCNIPlugin{ - counter: callCounts, + counter: callCounts, + setupErrors: []string{}, + checkErrors: []error{}, } } @@ -65,6 +68,13 @@ func (f *mockCNIPlugin) Remove(ctx context.Context, id, path string, opts ...cni } func (f *mockCNIPlugin) Check(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) error { + if f.counter != nil { + f.counter.Inc("Check") + } + numOfCalls := f.counter.Get()["Check"] + if numOfCalls <= len(f.checkErrors) { + return f.checkErrors[numOfCalls-1] + } return nil } diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index 798d641adb4..f76f2b2462e 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -296,7 +296,7 @@ func TestSetup(t *testing.T) { } // method under test - result, err := c.Setup(context.Background(), alloc, spec) + result, err := c.Setup(context.Background(), alloc, spec, true) if tc.expectErr == "" { must.NoError(t, err) must.Eq(t, tc.expectResult, result) diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx index 2734093f0be..a724f458eb5 100644 --- a/website/content/docs/upgrade/upgrade-specific.mdx +++ b/website/content/docs/upgrade/upgrade-specific.mdx @@ -13,6 +13,19 @@ upgrade. However, specific versions of Nomad may have more details provided for their upgrades as a result of new features or changed behavior. This page is used to document those details separately from the standard upgrade flow. +## Nomad 1.9.5 + +#### CNI plugins + +Nomad 1.9.5 includes a bug fix for restoring allocation networking after a +client host reboot. This fix requires recent versions of the CNI reference +plugins (minimum 1.2.0) and will fallback to the existing behavior if the CNI +reference plugins cannot support the fix. + +We recommend installing the CNI reference plugins from the [CNI project release +page](https://github.com/containernetworking/plugins/releases) rather than your +Linux distribution's package manager. + ## Nomad 1.9.4 #### Security updates to default deny lists diff --git a/website/content/partials/install/install-cni-plugins.mdx b/website/content/partials/install/install-cni-plugins.mdx index d916ba11e68..080482be139 100644 --- a/website/content/partials/install/install-cni-plugins.mdx +++ b/website/content/partials/install/install-cni-plugins.mdx @@ -4,19 +4,23 @@ that use network namespaces. Refer to the [CNI Plugins external guide](https://www.cni.dev/plugins/current/) for details on individual plugins. The following series of commands determines your operating system architecture, -downloads the [CNI 1.5.1 -release](https://github.com/containernetworking/plugins/releases/tag/v1.5.1), +downloads the [CNI 1.6.1 +release](https://github.com/containernetworking/plugins/releases/tag/v1.6.1), and then extracts the CNI plugin binaries into the `/opt/cni/bin` directory. Update the `CNI_PLUGIN_VERSION` value to use a different release version. ```shell-session $ export ARCH_CNI=$( [ $(uname -m) = aarch64 ] && echo arm64 || echo amd64) -$ export CNI_PLUGIN_VERSION=v1.5.1 +$ export CNI_PLUGIN_VERSION=v1.6.1 $ curl -L -o cni-plugins.tgz "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGIN_VERSION}/cni-plugins-linux-${ARCH_CNI}-${CNI_PLUGIN_VERSION}".tgz && \ sudo mkdir -p /opt/cni/bin && \ sudo tar -C /opt/cni/bin -xzf cni-plugins.tgz ``` +Your Linux distribution's package manager may provide the CNI reference plugins +but we recommend installing the most recent stable version to ensure you have +fixes for known bugs shipping in those versions. + Nomad looks for CNI plugin binaries by default in the `/opt/cni/bin` directory. However, you may install in the binaries in a different directory and then configure using the [`cni_path`](/nomad/docs/configuration/client#cni_path)