diff --git a/.changelog/15407.txt b/.changelog/15407.txt new file mode 100644 index 00000000000..65e776a46cd --- /dev/null +++ b/.changelog/15407.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: detect and cleanup leaked iptables rules +``` diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 0ca806fb8f3..638e07eeb37 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -12,12 +12,14 @@ import ( "math/rand" "os" "path/filepath" + "regexp" "sort" "strings" "time" cni "github.com/containerd/go-cni" cnilibrary "github.com/containernetworking/cni/libcni" + "github.com/coreos/go-iptables/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -226,7 +228,101 @@ func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Al return err } - return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))) + if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { + // create a real handle to iptables + ipt, iptErr := iptables.New() + if iptErr != nil { + return fmt.Errorf("failed to detect iptables: %w", iptErr) + } + // most likely the pause container was removed from underneath nomad + return c.forceCleanup(ipt, alloc.ID) + } + + return nil +} + +// IPTables is a subset of iptables.IPTables +type IPTables interface { + List(table, chain string) ([]string, error) + Delete(table, chain string, rule ...string) error + ClearAndDeleteChain(table, chain string) error +} + +var ( + // ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g. + // -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799 + ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`) +) + +// forceCleanup is the backup plan for removing the iptables rule and chain associated with +// an allocation that was using bridge networking. The cni library refuses to handle a +// dirty state - e.g. the pause container is removed out of band, and so we must cleanup +// iptables ourselves to avoid leaking rules. +func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error { + const ( + natTable = "nat" + postRoutingChain = "POSTROUTING" + commentFmt = `--comment "name: \"nomad\" id: \"%s\""` + ) + + // list the rules on the POSTROUTING chain of the nat table + rules, err := ipt.List(natTable, postRoutingChain) + if err != nil { + return fmt.Errorf("failed to list iptables rules: %w", err) + } + + // find the POSTROUTING rule associated with our allocation + matcher := fmt.Sprintf(commentFmt, allocID) + var ruleToPurge string + for _, rule := range rules { + if strings.Contains(rule, matcher) { + ruleToPurge = rule + break + } + } + + // no rule found for our allocation, just give up + if ruleToPurge == "" { + return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID) + } + + // re-create the rule we need to delete, as tokens + subs := ipRuleRe.FindStringSubmatch(ruleToPurge) + if len(subs) != 4 { + return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID) + } + cidr := subs[1] + id := subs[2] + chainID := subs[3] + toDel := []string{ + `-s`, + cidr, + `-m`, + `comment`, + `--comment`, + `name: "nomad" id: "` + id + `"`, + `-j`, + chainID, + } + + // remove the jump rule + ok := true + if err = ipt.Delete(natTable, postRoutingChain, toDel...); err != nil { + c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", err) + ok = false + } + + // remote the associated chain + if err = ipt.ClearAndDeleteChain(natTable, chainID); err != nil { + c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", err) + ok = false + } + + if !ok { + return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID) + } + + return nil } func (c *cniNetworkConfigurator) ensureCNIInitialized() error { @@ -240,7 +336,7 @@ func (c *cniNetworkConfigurator) ensureCNIInitialized() error { // getPortMapping builds a list of portMapping structs that are used as the // portmapping capability arguments for the portmap CNI plugin func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping { - ports := []cni.PortMapping{} + var ports []cni.PortMapping if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 { for _, network := range alloc.AllocatedResources.Shared.Networks { diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index bc759272f50..3bc8f859908 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -1,19 +1,129 @@ //go:build linux -// +build linux package allocrunner import ( + "errors" "net" "testing" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +type mockIPTables struct { + listCall [2]string + listRules []string + listErr error + + deleteCall [2]string + deleteErr error + + clearCall [2]string + clearErr error +} + +func (ipt *mockIPTables) List(table, chain string) ([]string, error) { + ipt.listCall[0], ipt.listCall[1] = table, chain + return ipt.listRules, ipt.listErr +} + +func (ipt *mockIPTables) Delete(table, chain string, rule ...string) error { + ipt.deleteCall[0], ipt.deleteCall[1] = table, chain + return ipt.deleteErr +} + +func (ipt *mockIPTables) ClearAndDeleteChain(table, chain string) error { + ipt.clearCall[0], ipt.clearCall[1] = table, chain + return ipt.clearErr +} + +func (ipt *mockIPTables) assert(t *testing.T, jumpChain string) { + // List assertions + must.Eq(t, "nat", ipt.listCall[0]) + must.Eq(t, "POSTROUTING", ipt.listCall[1]) + + // Delete assertions + must.Eq(t, "nat", ipt.deleteCall[0]) + must.Eq(t, "POSTROUTING", ipt.deleteCall[1]) + + // Clear assertions + must.Eq(t, "nat", ipt.clearCall[0]) + must.Eq(t, jumpChain, ipt.clearCall[1]) +} + +func TestCNI_forceCleanup(t *testing.T) { + t.Run("ok", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.NoError(t, err) + ipt.assert(t, "CNI-5d36f286cfbb35c5776509ec") + }) + + t.Run("missing allocation", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"262d57a7-8f85-f3a4-9c3b-120c00ccbff1\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to find postrouting rule for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("list error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{listErr: errors.New("list error")} + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to list iptables rules: list error") + }) + + t.Run("delete error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + deleteErr: errors.New("delete error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("clear error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + clearErr: errors.New("clear error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) +} + // TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on // its sandbox interface, the first IP found is used. func TestCNI_cniToAllocNet_Fallback(t *testing.T) {