Skip to content

Commit

Permalink
backport of commit 99fb7d6 (#15414)
Browse files Browse the repository at this point in the history
This pull request was automerged via backport-assistant
  • Loading branch information
hc-github-team-nomad-core authored Nov 28, 2022
1 parent 0d2e39f commit e074036
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .changelog/15407.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
client: detect and clean up leaked iptables rules
```
100 changes: 98 additions & 2 deletions client/allocrunner/networking_cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,14 @@ import (
"math/rand"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"

cni "github.com/containerd/go-cni"
cnilibrary "github.com/containernetworking/cni/libcni"
"github.com/coreos/go-iptables/iptables"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
Expand Down Expand Up @@ -226,7 +228,101 @@ func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Al
return err
}

return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP)))
if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil {
// create a real handle to iptables
ipt, iptErr := iptables.New()
if iptErr != nil {
return fmt.Errorf("failed to detect iptables: %w", iptErr)
}
// most likely the pause container was removed from underneath nomad
return c.forceCleanup(ipt, alloc.ID)
}

return nil
}

// IPTables is the narrow slice of iptables.IPTables that the CNI network
// configurator depends on. Keeping it as an interface lets a real iptables
// handle and a test double be used interchangeably.
type IPTables interface {
	// List returns the rules currently present on chain within table.
	List(table, chain string) ([]string, error)

	// Delete removes the rule described by the given tokens from chain
	// within table.
	Delete(table, chain string, rule ...string) error

	// ClearAndDeleteChain clears chain within table and then deletes the
	// chain itself.
	ClearAndDeleteChain(table, chain string) error
}

// ipRuleRe recognises a nat POSTROUTING jump rule that was installed for a
// nomad allocation, in the form iptables prints it, for example:
//
//	-A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799
//
// The three capture groups are, in order: the source address (CIDR), the
// id embedded in the rule comment, and the CNI-prefixed target chain.
var ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`)

// forceCleanup removes, by hand, the iptables state belonging to an
// allocation that used bridge networking: the jump rule on the nat table's
// POSTROUTING chain and the CNI chain that rule points at. It is the fallback
// for when the cni library declines to tear down a dirty state (for example
// when the pause container was removed out of band), which would otherwise
// leak rules.
//
// ipt is the iptables handle to operate on and allocID identifies the
// allocation whose rule should be purged.
//
// An error is returned when the rules cannot be listed, when no POSTROUTING
// rule carries the allocation's comment, when that rule cannot be parsed, or
// when either removal step fails. Removal failures are logged as warnings and
// both steps are always attempted.
func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error {
	const (
		natTable         = "nat"
		postRoutingChain = "POSTROUTING"
		commentFmt       = `--comment "name: \"nomad\" id: \"%s\""`
	)

	// Snapshot what is currently installed on nat/POSTROUTING.
	installed, listErr := ipt.List(natTable, postRoutingChain)
	if listErr != nil {
		return fmt.Errorf("failed to list iptables rules: %w", listErr)
	}

	// Locate the first rule tagged with this allocation's comment.
	wanted := fmt.Sprintf(commentFmt, allocID)
	found := -1
	for i := range installed {
		if strings.Contains(installed[i], wanted) {
			found = i
			break
		}
	}
	if found < 0 {
		// Nothing on the chain belongs to this allocation.
		return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID)
	}

	// Pull the pieces out of the printed rule so that it can be handed
	// back to iptables as individual tokens.
	parts := ipRuleRe.FindStringSubmatch(installed[found])
	if len(parts) != 4 {
		return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID)
	}
	cidr, id, chainID := parts[1], parts[2], parts[3]

	jumpRule := []string{
		`-s`, cidr,
		`-m`, `comment`,
		`--comment`, `name: "nomad" id: "` + id + `"`,
		`-j`, chainID,
	}

	// Both removals are attempted regardless of whether the first one
	// succeeds; any failure makes the overall cleanup a failure.
	failed := false

	// Drop the jump rule from POSTROUTING.
	if delErr := ipt.Delete(natTable, postRoutingChain, jumpRule...); delErr != nil {
		c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", delErr)
		failed = true
	}

	// Remove the chain the jump rule pointed at.
	if clearErr := ipt.ClearAndDeleteChain(natTable, chainID); clearErr != nil {
		c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", clearErr)
		failed = true
	}

	if failed {
		return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID)
	}
	return nil
}

func (c *cniNetworkConfigurator) ensureCNIInitialized() error {
Expand All @@ -240,7 +336,7 @@ func (c *cniNetworkConfigurator) ensureCNIInitialized() error {
// getPortMapping builds a list of portMapping structs that are used as the
// portmapping capability arguments for the portmap CNI plugin
func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping {
ports := []cni.PortMapping{}
var ports []cni.PortMapping

if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 {
for _, network := range alloc.AllocatedResources.Shared.Networks {
Expand Down
114 changes: 112 additions & 2 deletions client/allocrunner/networking_cni_test.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,129 @@
//go:build linux
// +build linux

package allocrunner

import (
"errors"
"net"
"testing"

cni "github.com/containerd/go-cni"
"github.com/containerd/go-cni"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper/testlog"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// mockIPTables is a hand-written test double for the IPTables interface.
// Each method records the arguments of its most recent invocation and
// returns the canned result configured on the struct.
type mockIPTables struct {
	// listCall holds the {table, chain} of the last List call; listRules
	// and listErr are what List returns.
	listCall  [2]string
	listRules []string
	listErr   error

	// deleteCall holds the {table, chain} of the last Delete call and
	// deleteRule the rule tokens it was given; deleteErr is what Delete
	// returns.
	deleteCall [2]string
	deleteRule []string
	deleteErr  error

	// clearCall holds the {table, chain} of the last ClearAndDeleteChain
	// call; clearErr is what it returns.
	clearCall [2]string
	clearErr  error
}

// List records the requested table and chain and returns the canned rules
// and error.
func (ipt *mockIPTables) List(table, chain string) ([]string, error) {
	ipt.listCall[0], ipt.listCall[1] = table, chain
	return ipt.listRules, ipt.listErr
}

// Delete records the table, chain and rule tokens it was asked to delete and
// returns the canned error.
func (ipt *mockIPTables) Delete(table, chain string, rule ...string) error {
	ipt.deleteCall[0], ipt.deleteCall[1] = table, chain
	// Keep a private copy so later changes to the caller's slice cannot
	// alter what was recorded.
	ipt.deleteRule = append([]string(nil), rule...)
	return ipt.deleteErr
}

// ClearAndDeleteChain records the requested table and chain and returns the
// canned error.
func (ipt *mockIPTables) ClearAndDeleteChain(table, chain string) error {
	ipt.clearCall[0], ipt.clearCall[1] = table, chain
	return ipt.clearErr
}

// assert verifies the recorded calls: List and Delete must have targeted
// nat/POSTROUTING, the deleted rule must end by jumping to jumpChain, and
// ClearAndDeleteChain must have targeted jumpChain in the nat table.
func (ipt *mockIPTables) assert(t *testing.T, jumpChain string) {
	// List assertions
	must.Eq(t, "nat", ipt.listCall[0])
	must.Eq(t, "POSTROUTING", ipt.listCall[1])

	// Delete assertions
	must.Eq(t, "nat", ipt.deleteCall[0])
	must.Eq(t, "POSTROUTING", ipt.deleteCall[1])

	// The deleted rule must be the jump into the chain being removed; a
	// missing or too-short rule leaves tail nil and fails the comparison.
	var tail []string
	if n := len(ipt.deleteRule); n >= 2 {
		tail = ipt.deleteRule[n-2:]
	}
	must.Eq(t, []string{"-j", jumpChain}, tail)

	// Clear assertions
	must.Eq(t, "nat", ipt.clearCall[0])
	must.Eq(t, jumpChain, ipt.clearCall[1])
}

func TestCNI_forceCleanup(t *testing.T) {
t.Run("ok", func(t *testing.T) {
c := cniNetworkConfigurator{logger: testlog.HCLogger(t)}
ipt := &mockIPTables{
listRules: []string{
`-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`,
`-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`,
`-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`,
`-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`,
`-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`,
`-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`,
`-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`,
},
}
err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964")
must.NoError(t, err)
ipt.assert(t, "CNI-5d36f286cfbb35c5776509ec")
})

t.Run("missing allocation", func(t *testing.T) {
c := cniNetworkConfigurator{logger: testlog.HCLogger(t)}
ipt := &mockIPTables{
listRules: []string{
`-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`,
`-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`,
`-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`,
`-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"262d57a7-8f85-f3a4-9c3b-120c00ccbff1\"" -j CNI-5d36f286cfbb35c5776509ec`,
`-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`,
`-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`,
`-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`,
},
}
err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964")
must.EqError(t, err, "failed to find postrouting rule for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964")
})

t.Run("list error", func(t *testing.T) {
c := cniNetworkConfigurator{logger: testlog.HCLogger(t)}
ipt := &mockIPTables{listErr: errors.New("list error")}
err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964")
must.EqError(t, err, "failed to list iptables rules: list error")
})

t.Run("delete error", func(t *testing.T) {
c := cniNetworkConfigurator{logger: testlog.HCLogger(t)}
ipt := &mockIPTables{
deleteErr: errors.New("delete error"),
listRules: []string{
`-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`,
},
}
err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964")
must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964")
})

t.Run("clear error", func(t *testing.T) {
c := cniNetworkConfigurator{logger: testlog.HCLogger(t)}
ipt := &mockIPTables{
clearErr: errors.New("clear error"),
listRules: []string{
`-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`,
},
}
err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964")
must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964")
})
}

// TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on
// its sandbox interface, the first IP found is used.
func TestCNI_cniToAllocNet_Fallback(t *testing.T) {
Expand Down

0 comments on commit e074036

Please sign in to comment.