Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CNI]: Teardown pod network when IPAMD connection fails #2145

Merged
merged 1 commit into from
Dec 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 88 additions & 31 deletions cmd/routed-eni-cni-plugin/cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import (

const ipamdAddress = "127.0.0.1:50051"

const dummyVlanInterfacePrefix = "dummy"
const dummyInterfacePrefix = "dummy"

var version string

Expand Down Expand Up @@ -187,11 +187,11 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
log.Infof("Received add network response from ipamd for container %s interface %s: %+v",
args.ContainerID, args.IfName, r)

//We will let the values in result struct guide us in terms of IP Address Family configured.
// We will let the values in result struct guide us in terms of IP Address Family configured.
var v4Addr, v6Addr, addr *net.IPNet
var addrFamily string

//We don't support dual stack mode currently so it has to be either v4 or v6 mode.
// We don't support dual stack mode currently so it has to be either v4 or v6 mode.
if r.IPv4Addr != "" {
v4Addr = &net.IPNet{
IP: net.ParseIP(r.IPv4Addr),
Expand All @@ -209,31 +209,28 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
}

var hostVethName string
var dummyVlanInterface *current.Interface
var dummyInterface *current.Interface

// The dummy interface is purely virtual and is stored in the prevResult struct to assist in cleanup during the DEL command.
dummyInterfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))

// Non-zero value means pods are using branch ENI
if r.PodVlanId != 0 {
hostVethNamePrefix := sgpp.BuildHostVethNamePrefix(conf.VethPrefix, conf.PodSGEnforcingMode)
hostVethName = networkutils.GeneratePodHostVethName(hostVethNamePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupBranchENIPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.PodVlanId), r.PodENIMAC,
r.PodENISubnetGW, int(r.ParentIfIndex), mtu, conf.PodSGEnforcingMode, log)

// This is a dummyVlanInterfaceName generated to identify dummyVlanInterface
// which will be created for PPSG scenario to pass along the vlanId information
// as a part of the ADD cmd Result struct
// The podVlanId is used by DEL cmd, fetched from the prevResult struct to cleanup the pod network
dummyVlanInterfaceName := networkutils.GeneratePodHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))

// The dummyVlanInterface is purely virtual and relevent only for ppsg, so we decided to keep it separate
// and not overload the already available hostVethInterface
dummyVlanInterface = &current.Interface{Name: dummyVlanInterfaceName, Mac: fmt.Sprint(r.PodVlanId)}
log.Debugf("Using dummy vlanInterface: %v", dummyVlanInterface)
// For branch ENI mode, the pod VLAN ID is packed in Interface.Mac
dummyInterface = &current.Interface{Name: dummyInterfaceName, Mac: fmt.Sprint(r.PodVlanId)}
} else {
// build hostVethName
// Note: the maximum length for linux interface name is 15
hostVethName = networkutils.GeneratePodHostVethName(conf.VethPrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
err = driverClient.SetupPodNetwork(hostVethName, args.IfName, args.Netns, v4Addr, v6Addr, int(r.DeviceNumber), mtu, log)
// For non-branch ENI, the pod VLAN ID value of 0 is packed in Interface.Mac, while the interface device number is packed in Interface.Sandbox
dummyInterface = &current.Interface{Name: dummyInterfaceName, Mac: fmt.Sprint(0), Sandbox: fmt.Sprint(r.DeviceNumber)}
}
log.Debugf("Using dummy interface: %v", dummyInterface)

if err != nil {
log.Errorf("Failed SetupPodNetwork for container %s: %v",
Expand Down Expand Up @@ -280,10 +277,8 @@ func add(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
},
}

// We append dummyVlanInterface only for pods using branch ENI
if dummyVlanInterface != nil {
result.Interfaces = append(result.Interfaces, dummyVlanInterface)
}
// dummy interface is appended to PrevResult for use during cleanup
result.Interfaces = append(result.Interfaces, dummyInterface)

return cniTypes.PrintResult(result, conf.CNIVersion)
}
Expand Down Expand Up @@ -311,6 +306,7 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return errors.Wrap(err, "del cmd: failed to load k8s config from args")
}

// For pods using branch ENI, try to delete using previous result
handled, err := tryDelWithPrevResult(driverClient, conf, k8sArgs, args.IfName, args.Netns, log)
if err != nil {
return errors.Wrap(err, "del cmd: failed to delete with prevResult")
Expand All @@ -328,6 +324,13 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
log.Errorf("Failed to connect to backend server for container %s: %v",
args.ContainerID, err)

// When IPAMD is unreachable, try to teardown pod network using previous result. This action prevents rules from leaking while IPAMD is unreachable.
// Note that connection error is still returned to kubelet so that delete retries until IPAMD can be notified.
if teardownPodNetworkWithPrevResult(driverClient, conf, k8sArgs, args.IfName, log) {
log.Infof("Handled pod teardown using prevResult: ContainerID(%s) Netns(%s) IfName(%s) PodNamespace(%s) PodName(%s)",
args.ContainerID, args.Netns, args.IfName, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
}

return errors.Wrap(err, "del cmd: failed to connect to backend server")
}
defer conn.Close()
Expand Down Expand Up @@ -355,6 +358,12 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
}
log.Errorf("Error received from DelNetwork gRPC call for container %s: %v",
args.ContainerID, err)

// DelNetworkRequest may return a connection error, so try to delete using PrevResult whenever an error is returned.
if teardownPodNetworkWithPrevResult(driverClient, conf, k8sArgs, args.IfName, log) {
log.Infof("Handled pod teardown using prevResult: ContainerID(%s) Netns(%s) IfName(%s) PodNamespace(%s) PodName(%s)",
args.ContainerID, args.Netns, args.IfName, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
}
return errors.Wrap(err, "del cmd: error received from DelNetwork gRPC call")
}

Expand Down Expand Up @@ -405,6 +414,18 @@ func del(args *skel.CmdArgs, cniTypes typeswrapper.CNITYPES, grpcClient grpcwrap
return nil
}

func getContainerIP(prevResult *current.Result, contVethName string) (net.IPNet, error) {
containerIfaceIndex, _, found := cniutils.FindInterfaceByName(prevResult.Interfaces, contVethName)
if !found {
return net.IPNet{}, errors.Errorf("cannot find contVethName %s in prevResult", contVethName)
}
containerIPs := cniutils.FindIPConfigsByIfaceIndex(prevResult.IPs, containerIfaceIndex)
if len(containerIPs) != 1 {
return net.IPNet{}, errors.Errorf("found %d containerIPs for %v in prevResult", len(containerIPs), contVethName)
}
return containerIPs[0].Address, nil
}

// tryDelWithPrevResult will try to process CNI delete request without IPAMD.
// returns true if the del request is handled.
func tryDelWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArgs K8sArgs, contVethName string, netNS string, log logger.Logger) (bool, error) {
Expand All @@ -414,36 +435,72 @@ func tryDelWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArg
return false, nil
}

dummyIfaceName := networkutils.GeneratePodHostVethName(dummyVlanInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
dummyIfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
_, dummyIface, found := cniutils.FindInterfaceByName(prevResult.Interfaces, dummyIfaceName)
if !found {
return false, nil
}
podVlanID, err := strconv.Atoi(dummyIface.Mac)
if err != nil || podVlanID == 0 {
return true, errors.Errorf("malformed vlanID in prevResult: %s", dummyIface.Mac)
if err != nil {
return false, errors.Errorf("malformed vlanID in prevResult: %s", dummyIface.Mac)
}
// For non-branch ENI pods, deletion requires handshake with IPAMD
if podVlanID == 0 {
return false, nil
}
if isNetnsEmpty(netNS) {
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod:%s namespace: %s containerID:%s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
log.Infof("Ignoring TeardownPodENI as Netns is empty for SG pod: %s namespace: %s containerID: %s", k8sArgs.K8S_POD_NAME, k8sArgs.K8S_POD_NAMESPACE, k8sArgs.K8S_POD_INFRA_CONTAINER_ID)
return true, nil
}

containerIfaceIndex, _, found := cniutils.FindInterfaceByName(prevResult.Interfaces, contVethName)
if !found {
return false, errors.Errorf("cannot find contVethName %s in prevResult", contVethName)
}
containerIPs := cniutils.FindIPConfigsByIfaceIndex(prevResult.IPs, containerIfaceIndex)
if len(containerIPs) != 1 {
return false, errors.Errorf("found %d containerIP for %v in prevResult", len(containerIPs), contVethName)
containerIP, err := getContainerIP(prevResult, contVethName)
if err != nil {
return false, err
}
containerIP := containerIPs[0].Address

if err := driverClient.TeardownBranchENIPodNetwork(&containerIP, podVlanID, conf.PodSGEnforcingMode, log); err != nil {
return true, err
}
return true, nil
}

// teardownPodNetworkWithPrevResult will try to process CNI delete for non-branch ENIs without IPAMD.
// Returns true if pod network is torn down
func teardownPodNetworkWithPrevResult(driverClient driver.NetworkAPIs, conf *NetConf, k8sArgs K8sArgs, contVethName string, log logger.Logger) bool {
// For non-branch ENI, prevResult is only available in v1.13.0+
prevResult, ok := conf.PrevResult.(*current.Result)
if !ok {
return false
}
dummyIfaceName := networkutils.GeneratePodHostVethName(dummyInterfacePrefix, string(k8sArgs.K8S_POD_NAMESPACE), string(k8sArgs.K8S_POD_NAME))
_, dummyIface, found := cniutils.FindInterfaceByName(prevResult.Interfaces, dummyIfaceName)
if !found {
return false
}
// For non-branch ENI, VLAN ID of 0 is encoded in Mac and device number is encoded in Sandbox
podVlanID, err := strconv.Atoi(dummyIface.Mac)
if err != nil || podVlanID != 0 {
log.Errorf("Invalid VLAN ID for non-branch ENI pod: %s", dummyIface.Mac)
return false
}
deviceNumber, err := strconv.Atoi(dummyIface.Sandbox)
if err != nil {
log.Errorf("Invalid device number for pod: %s", dummyIface.Sandbox)
return false
}
containerIP, err := getContainerIP(prevResult, contVethName)
if err != nil {
log.Errorf("Failed to get container IP: %v", err)
return false
}

if err := driverClient.TeardownPodNetwork(&containerIP, deviceNumber, log); err != nil {
log.Errorf("Failed to teardown pod network: %v", err)
return false
}
return true
}

// Scope usage of this function to only SG pods scenario
// Don't process deletes when NetNS is empty
// as it implies that veth for this request is already deleted
Expand Down
Loading