diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index 5095efd120f..8b550eeeafa 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -665,7 +665,7 @@ data: # Enable antrea proxy which provides ServiceLB for in-cluster services in antrea agent. # It should be enabled on Windows, otherwise NetworkPolicy will not take effect on # Service traffic. - # AntreaProxy: false + AntreaProxy: true # Enable traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: false # Enable flowexporter which exports polled conntrack connections as IPFIX flow records from each agent to a configured collector. @@ -770,7 +770,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-hhthk4g2f4 + name: antrea-config-h7cg6t86ht namespace: kube-system --- apiVersion: v1 @@ -876,7 +876,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-hhthk4g2f4 + name: antrea-config-h7cg6t86ht name: antrea-config - name: antrea-controller-tls secret: @@ -1093,7 +1093,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-hhthk4g2f4 + name: antrea-config-h7cg6t86ht name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index 79e318fbddd..51f2aafbb44 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -665,7 +665,7 @@ data: # Enable antrea proxy which provides ServiceLB for in-cluster services in antrea agent. # It should be enabled on Windows, otherwise NetworkPolicy will not take effect on # Service traffic. - # AntreaProxy: false + AntreaProxy: true # Enable traceflow which provides packet tracing feature to diagnose network issue. # Traceflow: false # Enable flowexporter which exports polled conntrack connections as IPFIX flow records from each agent to a configured collector. 
@@ -770,7 +770,7 @@ metadata: annotations: {} labels: app: antrea - name: antrea-config-mbkmc9bb22 + name: antrea-config-db6h57cm79 namespace: kube-system --- apiVersion: v1 @@ -876,7 +876,7 @@ spec: key: node-role.kubernetes.io/master volumes: - configMap: - name: antrea-config-mbkmc9bb22 + name: antrea-config-db6h57cm79 name: antrea-config - name: antrea-controller-tls secret: @@ -1091,7 +1091,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-config-mbkmc9bb22 + name: antrea-config-db6h57cm79 name: antrea-config - hostPath: path: /etc/cni/net.d diff --git a/docs/policy-only.md b/docs/policy-only.md index b7f0abeb860..3d37b7ee60f 100644 --- a/docs/policy-only.md +++ b/docs/policy-only.md @@ -6,10 +6,13 @@ primary CNI. ## Design -Antrea is designed to work as NetworkPolicy plug-in to work together with a routed CNIs. +Antrea is designed to work as NetworkPolicy plug-in to work together with a routed CNIs. For as long as a CNI implementation fits into this model, Antrea may be inserted to enforce NetworkPolicy in that CNI's environment using Open VSwitch(OVS). +In addition, Antrea working as NetworkPolicy plug-in automatically enables Antrea-proxy, because +it requires Antrea-proxy to load balance Pod-to-Service traffic. + Antrea Switched CNI The above diagram depicts a routed CNI network topology on the left, and what it looks like @@ -24,7 +27,7 @@ incoming traffic is received on this PtP device. This is a spoke-and-hub model, traffic, even within the same worker Node must traverse first to the host network and be routed by it. -When a Pod is instantiated, the container runtime first calls the primary CNI to configure Pod's +When the container runtime instantiates a Pod, it first calls the primary CNI to configure Pod's IP, route table, DNS etc, and then connects Pod to host network with a PtP device such as a veth-pair. 
When Antrea is chained with this primary CNI, container runtime then calls Antrea Agent, and the Antrea Agent attaches Pod's PtP device to the OVS bridge, and moves the host @@ -34,137 +37,17 @@ illustrated by the diagram on the right. Antrea needs to satisfy that 1. All IP packets, sent on ``antrea-gw0`` in the host network, are received by the Pods exactly the same as if the OVS bridge had not been inserted. -1. Similarly all IP packets, sent by Pods, are received by other Pods or the host network exactly +1. All IP packets, sent by Pods, are received by other Pods or the host network exactly the same as if OVS bridge had not been inserted. 1. There are no requirements on Pod MAC addresses as all MAC addresses stays within the OVS bridge. To satisfy the above requirements, Antrea needs no knowledge of Pod's network configurations nor of underlying CNI network, it simply needs to program the following OVS flows on the OVS bridge: -1. A default ARP responder flow that answers any ARP request. Its sole purpose is so that a Pod's -neighbor may be resolved, and packets may be sent by that Pod to that neighbor. -1. IP packets are routed based on their destination IP if it matches any local Pod's IP. -1. All other IP packets are routed to host network via ``antrea-gw0`` interface. - -These flows together handle all Pod traffic patterns with exception of Pod-to-Service traffic -that we will address next. - -## Handling Pod-To-Service -The discussion in this section is relevant also to Pod-to-Service traffic in NoEncap traffic -mode. Antrea applies the same principle to handle Pod-to-Service traffic in all traffic modes where -traffic requires no encapsulation. - -Antrea uses kube-proxy for load balancing. At the same time, it also supports Pod level -NetworkPolicy enforcement. - -This means that a Pod-to-Service traffic flow needs to -1. first traverse to the host network for load balancing (DNAT), then -1. 
come back to OVS bridge for Pod Egress NetworkPolicy processing, and -1. go back to the host network yet again to be forwarded, if DNATed destination in 1) is an -inter-Node Pod or external network entity. - -We refer to the last traffic pattern as re-entrance traffic because in this pattern, a traffic flow -enters host network twice -- first time for load balancing, and second time for forwarding. - -Denote -- VIP as cluster IP of a service -- SP_IP/DP_IP as respective client and server Pod IP -- VPort as service port of a service -- TPort as target port of server Pod -- SPort as original source port - -The service request's 5-tuples upon first and second entrance to the host network, and -its reply's 5-tuples would be like - -``` -request/service: --- Entering Host Network(via antrea-gw0): SP_IP/SPort->VIP/VPort --- After LB(DNAT): SP_IP/SPort->DP_IP/TPort --- After Route(to antrea-gw0): SP_IP/SPort->DP_IP/TPort - -request/forwarding: --- Entering Host Network(via antrea-gw0): SP_IP/SPort->DP_IP/TPort --- After route(to uplink): SP_IP/SPort->DP_IP/TPort - -reply: --- Entering Host Network(via uplink): DP_IP/TPort -> SP_IP/SPort --- After LB(DNAT): VIP/VPort->SP_IP/Sport --- After route(to antrea-gw0): VIP/VPort->SP_IP/Sport -``` - -#### Routing -Note that the request with destination IP DP_IP needs to be routed differently in LB and -forwarding cases.(This differs from encap traffic where all traffic flows including post LB -service traffic share the same ``main`` route table.) Antrea creates a customized -``antrea_service`` route table, it is used in conjunction with ip-rule and ip-tables to handle -service traffic. Together they work as follows -1. At Antrea initialization, an ip-tables rule is created in ``mangle table`` that marks IP packets -with service IP as destination IP and are from ``antrea-gw0``. -1. At Antrea initialization, an ip-rule is added to select ``antrea_service`` route table as routing -table if traffic is marked in 1). -1. 
At Antrea initialization, a default route entry is added to ``antrea_service`` route table to -forward all traffic to ``antrea-gw0``. - -The outcome may be something like this -```bash -ip neigh | grep antrea-gw0 -169.254.253.1 dev antrea-gw0 lladdr 12:34:56:78:9a:bc PERMANENT - -ip route show table 300 #tbl_idx=300 is antrea_service -default via 169.254.253.1 dev antrea-gw0 onlink - -ip rule | grep antrea-gw0 -300: from all fwmark 0x800/0x800 iif antrea-gw0 lookup 300 - -iptables -t mangle -L ANTREA-MANGLE -Chain ANTREA-MANGLE (1 references) -target prot opt source destination -MARK all -- anywhere 10.0.0.0/16 /* Antrea: mark service traffic */ MARK or 0x800 -MARK all -- anywhere !10.0.0.0/16 /* Antrea: unmark post LB service traffic */ MARK and 0x0 -``` - -The above configuration allows Pod-to-Service traffic to use ``antrea_service`` route table after -load balancing, and to be steered back to OVS bridge for Pod NetworkPolicy processing. - -#### Conntrack -Note also that with re-entrance traffic, a service request, after being load balanced and routed -back to OVS bridge via ``antrea-gw0``, has exactly the same 5-tuple as when it re-enters the host network -for forwarding. - -When a service request with same 5-tuples re-enters the host network, it confuses Linux conntrack. -The Linux considers the re-entrance IP packet from a new connection flow that uses same source port -that has been allocated in the DNAT connection. In turn, the re-entrance packet triggers -another SNAT connection. The overall effect is that the service's DNAT connection is not -discovered by the service reply, and no Un-DNAT takes place. As a result, the reply is not -recognized, and therefore dropped by the source Pod. - -Antrea uses the following mechanisms to handle Pod-to-Service traffic re-entrance to the host -network, and bypasses conntrack in host network. -1. In OVS bridge, adds flow that marks any re-entrance traffic with a special source MAC. -1. 
In OVS bridge, adds flow that causes any re-entrance traffic to bypasses conntrack in OVS zone. -1. In the host network' ip-tables, adds a rule in ``raw`` table that if matching the special -source MAC in 1), bypass conntrack in host zone. - -#### NetworkPolicy Considerations -Note that when a traffic flow is re-entrance, the original reply packets do not make it into OVS, -as it is un-DNATted in the host network before reaching OVS. This, however, does not have any -impact on NetworkPolicy enforcement. - -Antrea enforces NetworkPolicy by allowing or disallowing initial connection packets (e.g. TCP - SYN) to go through and to establish connection. Once a connection is -established, Antrea relies on conntrack to admit or reject packets for that connection. This still -holds true for re-entrance traffic flows, except that conntrack takes place not within OVS conntrack -zone, but instead is in the host network's default conntrack zone. Hence NetworkPolicy -enforcement is not impacted. - -It has some effects on statistics collection. If original reply traffic reaches OVS bridge as is -the case of encap traffic flows, the OVS bridge knows about any reply packets dropped by OVS zone -conntrack, and can record them accordingly. With re-entrance traffic, the reply traffic with -original server Pod IPs does not reach OVS bridge, and any dropped traffic by host network -conntrack is unknown to the OVS bridge. - -## Future Work -1. Smoother transition in/out of Antrea in policy mode, Kubernetes deployment shall be easily -scaled up and down after/before Antrea insertion to allow Pods be added to Antrea after -installation, and reconnect to old CNI topology after Antrea is uninstalled. -1. NetworkPolicy for external services is not working. -See https://github.com/vmware-tanzu/antrea/issues/538. +1. A default ARP responder flow that answers any ARP request. 
Its sole purpose is so that a Pod can +resolve its neighbors, and the Pod therefore can generate traffic to these neighbors. +1. An L3 flow for each local Pod that routes IP packets to that Pod if packets' destination IP + matches that of the Pod. +1. An L3 flow that routes all other IP packets to host network via ``antrea-gw0`` +interface. + +These flows together handle all Pod traffic patterns. diff --git a/hack/generate-manifest.sh b/hack/generate-manifest.sh index 7b04c964b1b..6964d788fbd 100755 --- a/hack/generate-manifest.sh +++ b/hack/generate-manifest.sh @@ -134,6 +134,11 @@ if [ "$MODE" == "release" ] && [ -z "$IMG_TAG" ]; then exit 1 fi +# noEncap/policy-only mode works with antrea-proxy. +if [[ "$ENCAP_MODE" != "" ]] && [[ "$ENCAP_MODE" != "encap" ]]; then + PROXY=true +fi + THIS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" source $THIS_DIR/verify-kustomize.sh diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 900b9e38e94..4b2f16af760 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -453,9 +453,10 @@ func (i *Initializer) configureGatewayInterface(gatewayIface *interfacestore.Int i.nodeConfig.GatewayConfig = &config.GatewayConfig{Name: i.hostGateway, MAC: gwMAC} gatewayIface.MAC = gwMAC if i.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { - // In policy-only mode, Node IP is also assigned to local gateway for masquerade. + // Assign IP to gw as required by SpoofGuard. 
i.nodeConfig.GatewayConfig.IP = i.nodeConfig.NodeIPAddr.IP gatewayIface.IP = i.nodeConfig.NodeIPAddr.IP + // No need to assign local CIDR to gw0 because local CIDR is not managed by Antrea return nil } diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 8701ef9c87e..8b7db1ad12c 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -478,10 +478,6 @@ func (c *client) InstallGatewayFlows(gatewayAddr net.IP, gatewayMAC net.Hardware flows = append(flows, c.l3ToGatewayFlow(gatewayAddr, gatewayMAC, cookie.Default)) } - if c.encapMode.SupportsNoEncap() { - flows = append(flows, c.reEntranceBypassCTFlow(gatewayOFPort, gatewayOFPort, cookie.Default)) - } - if err := c.ofEntryOperations.AddAll(flows); err != nil { return err } @@ -524,11 +520,6 @@ func (c *client) initialize() error { if err := c.ofEntryOperations.AddAll(c.establishedConnectionFlows(cookie.Default)); err != nil { return fmt.Errorf("failed to install flows to skip established connections: %v", err) } - if c.encapMode.SupportsNoEncap() { - if err := c.ofEntryOperations.Add(c.l2ForwardOutputReentInPortFlow(c.gatewayPort, cookie.Default)); err != nil { - return fmt.Errorf("failed to install L2 forward same in-port and out-port flow: %v", err) - } - } if c.encapMode.IsNetworkPolicyOnly() { if err := c.setupPolicyOnlyFlows(); err != nil { return fmt.Errorf("failed to setup policy only flows: %w", err) @@ -644,8 +635,6 @@ func (c *client) DeleteStaleFlows() error { func (c *client) setupPolicyOnlyFlows() error { flows := []binding.Flow{ - // Bypasses remaining l3forwarding flows if the MAC is set via ctRewriteDstMACFlow. - c.l3BypassMACRewriteFlow(c.nodeConfig.GatewayConfig.MAC, cookie.Default), // Rewrites MAC to gw port if the packet received is unmatched by local Pod flows. c.l3ToGWFlow(c.nodeConfig.GatewayConfig.MAC, cookie.Default), // Replies any ARP request with the same global virtual MAC. 
diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index f29116cf08c..efa02798700 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -206,7 +206,6 @@ var ( serviceLearnRegRange = binding.Range{16, 18} globalVirtualMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff") - ReentranceMAC, _ = net.ParseMAC("de:ad:be:ef:de:ad") hairpinIP = net.ParseIP("169.254.169.252").To4() ) @@ -521,20 +520,6 @@ func (c *client) traceflowConnectionTrackFlows(dataplaneTag uint8, category cook Done() } -// reEntranceBypassCTFlow generates flow that bypass CT for traffic re-entering host network space. -// In host network space, we disable conntrack for re-entrance traffic so not to confuse conntrack -// in host namespace, This however has inverse effect on conntrack in Antrea conntrack zone as well, -// all subsequent re-entrance traffic becomes invalid. -func (c *client) reEntranceBypassCTFlow(gwPort, reentPort uint32, category cookie.Category) binding.Flow { - conntrackCommitTable := c.pipeline[conntrackCommitTable] - return conntrackCommitTable.BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), portFoundMark, ofPortMarkRange). - MatchInPort(gwPort).MatchReg(int(portCacheReg), reentPort). - Action().GotoTable(conntrackCommitTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - // ctRewriteDstMACFlow rewrites the destination MAC with local host gateway MAC if the packets has set ct_mark but not sent from the host gateway. func (c *client) ctRewriteDstMACFlow(gatewayMAC net.HardwareAddr, category cookie.Category) binding.Flow { connectionTrackStateTable := c.pipeline[conntrackStateTable] @@ -599,18 +584,6 @@ func (c *client) traceflowL2ForwardOutputFlow(dataplaneTag uint8, category cooki Done() } -// l2ForwardOutputReentInPortFlow generates the flow that forwards re-entrance peer Node traffic via antrea-gw0. 
-// This flow supersedes default output flow because ovs by default auto-skips packets with output = input port. -func (c *client) l2ForwardOutputReentInPortFlow(gwPort uint32, category cookie.Category) binding.Flow { - return c.pipeline[L2ForwardingOutTable].BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). - MatchRegRange(int(marksReg), portFoundMark, ofPortMarkRange). - MatchInPort(gwPort).MatchReg(int(portCacheReg), gwPort). - Action().SetSrcMAC(ReentranceMAC). - Action().OutputInPort(). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - // l2ForwardOutputServiceHairpinFlow uses in_port action for Service // hairpin packets to avoid packets from being dropped by OVS. func (c *client) l2ForwardOutputServiceHairpinFlow() binding.Flow { @@ -621,18 +594,6 @@ func (c *client) l2ForwardOutputServiceHairpinFlow() binding.Flow { Done() } -// l3BypassMACRewriteFlow bypasses remaining l3forwarding flows if the MAC is set via ctRewriteDstMACFlow in -// conntrackState stage. -func (c *client) l3BypassMACRewriteFlow(gatewayMAC net.HardwareAddr, category cookie.Category) binding.Flow { - l3FwdTable := c.pipeline[l3ForwardingTable] - return l3FwdTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). - MatchCTMark(gatewayCTMark). - MatchDstMAC(gatewayMAC). - Action().GotoTable(l3FwdTable.GetNext()). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done() -} - // l3FlowsToPod generates the flow to rewrite MAC if the packet is received from tunnel port and destined for local Pods. 
func (c *client) l3FlowsToPod(localGatewayMAC net.HardwareAddr, podInterfaceIP net.IP, podInterfaceMAC net.HardwareAddr, category cookie.Category) binding.Flow { l3FwdTable := c.pipeline[l3ForwardingTable] diff --git a/pkg/agent/route/route_linux.go b/pkg/agent/route/route_linux.go index 47ef6a77f7f..fe5156fcecb 100644 --- a/pkg/agent/route/route_linux.go +++ b/pkg/agent/route/route_linux.go @@ -18,9 +18,8 @@ import ( "bytes" "fmt" "net" - "os" "os/exec" - "strings" + "reflect" "sync" "github.com/vishvananda/netlink" @@ -29,7 +28,6 @@ import ( "k8s.io/klog/v2" "github.com/vmware-tanzu/antrea/pkg/agent/config" - "github.com/vmware-tanzu/antrea/pkg/agent/openflow" "github.com/vmware-tanzu/antrea/pkg/agent/util" "github.com/vmware-tanzu/antrea/pkg/agent/util/ipset" "github.com/vmware-tanzu/antrea/pkg/agent/util/iptables" @@ -37,21 +35,6 @@ import ( ) const ( - // AntreaServiceTable is route table name for Antrea service traffic. - AntreaServiceTable = "Antrea-service" - // AntreaServiceTableIdx is route table index for Antrea service traffic. - AntreaServiceTableIdx = 300 - mainTable = "main" - mainTableIdx = 254 - - routeTableConfigPath = "/etc/iproute2/rt_tables" - // AntreaIPRulePriority is Antrea IP rule priority - AntreaIPRulePriority = 300 - // Service route table default route next hop IP, used in policy-only mode. - svcTblVirtualDefaultGWIP = "169.254.253.1" - // Service route table default route next hop MAC, used in policy-only mode. - svcTblVirtualDefaultGWMAC = "12:34:56:78:9a:bc" - // Antrea managed ipset. // antreaPodIPSet contains all Pod CIDRs of this cluster. antreaPodIPSet = "ANTREA-POD-IP" @@ -60,13 +43,6 @@ const ( antreaForwardChain = "ANTREA-FORWARD" antreaPostRoutingChain = "ANTREA-POSTROUTING" antreaMangleChain = "ANTREA-MANGLE" - antreaRawChain = "ANTREA-RAW" -) - -var ( - // RtTblSelectorValue selects which route table to use to forward service traffic back to host gateway antrea-gw0. 
- RtTblSelectorValue = 1 << 11 - rtTblSelectorMark = fmt.Sprintf("%#x/%#x", RtTblSelectorValue, RtTblSelectorValue) ) // Client implements Interface. @@ -78,25 +54,10 @@ type Client struct { encapMode config.TrafficEncapModeType serviceCIDR *net.IPNet ipt *iptables.Client - // serviceRtTable contains Antrea service route table information. - serviceRtTable *serviceRtTableConfig // nodeRoutes caches ip routes to remote Pods. It's a map of podCIDR to routes. nodeRoutes sync.Map } -type serviceRtTableConfig struct { - Idx int - Name string -} - -func (s *serviceRtTableConfig) String() string { - return fmt.Sprintf("%s: idx %d", s.Name, s.Idx) -} - -func (s *serviceRtTableConfig) IsMainTable() bool { - return s.Name == "main" -} - // NewClient returns a route client. func NewClient(serviceCIDR *net.IPNet, encapMode config.TrafficEncapModeType) (*Client, error) { ipt, err := iptables.New() @@ -104,17 +65,10 @@ func NewClient(serviceCIDR *net.IPNet, encapMode config.TrafficEncapModeType) (* return nil, fmt.Errorf("error creating IPTables instance: %v", err) } - serviceRtTable := &serviceRtTableConfig{Idx: mainTableIdx, Name: mainTable} - if encapMode.SupportsNoEncap() { - serviceRtTable.Idx = AntreaServiceTableIdx - serviceRtTable.Name = AntreaServiceTable - } - return &Client{ - serviceCIDR: serviceCIDR, - encapMode: encapMode, - ipt: ipt, - serviceRtTable: serviceRtTable, + serviceCIDR: serviceCIDR, + encapMode: encapMode, + ipt: ipt, }, nil } @@ -204,7 +158,6 @@ func (c *Client) initIPTables() error { {iptables.FilterTable, iptables.ForwardChain, antreaForwardChain, "Antrea: jump to Antrea forwarding rules"}, {iptables.NATTable, iptables.PostRoutingChain, antreaPostRoutingChain, "Antrea: jump to Antrea postrouting rules"}, {iptables.MangleTable, iptables.PreRoutingChain, antreaMangleChain, "Antrea: jump to Antrea mangle rules"}, - {iptables.RawTable, iptables.PreRoutingChain, antreaRawChain, "Antrea: jump to Antrea raw rules"}, } for _, rule := range jumpRules { if 
err := c.ipt.EnsureChain(rule.table, rule.dstChain); err != nil { @@ -224,24 +177,10 @@ func (c *Client) initIPTables() error { writeLine(iptablesData, "*mangle") writeLine(iptablesData, iptables.MakeChainLine(antreaMangleChain)) hostGateway := c.nodeConfig.GatewayConfig.Name - if c.encapMode.SupportsNoEncap() { - writeLine(iptablesData, []string{ - "-A", antreaMangleChain, - "-m", "comment", "--comment", `"Antrea: mark pod to service packets"`, - "-i", hostGateway, "-d", c.serviceCIDR.String(), - "-j", iptables.MarkTarget, "--set-xmark", rtTblSelectorMark, - }...) - writeLine(iptablesData, []string{ - "-A", antreaMangleChain, - "-m", "comment", "--comment", `"Antrea: unmark post LB service packets"`, - "-i", hostGateway, "!", "-d", c.serviceCIDR.String(), - "-j", iptables.MarkTarget, "--set-xmark", "0/0xffffffff", - }...) - // When Antrea is used to enforce NetworkPolicies in EKS, an additional iptables - // mangle rule is required. See https://github.com/vmware-tanzu/antrea/issues/678. - if env.IsCloudEKS() { - c.writeEKSMangleRule(iptablesData) - } + // When Antrea is used to enforce NetworkPolicies in EKS, an additional iptables + // mangle rule is required. See https://github.com/vmware-tanzu/antrea/issues/678. + if env.IsCloudEKS() { + c.writeEKSMangleRule(iptablesData) } writeLine(iptablesData, "COMMIT") @@ -275,18 +214,6 @@ func (c *Client) initIPTables() error { } writeLine(iptablesData, "COMMIT") - writeLine(iptablesData, "*raw") - writeLine(iptablesData, iptables.MakeChainLine(antreaRawChain)) - if c.encapMode.SupportsNoEncap() { - writeLine(iptablesData, []string{ - "-A", antreaRawChain, - "-m", "comment", "--comment", `"Antrea: reentry pod traffic skip conntrack"`, - "-i", hostGateway, "-m", "mac", "--mac-source", openflow.ReentranceMAC.String(), - "-j", iptables.ConnTrackTarget, "--notrack", - }...) - } - writeLine(iptablesData, "COMMIT") - // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. 
if err := c.ipt.Restore(iptablesData.Bytes(), false); err != nil { return err @@ -295,16 +222,11 @@ func (c *Client) initIPTables() error { } func (c *Client) initIPRoutes() error { - if c.serviceRtTable.IsMainTable() { - _ = c.removeServiceRouting() - return nil - } - if err := c.addServiceRouting(); err != nil { - return err - } if c.encapMode.IsNetworkPolicyOnly() { - if err := c.setupPolicyOnlyMode(); err != nil { - return err + gwLink := util.GetNetLink(c.nodeConfig.GatewayConfig.Name) + _, gwIP, _ := net.ParseCIDR(fmt.Sprintf("%s/32", c.nodeConfig.NodeIPAddr.IP.String())) + if err := netlink.AddrReplace(gwLink, &netlink.Addr{IPNet: gwIP}); err != nil { + return fmt.Errorf("failed to add address %s to gw %s: %v", gwIP, gwLink.Attrs().Name, err) } } return nil @@ -313,8 +235,6 @@ func (c *Client) initIPRoutes() error { // Reconcile removes orphaned podCIDRs from ipset and removes routes to orphaned podCIDRs // based on the desired podCIDRs. func (c *Client) Reconcile(podCIDRs []string) error { - // TODO add an IPSet for migrated routes for reconciliation too. - desiredPodCIDRs := sets.NewString(podCIDRs...) // Remove orphaned podCIDRs from antreaPodIPSet. @@ -326,31 +246,47 @@ func (c *Client) Reconcile(podCIDRs []string) error { if desiredPodCIDRs.Has(entry) { continue } - klog.V(4).Infof("Deleting orphaned ip %s from ipset", entry) + klog.Infof("Deleting orphaned PodIP %s from ipset and route table", entry) if err := ipset.DelEntry(antreaPodIPSet, entry); err != nil { return err } + _, cidr, err := net.ParseCIDR(entry) + if err != nil { + return err + } + route := &netlink.Route{Dst: cidr} + if err := netlink.RouteDel(route); err != nil && err != unix.ESRCH { + return err + } } - // Remove orphaned routes from host network. - actualRouteMap, err := c.listIPRoutes() + // Remove any unknown routes on antrea-gw0. 
+ routes, err := c.listIPRoutesOnGW() if err != nil { return fmt.Errorf("error listing ip routes: %v", err) } - for podCIDR, actualRoutes := range actualRouteMap { - if desiredPodCIDRs.Has(podCIDR) { + for _, route := range routes { + if reflect.DeepEqual(route.Dst, c.nodeConfig.PodCIDR) { continue } - for _, route := range actualRoutes { - klog.V(4).Infof("Deleting orphaned route %v", route) - if err := netlink.RouteDel(route); err != nil && err != unix.ESRCH { - return err - } + if desiredPodCIDRs.Has(route.Dst.String()) { + continue + } + klog.Infof("Deleting unknown route %v", route) + if err := netlink.RouteDel(&route); err != nil && err != unix.ESRCH { + return err } } return nil } +// listIPRoutes returns list of routes on antrea-gw0. +func (c *Client) listIPRoutesOnGW() ([]netlink.Route, error) { + filter := &netlink.Route{ + LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex} + return netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_OIF) +} + // AddRoutes adds routes to a new podCIDR. It overrides the routes if they already exist. func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeIP, nodeGwIP net.IP) error { podCIDRStr := podCIDR.String() @@ -358,52 +294,25 @@ func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeIP, nodeGwIP net.IP) error { if err := ipset.AddEntry(antreaPodIPSet, podCIDRStr); err != nil { return err } - // Install routes to this Node. 
- routes := []*netlink.Route{ - { - Dst: podCIDR, - Flags: int(netlink.FLAG_ONLINK), - LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, - Gw: nodeGwIP, - Table: c.serviceRtTable.Idx, - }, - } - - // If service route table and main route table is not the same , add - // peer CIDR to main route table too (i.e in NoEncap and hybrid mode) - if !c.serviceRtTable.IsMainTable() { - if c.encapMode.NeedsEncapToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { - // need overlay tunnel - routes = append(routes, &netlink.Route{ - Dst: podCIDR, - Flags: int(netlink.FLAG_ONLINK), - LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, - Gw: nodeGwIP, - }) - } else if !c.encapMode.NeedsRoutingToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { - routes = append(routes, &netlink.Route{ - Dst: podCIDR, - Gw: nodeIP, - }) - } - // If Pod traffic needs underlying routing support, it is handled by host default route. - } - - // clean up function if any route add failed - deleteRtFn := func() { - for _, route := range routes { - _ = netlink.RouteDel(route) - } + route := &netlink.Route{ + Dst: podCIDR, + } + if c.encapMode.NeedsEncapToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { + route.Flags = int(netlink.FLAG_ONLINK) + route.LinkIndex = c.nodeConfig.GatewayConfig.LinkIndex + route.Gw = nodeGwIP + } else if !c.encapMode.NeedsRoutingToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { + // NoEncap traffic need routing help. + route.Gw = nodeIP + } else { + // NoEncap traffic to Node on the same subnet. It is handled by host default route. 
+ return nil } - - for _, route := range routes { - if err := netlink.RouteReplace(route); err != nil { - deleteRtFn() - return fmt.Errorf("failed to install route to peer %s with netlink: %v", nodeIP, err) - } + if err := netlink.RouteReplace(route); err != nil { + return fmt.Errorf("failed to install route to peer %s with netlink: %v", nodeIP, err) } - c.nodeRoutes.Store(podCIDRStr, routes) + c.nodeRoutes.Store(podCIDRStr, route) return nil } @@ -415,192 +324,16 @@ func (c *Client) DeleteRoutes(podCIDR *net.IPNet) error { return err } - routes, exists := c.nodeRoutes.Load(podCIDRStr) + i, exists := c.nodeRoutes.Load(podCIDRStr) if !exists { return nil } - for _, r := range routes.([]*netlink.Route) { - klog.V(4).Infof("Deleting route %v", r) - if err := netlink.RouteDel(r); err != nil && err != unix.ESRCH { - return err - } - } - c.nodeRoutes.Delete(podCIDRStr) - return nil -} - -// listIPRoutes returns list of routes from peer and local CIDRs -func (c *Client) listIPRoutes() (map[string][]*netlink.Route, error) { - // get all routes on antrea-gw0 from service table. - filter := &netlink.Route{ - Table: c.serviceRtTable.Idx, - LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex} - routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_TABLE|netlink.RT_FILTER_OIF) - if err != nil { - return nil, err - } - - rtMap := make(map[string][]*netlink.Route) - for _, rt := range routes { - // rt is reference to actual data, as it changes, - // it cannot be used for assignment - tmpRt := rt - rtMap[rt.Dst.String()] = append(rtMap[rt.Dst.String()], &tmpRt) - } - - if !c.serviceRtTable.IsMainTable() { - // get all routes on antrea-gw0 from main table. 
- filter.Table = 0 - routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_OIF) - if err != nil { - return nil, err - } - for _, rt := range routes { - // rt is reference to actual data, as it changes, - // it cannot be used for assignment - tmpRt := rt - rtMap[rt.Dst.String()] = append(rtMap[rt.Dst.String()], &tmpRt) - } - - // now get all routes antrea-gw0 on other interfaces from main table. - routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, nil, 0) - if err != nil { - return nil, err - } - for _, rt := range routes { - if rt.Dst == nil { - continue - } - // insert the route if it is CIDR route and has not been added already. - // routes with same dst are different if table or linkIndex differs. - if rl, ok := rtMap[rt.Dst.String()]; ok && (rl[len(rl)-1].LinkIndex != rt.LinkIndex || rl[len(rl)-1].Table != rt.Table) { - tmpRt := rt - rtMap[rt.Dst.String()] = append(rl, &tmpRt) - } - } - } - return rtMap, nil -} - -func (c *Client) addServiceRouting() error { - f, err := os.OpenFile(routeTableConfigPath, os.O_RDWR|os.O_APPEND, 0) - if err != nil { - return fmt.Errorf("unable to create service route table(open): %v", err) - } - defer f.Close() - - oldTablesRaw := make([]byte, 1024) - bLen, err := f.Read(oldTablesRaw) - if err != nil { - return fmt.Errorf("unable to create service route table(read): %v", err) - } - oldTables := string(oldTablesRaw[:bLen]) - newTable := fmt.Sprintf("%d %s", c.serviceRtTable.Idx, c.serviceRtTable.Name) - - if strings.Index(oldTables, newTable) == -1 { - if _, err := f.WriteString(newTable); err != nil { - return fmt.Errorf("failed to add antrea service route table: %v", err) - } - } - - gwConfig := c.nodeConfig.GatewayConfig - if !c.encapMode.IsNetworkPolicyOnly() { - // Add local podCIDR if applicable to service rt table. 
- route := &netlink.Route{ - LinkIndex: gwConfig.LinkIndex, - Scope: netlink.SCOPE_LINK, - Dst: c.nodeConfig.PodCIDR, - Table: c.serviceRtTable.Idx, - } - if err := netlink.RouteReplace(route); err != nil { - return fmt.Errorf("failed to add link route to service table: %v", err) - } - } - - // create ip rule to select route table - ipRule := netlink.NewRule() - ipRule.IifName = c.nodeConfig.GatewayConfig.Name - ipRule.Mark = RtTblSelectorValue - ipRule.Mask = RtTblSelectorValue - ipRule.Table = c.serviceRtTable.Idx - ipRule.Priority = AntreaIPRulePriority - - ruleList, err := netlink.RuleList(netlink.FAMILY_V4) - if err != nil { - return fmt.Errorf("failed to get ip rule: %v", err) - } - // Check for ip rule presence. - for _, rule := range ruleList { - if rule == *ipRule { - return nil - } - } - err = netlink.RuleAdd(ipRule) - if err != nil { - return fmt.Errorf("failed to create ip rule for service route table: %v", err) - } - return nil -} - -func (c *Client) readRtTable() (string, error) { - f, err := os.OpenFile(routeTableConfigPath, os.O_RDONLY, 0) - if err != nil { - return "", fmt.Errorf("route table(open): %w", err) - } - defer f.Close() - - tablesRaw := make([]byte, 1024) - bLen, err := f.Read(tablesRaw) - if err != nil { - return "", fmt.Errorf("route table(read): %w", err) - } - return string(tablesRaw[:bLen]), nil -} - -// removeServiceRouting removes service routing setup. 
-func (c *Client) removeServiceRouting() error { - // remove service table - tables, err := c.readRtTable() - if err != nil { + r := i.(*netlink.Route) + klog.V(4).Infof("Deleting route %v", r) + if err := netlink.RouteDel(r); err != nil && err != unix.ESRCH { return err } - newTable := fmt.Sprintf("%d %s", AntreaServiceTableIdx, AntreaServiceTable) - if strings.Index(tables, newTable) != -1 { - tables = strings.Replace(tables, newTable, "", -1) - f, err := os.OpenFile(routeTableConfigPath, os.O_WRONLY|os.O_TRUNC, 0) - if err != nil { - return fmt.Errorf("route table(open): %w", err) - } - defer f.Close() - if _, err = f.WriteString(tables); err != nil { - return fmt.Errorf("route table(write): %w", err) - } - } - - // flush service table - filter := &netlink.Route{ - Table: AntreaServiceTableIdx, - LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex} - routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_TABLE|netlink.RT_FILTER_OIF) - if err != nil { - return fmt.Errorf("route table(list): %w", err) - } - for _, route := range routes { - if err = netlink.RouteDel(&route); err != nil { - return fmt.Errorf("route delete: %w", err) - } - } - - // delete ip rule for service table - ipRule := netlink.NewRule() - ipRule.IifName = c.nodeConfig.GatewayConfig.Name - ipRule.Mark = RtTblSelectorValue - ipRule.Mask = RtTblSelectorValue - ipRule.Table = AntreaServiceTableIdx - ipRule.Priority = AntreaIPRulePriority - if err = netlink.RuleDel(ipRule); err != nil { - return fmt.Errorf("ip rule delete: %w", err) - } + c.nodeRoutes.Delete(podCIDRStr) return nil } @@ -627,48 +360,6 @@ func disableICMPSendRedirects(intfName string) error { return nil } -// resolveDefaultRouteNHMAC resolves the MAC of default route next -// hop on service route table. 
-func (c *Client) resolveDefaultRouteNHMAC() (net.HardwareAddr, error) { - return net.ParseMAC(svcTblVirtualDefaultGWMAC) -} - -// setupPolicyOnlyMode configures routing needed by traffic in policy-only mode. -func (c *Client) setupPolicyOnlyMode() error { - gwLink := util.GetNetLink(c.nodeConfig.GatewayConfig.Name) - _, gwIP, _ := net.ParseCIDR(fmt.Sprintf("%s/32", c.nodeConfig.NodeIPAddr.IP.String())) - if err := netlink.AddrReplace(gwLink, &netlink.Addr{IPNet: gwIP}); err != nil { - return fmt.Errorf("failed to add address %s to gw %s: %v", gwIP, gwLink.Attrs().Name, err) - } - - // Add default route to service table. - _, defaultRt, _ := net.ParseCIDR("0/0") - nhIP := net.ParseIP(svcTblVirtualDefaultGWIP) - route := &netlink.Route{ - LinkIndex: gwLink.Attrs().Index, - Table: c.serviceRtTable.Idx, - Flags: int(netlink.FLAG_ONLINK), - Dst: defaultRt, - Gw: nhIP, - } - if err := netlink.RouteReplace(route); err != nil { - return fmt.Errorf("failed to add default route to service table: %v", err) - } - // Add static neighbor to next hop so that no ARPING is ever required on antrea-gw0. - nhMAC, _ := c.resolveDefaultRouteNHMAC() - neigh := &netlink.Neigh{ - LinkIndex: gwLink.Attrs().Index, - Family: netlink.FAMILY_V4, - State: netlink.NUD_PERMANENT, - IP: nhIP, - HardwareAddr: nhMAC, - } - if err := netlink.NeighSet(neigh); err != nil { - return fmt.Errorf("failed to add neigh %v to gw %s: %v", neigh, gwLink.Attrs().Name, err) - } - return nil -} - // MigrateRoutesToGw moves routes (including assigned IP addresses if any) from link linkName to // host gateway. 
func (c *Client) MigrateRoutesToGw(linkName string) error { diff --git a/test/integration/agent/route_test.go b/test/integration/agent/route_test.go index d70de4ba6f6..d8f63663898 100644 --- a/test/integration/agent/route_test.go +++ b/test/integration/agent/route_test.go @@ -26,7 +26,6 @@ import ( "github.com/containernetworking/plugins/pkg/ip" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/vishvananda/netlink" "github.com/vmware-tanzu/antrea/pkg/agent/config" @@ -57,9 +56,6 @@ var ( gwIP = net.ParseIP("10.10.10.1") gwMAC, _ = net.ParseMAC("12:34:56:78:bb:cc") gwName = "antrea-gw0" - svcTblIdx = route.AntreaServiceTableIdx - svcTblName = route.AntreaServiceTable - mainTblIdx = 254 gwConfig = &config.GatewayConfig{IP: gwIP, MAC: gwMAC, Name: gwName} nodeConfig = &config.NodeConfig{ Name: "test", @@ -94,18 +90,13 @@ func TestInitialize(t *testing.T) { link := createDummyGW(t) defer netlink.LinkDel(link) - refRouteTablesStr, _ := ExecOutputTrim("cat /etc/iproute2/rt_tables") - tcs := []struct { // variations mode config.TrafficEncapModeType - // expectations - expSvcTbl bool - expIPRule bool }{ - {mode: config.TrafficEncapModeNoEncap, expSvcTbl: true, expIPRule: true}, - {mode: config.TrafficEncapModeHybrid, expSvcTbl: true, expIPRule: true}, - {mode: config.TrafficEncapModeEncap, expSvcTbl: false, expIPRule: false}, + {mode: config.TrafficEncapModeNoEncap}, + {mode: config.TrafficEncapModeHybrid}, + {mode: config.TrafficEncapModeEncap}, } for _, tc := range tcs { @@ -123,41 +114,6 @@ func TestInitialize(t *testing.T) { t.Error(err) } - // verify route tables - expRouteTablesStr := refRouteTablesStr - if tc.expSvcTbl { - expRouteTablesStr = fmt.Sprintf("%s%d%s", refRouteTablesStr, svcTblIdx, svcTblName) - } - routeTables, err := ExecOutputTrim("cat /etc/iproute2/rt_tables") - if err != nil { - t.Error(err) - } - if !assert.Equal(t, expRouteTablesStr, routeTables) { - t.Errorf("mismatch route tables") - } - - if tc.expSvcTbl { 
- expRouteStr := fmt.Sprintf("%s dev %s scope link", podCIDR, gwName) - expRouteStr = strings.Join(strings.Fields(expRouteStr), "") - ipRoute, _ := ExecOutputTrim(fmt.Sprintf("ip route show table %d | grep %s", svcTblIdx, podCIDR)) - if len(ipRoute) > len(expRouteStr) { - ipRoute = ipRoute[:len(expRouteStr)] - } - assert.Equal(t, expRouteStr, ipRoute, "mismatch link route") - } - - // verify ip rules - expIPRulesStr := "" - if tc.expIPRule { - expIPRulesStr = fmt.Sprintf("%d: from all fwmark %#x/%#x iif %s lookup %s", route.AntreaIPRulePriority, route.RtTblSelectorValue, route.RtTblSelectorValue, - gwName, svcTblName) - expIPRulesStr = strings.Join(strings.Fields(expIPRulesStr), "") - } - ipRule, _ := ExecOutputTrim(fmt.Sprintf("ip rule | grep %x", route.RtTblSelectorValue)) - if !assert.Equal(t, expIPRulesStr, ipRule) { - t.Errorf("mismatch ip rules") - } - // verify ipset err = exec.Command("ipset", "list", "ANTREA-POD-IP").Run() assert.NoError(t, err, "ipset not exist") @@ -171,9 +127,6 @@ func TestInitialize(t *testing.T) { -A FORWARD -m comment --comment "Antrea: jump to Antrea forwarding rules" -j ANTREA-FORWARD -A ANTREA-FORWARD -i antrea-gw0 -m comment --comment "Antrea: accept packets from local pods" -j ACCEPT -A ANTREA-FORWARD -o antrea-gw0 -m comment --comment "Antrea: accept packets to local pods" -j ACCEPT -`, - "raw": `:ANTREA-RAW - [0:0] --A PREROUTING -m comment --comment "Antrea: jump to Antrea raw rules" -j ANTREA-RAW `, "mangle": `:ANTREA-MANGLE - [0:0] -A PREROUTING -m comment --comment "Antrea: jump to Antrea mangle rules" -j ANTREA-MANGLE @@ -183,17 +136,6 @@ func TestInitialize(t *testing.T) { -A ANTREA-POSTROUTING -s 10.10.10.0/24 -m comment --comment "Antrea: masquerade pod to external packets" -m set ! 
--match-set ANTREA-POD-IP dst -j MASQUERADE `, } - if tc.mode.SupportsNoEncap() { - expectedIPTables["mangle"] = `:ANTREA-MANGLE - [0:0] --A PREROUTING -m comment --comment "Antrea: jump to Antrea mangle rules" -j ANTREA-MANGLE --A ANTREA-MANGLE -d 200.200.0.0/16 -i antrea-gw0 -m comment --comment "Antrea: mark pod to service packets" -j MARK --set-xmark 0x800/0x800 --A ANTREA-MANGLE ! -d 200.200.0.0/16 -i antrea-gw0 -m comment --comment "Antrea: unmark post LB service packets" -j MARK --set-xmark 0x0/0xffffffff -` - expectedIPTables["raw"] = `:ANTREA-RAW - [0:0] --A PREROUTING -m comment --comment "Antrea: jump to Antrea raw rules" -j ANTREA-RAW --A ANTREA-RAW -i antrea-gw0 -m comment --comment "Antrea: reentry pod traffic skip conntrack" -m mac --mac-source DE:AD:BE:EF:DE:AD -j CT --notrack -` - } for table, expectedData := range expectedIPTables { // #nosec G204: ignore in test code @@ -221,18 +163,13 @@ func TestAddAndDeleteRoutes(t *testing.T) { peerCIDR string peerIP net.IP // expectations - expRoutes map[int]netlink.Link // keyed on rt id, and val indicates outbound dev + uplink netlink.Link // indicates outbound of the route. 
}{ - {mode: config.TrafficEncapModeEncap, peerCIDR: "10.10.20.0/24", peerIP: localPeerIP, - expRoutes: map[int]netlink.Link{mainTblIdx: gwLink}}, - {mode: config.TrafficEncapModeNoEncap, peerCIDR: "10.10.30.0/24", peerIP: localPeerIP, - expRoutes: map[int]netlink.Link{svcTblIdx: gwLink, mainTblIdx: nodeLink}}, - {mode: config.TrafficEncapModeNoEncap, peerCIDR: "10.10.40.0/24", peerIP: remotePeerIP, - expRoutes: map[int]netlink.Link{svcTblIdx: gwLink}}, - {mode: config.TrafficEncapModeHybrid, peerCIDR: "10.10.50.0/24", peerIP: localPeerIP, - expRoutes: map[int]netlink.Link{svcTblIdx: gwLink, mainTblIdx: nodeLink}}, - {mode: config.TrafficEncapModeHybrid, peerCIDR: "10.10.60.0/24", peerIP: remotePeerIP, - expRoutes: map[int]netlink.Link{svcTblIdx: gwLink, mainTblIdx: gwLink}}, + {mode: config.TrafficEncapModeEncap, peerCIDR: "10.10.20.0/24", peerIP: localPeerIP, uplink: gwLink}, + {mode: config.TrafficEncapModeNoEncap, peerCIDR: "10.10.30.0/24", peerIP: localPeerIP, uplink: nodeLink}, + {mode: config.TrafficEncapModeNoEncap, peerCIDR: "10.10.40.0/24", peerIP: remotePeerIP, uplink: nil}, + {mode: config.TrafficEncapModeHybrid, peerCIDR: "10.10.50.0/24", peerIP: localPeerIP, uplink: nodeLink}, + {mode: config.TrafficEncapModeHybrid, peerCIDR: "10.10.60.0/24", peerIP: remotePeerIP, uplink: gwLink}, } for _, tc := range tcs { @@ -251,22 +188,23 @@ func TestAddAndDeleteRoutes(t *testing.T) { t.Errorf("route add failed with err %v", err) } - for tblIdx, link := range tc.expRoutes { + expRouteStr := "" + if tc.uplink != nil { nhIP := nhCIDRIP onlink := "onlink" - if link.Attrs().Name != gwName { + if tc.uplink.Attrs().Name != gwName { nhIP = tc.peerIP onlink = "" } - expRouteStr := fmt.Sprintf("%s via %s dev %s %s", peerCIDR, nhIP, link.Attrs().Name, onlink) + expRouteStr = fmt.Sprintf("%s via %s dev %s %s", peerCIDR, nhIP, tc.uplink.Attrs().Name, onlink) expRouteStr = strings.Join(strings.Fields(expRouteStr), "") - ipRoute, _ := ExecOutputTrim(fmt.Sprintf("ip route show 
table %d | grep %s", tblIdx, tc.peerCIDR)) - if len(ipRoute) > len(expRouteStr) { - ipRoute = ipRoute[:len(expRouteStr)] - } - if !assert.Equal(t, expRouteStr, ipRoute) { - t.Errorf("mismatch route") - } + } + ipRoute, _ := ExecOutputTrim(fmt.Sprintf("ip route show | grep %s", tc.peerCIDR)) + if len(ipRoute) > len(expRouteStr) { + ipRoute = ipRoute[:len(expRouteStr)] + } + if !assert.Equal(t, expRouteStr, ipRoute) { + t.Errorf("mismatch route") } entries, err := ipset.ListEntries("ANTREA-POD-IP") @@ -305,7 +243,7 @@ func TestReconcile(t *testing.T) { addedRoutes []peer desiredPeerCIDRs []string // expectations - expRouteNum map[string]int + expRoutes map[string]netlink.Link }{ { mode: config.TrafficEncapModeEncap, @@ -314,7 +252,7 @@ func TestReconcile(t *testing.T) { {peerCIDR: "10.10.30.0/24", peerIP: ip.NextIP((remotePeerIP))}, }, desiredPeerCIDRs: []string{"10.10.20.0/24"}, - expRouteNum: map[string]int{"10.10.20.0/24": 1, "10.10.30.0/24": 0}, + expRoutes: map[string]netlink.Link{"10.10.20.0/24": gwLink, "10.10.30.0/24": nil}, }, { mode: config.TrafficEncapModeNoEncap, @@ -323,7 +261,7 @@ func TestReconcile(t *testing.T) { {peerCIDR: "10.10.30.0/24", peerIP: ip.NextIP((localPeerIP))}, }, desiredPeerCIDRs: []string{"10.10.20.0/24"}, - expRouteNum: map[string]int{"10.10.20.0/24": 2, "10.10.30.0/24": 0}, + expRoutes: map[string]netlink.Link{"10.10.20.0/24": nodeLink, "10.10.30.0/24": nil}, }, { mode: config.TrafficEncapModeHybrid, @@ -334,11 +272,12 @@ func TestReconcile(t *testing.T) { {peerCIDR: "10.10.50.0/24", peerIP: ip.NextIP((remotePeerIP))}, }, desiredPeerCIDRs: []string{"10.10.20.0/24", "10.10.40.0/24"}, - expRouteNum: map[string]int{"10.10.20.0/24": 2, "10.10.30.0/24": 0, "10.10.40.0/24": 2, "10.10.50.0/24": 0}, + expRoutes: map[string]netlink.Link{"10.10.20.0/24": nodeLink, "10.10.30.0/24": nil, "10.10.40.0/24": gwLink, "10.10.50.0/24": nil}, }, } for _, tc := range tcs { + t.Logf("Running test with mode %s added routes %v desired routes %v", tc.mode, 
tc.addedRoutes, tc.desiredPeerCIDRs) routeClient, err := route.NewClient(serviceCIDR, tc.mode) if err != nil { t.Error(err) @@ -359,7 +298,14 @@ func TestReconcile(t *testing.T) { t.Errorf("Reconcile failed with err %v", err) } - for dst, expNum := range tc.expRouteNum { + for dst, uplink := range tc.expRoutes { + expNum := 0 + if uplink != nil { + output, err := ExecOutputTrim(fmt.Sprintf("ip route show table 0 exact %s", dst)) + assert.NoError(t, err) + assert.Contains(t, output, fmt.Sprintf("dev%s", uplink.Attrs().Name)) + expNum = 1 + } output, err := ExecOutputTrim(fmt.Sprintf("ip route show table 0 exact %s | wc -l", dst)) assert.NoError(t, err) assert.Equal(t, fmt.Sprint(expNum), output, "mismatch number of routes to %s", dst) @@ -387,7 +333,7 @@ func TestRouteTablePolicyOnly(t *testing.T) { if err := routeClient.Initialize(nodeConfig); err != nil { t.Error(err) } - //verify gw IP + // Verify gw IP gwName := nodeConfig.GatewayConfig.Name gwIPOut, err := ExecOutputTrim(fmt.Sprintf("ip addr show %s", gwName)) if err != nil { @@ -398,17 +344,6 @@ func TestRouteTablePolicyOnly(t *testing.T) { Mask: net.CIDRMask(32, 32), } assert.Contains(t, gwIPOut, gwIP.String()) - // verify default routes and neigh - expRoute := strings.Join(strings.Fields( - "default via 169.254.253.1 dev antrea-gw0 onlink"), "") - routeOut, err := ExecOutputTrim(fmt.Sprintf("ip route show table %d", svcTblIdx)) - require.Nil(t, err, "error when running 'ip route show'") - assert.Equal(t, expRoute, routeOut) - expNeigh := strings.Join(strings.Fields( - "169.254.253.1 dev antrea-gw0 lladdr 12:34:56:78:9a:bc PERMANENT"), "") - neighOut, err := ExecOutputTrim(fmt.Sprintf("ip neigh | grep %s", gwName)) - require.Nil(t, err, "error when running 'ip neigh'") - assert.Equal(t, expNeigh, neighOut) cLink := &netlink.Dummy{} cLink.Name = "containerLink" @@ -439,7 +374,7 @@ func TestRouteTablePolicyOnly(t *testing.T) { if err := routeClient.MigrateRoutesToGw(cLink.Name); err != nil { t.Error(err) } - 
expRoute = strings.Join(strings.Fields( + expRoute := strings.Join(strings.Fields( fmt.Sprintf("%s dev %s scope link", hostRt.IP, gwName)), "") output, _ := ExecOutputTrim(fmt.Sprintf("ip route show")) assert.Containsf(t, output, expRoute, output)