Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

connectivity: Automatically install IP routes on nodes w/o Cilium for from CIDR tests AND get rid of --datapath #1579

Merged
merged 11 commits into from
May 25, 2023
64 changes: 6 additions & 58 deletions .github/workflows/kind.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ env:
jobs:
installation-and-connectivity:
runs-on: ubuntu-22.04
timeout-minutes: 40
timeout-minutes: 50
strategy:
matrix:
mode: ["classic", "helm"]
Expand Down Expand Up @@ -73,59 +73,6 @@ jobs:
kubectl label nodes "${node}" cilium.io/no-schedule=true
done

# This is needed for the tests that run with nodes without Cilium.
- name: Install static routes
run: |
EXTERNAL_FROM_CIDRS=($(kubectl get nodes -o jsonpath='{range .items[*]}{.spec.podCIDR}{"\n"}{end}'))
EXTERNAL_NODE_IPS=() # Nodes IPs are collected to be passed to the Cilium CLI later on.

# Loop over each pod CIDR from all nodes.
for i in "${!EXTERNAL_FROM_CIDRS[@]}"; do
EXTERNAL_FROM_CIDR=${EXTERNAL_FROM_CIDRS[i]}

if [[ $EXTERNAL_FROM_CIDR =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/[0-9]+$ ]]; then
IP_FAMILY="v4"
elif [[ $EXTERNAL_FROM_CIDR =~ ^[0-9a-fA-F:]+/[0-9]+$ ]]; then
IP_FAMILY="v6"
else
echo "ERROR: Malformed pod CIDR '${EXTERNAL_FROM_CIDR}'" >&2
exit 1
fi

IFS=',' read -ra NODES_WITHOUT <<< "$NODES_WITHOUT_CILIUM" # Split by comma into an array
for WITHOUT in "${NODES_WITHOUT[@]}"; do
# Fetch node with a specific pod CIDR.
node=$(kubectl get nodes -o jsonpath="{range .items[?(@.spec.podCIDR == '$EXTERNAL_FROM_CIDR')]}{@.metadata.name}{end}")
if [[ -z "$node" ]]; then
echo "ERROR: Could not find node with .spec.podCIDR matching ${EXTERNAL_FROM_CIDR}" >&2
exit 1
fi
for NODE_IP in $(kubectl get node "$node" -o jsonpath="{.status.addresses[?(@.type == 'InternalIP')].address}"); do
# Skip if the node IP's family mismatches with pod CIDR's
# family. Cannot create a route with the gateway IP family
# mismatching the subnet.
if [[ "$IP_FAMILY" == "v4" && ! "$NODE_IP" =~ \. ]]; then
continue
elif [[ "$IP_FAMILY" == "v6" && ! "$NODE_IP" =~ \: ]]; then
continue
fi
# Install static route on the host, towards the pod CIDR via the node IP.
docker exec "$WITHOUT" ip route replace "${EXTERNAL_FROM_CIDR}" via "${NODE_IP}"
EXTERNAL_NODE_IPS+=("${NODE_IP}")
done
done
done

# Join the elements with a comma delimiter, or leave them unmodified
# if there's only one element so that it can be passed properly to
# the CLI.
if [[ ${#EXTERNAL_NODE_IPS[@]} -eq 1 ]]; then
EXTERNAL_NODE_IPS_PARAM="${EXTERNAL_NODE_IPS[0]}"
else
EXTERNAL_NODE_IPS_PARAM=$(IFS=','; echo "${EXTERNAL_NODE_IPS[*]}")
fi
echo "EXTERNAL_NODE_IPS_PARAM=${EXTERNAL_NODE_IPS_PARAM}" >> $GITHUB_ENV

# Install Cilium with HostPort support and enables Prometheus for extended connectivity test.
- name: Install Cilium
run: |
Expand Down Expand Up @@ -154,7 +101,7 @@ jobs:
run: |
# Run the connectivity test in non-default namespace (i.e. not cilium-test)
cilium connectivity test --debug --all-flows --test-namespace test-namespace \
--external-from-cidrs="${EXTERNAL_NODE_IPS_PARAM}" \
--include-unsafe-tests \
--collect-sysdump-on-failure --junit-file connectivity-${{ matrix.mode }}.xml

- name: Upload junit output
Expand Down Expand Up @@ -205,7 +152,7 @@ jobs:
- name: Connectivity test
run: |
cilium connectivity test --debug --force-deploy --all-flows --test-namespace test-namespace \
--external-from-cidrs="${EXTERNAL_NODE_IPS_PARAM}" \
--include-unsafe-tests \
--collect-sysdump-on-failure --junit-file connectivity-ipsec-${{ matrix.mode }}.xml

- name: Upload junit output
Expand Down Expand Up @@ -242,7 +189,7 @@ jobs:

helm-upgrade-clustermesh:
runs-on: ubuntu-22.04
timeout-minutes: 40
timeout-minutes: 50

env:
CILIUM_CLI_MODE: helm
Expand Down Expand Up @@ -356,7 +303,8 @@ jobs:
- name: Run the multicluster connectivity tests
run: |
cilium connectivity test --context $CLUSTER1 --multi-cluster $CLUSTER2 --debug \
--collect-sysdump-on-failure --junit-file connectivity-clustermesh.xml
--include-unsafe-tests \
--collect-sysdump-on-failure --junit-file connectivity-clustermesh.xml

- name: Upload junit output
if: ${{ always() }}
Expand Down
5 changes: 1 addition & 4 deletions connectivity/check/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -810,10 +810,7 @@ func (a *Action) followFlows(ctx context.Context, ready chan bool) error {
// All tests are initiated from the source Pod, so filtering traffic
// originating from and destined to the Pod should capture what we need.
pod := a.Source()
filter := []*flow.FlowFilter{
{SourcePod: []string{pod.Name()}},
{DestinationPod: []string{pod.Name()}},
}
filter := pod.FlowFilters()

// Initiate long-poll against Hubble Relay.
b, err := hubbleClient.GetFlows(ctx, &observer.GetFlowsRequest{
Expand Down
16 changes: 13 additions & 3 deletions connectivity/check/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ type Parameters struct {
JSONMockImage string
AgentDaemonSetName string
DNSTestServerImage string
Datapath bool
IncludeUnsafeTests bool
AgentPodSelector string
NodeSelector map[string]string
ExternalTarget string
ExternalCIDR string
ExternalIP string
ExternalOtherIP string
ExternalFromCIDRs []string
ExternalFromCIDRMasks []int // Derived from ExternalFromCIDRs
PodCIDRs []podCIDRs
NodesWithoutCiliumIPs []nodesWithoutCiliumIP
JunitFile string

K8sVersion string
Expand All @@ -75,6 +75,16 @@ type Parameters struct {
SysdumpOptions sysdump.Options
}

type podCIDRs struct {
CIDR string
HostIP string
}

type nodesWithoutCiliumIP struct {
IP string
Mask int
}

func (p Parameters) ciliumEndpointTimeout() time.Duration {
return 5 * time.Minute
}
Expand Down
74 changes: 62 additions & 12 deletions connectivity/check/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ type ConnectivityTest struct {

lastFlowTimestamps map[string]time.Time

nodes map[string]*corev1.Node
nodesWithoutCilium []string
nodes map[string]*corev1.Node
nodesWithoutCilium []string
nodesWithoutCiliumMap map[string]struct{}

manifests map[string]string
helmYAMLValues string
Expand Down Expand Up @@ -308,8 +309,12 @@ func (ct *ConnectivityTest) SetupAndValidate(ctx context.Context) error {
}
}
if ct.features.MatchRequirements(RequireFeatureEnabled(FeatureNodeWithoutCilium)) {
if err := ct.validateExternalFromCIDRsWithNodesWithoutCilium(); err != nil {
return fmt.Errorf("invalid configuration for nodes without Cilium: %w", err)
if err := ct.detectPodCIDRs(ctx); err != nil {
return fmt.Errorf("unable to detect pod CIDRs: %w", err)
}

if err := ct.detectNodesWithoutCiliumIPs(); err != nil {
return fmt.Errorf("unable to detect nodes w/o Cilium IPs: %w", err)
}
}
return nil
Expand Down Expand Up @@ -544,17 +549,62 @@ func (ct *ConnectivityTest) enableHubbleClient(ctx context.Context) error {
return nil
}

func (ct *ConnectivityTest) validateExternalFromCIDRsWithNodesWithoutCilium() error {
if len(ct.params.ExternalFromCIDRs) == 0 {
ct.Fatalf("--%s must not be empty if Cilium was install with --%s set", "external-from-cidrs", "nodes-without-cilium")
func (ct *ConnectivityTest) detectPodCIDRs(ctx context.Context) error {
nodes, err := ct.client.ListNodes(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("failed to list nodes: %w", err)
}
for _, cidr := range ct.params.ExternalFromCIDRs {
addr, err := netip.ParseAddr(cidr)
if err != nil {
return fmt.Errorf("unable to parse external-from-cidr %q: %w", cidr, err)

for _, n := range nodes.Items {
for _, cidr := range n.Spec.PodCIDRs {
f := GetIPFamily(ct.hostNetNSPodsByNode[n.Name].Pod.Status.HostIP)
if strings.Contains(cidr, ":") && f == IPFamilyV4 {
// Skip if the host IP of the pod mismatches with pod CIDR.
// Cannot create a route with the gateway IP family
// mismatching the subnet.
continue
}
hostIP := ct.hostNetNSPodsByNode[n.Name].Pod.Status.HostIP
ct.params.PodCIDRs = append(ct.params.PodCIDRs, podCIDRs{cidr, hostIP})
}
ct.params.ExternalFromCIDRMasks = append(ct.params.ExternalFromCIDRMasks, addr.BitLen())
}

return nil
}

func (ct *ConnectivityTest) detectNodesWithoutCiliumIPs() error {
for _, n := range ct.nodesWithoutCilium {
pod := ct.hostNetNSPodsByNode[n]
for _, ip := range pod.Pod.Status.PodIPs {
viktor-kurchenko marked this conversation as resolved.
Show resolved Hide resolved
hostIP, err := netip.ParseAddr(ip.IP)
if err != nil {
return fmt.Errorf("unable to parse nodes without Cilium IP addr %q: %w", ip.IP, err)
}
ct.params.NodesWithoutCiliumIPs = append(ct.params.NodesWithoutCiliumIPs,
nodesWithoutCiliumIP{ip.IP, hostIP.BitLen()})
}
}

return nil
}

func (ct *ConnectivityTest) modifyStaticRoutesForNodesWithoutCilium(ctx context.Context, verb string) error {
for _, e := range ct.params.PodCIDRs {
for _, withoutCilium := range ct.nodesWithoutCilium {
pod := ct.hostNetNSPodsByNode[withoutCilium]
_, err := ct.client.ExecInPod(ctx, pod.Pod.Namespace, pod.Pod.Name, hostNetNSDeploymentNameNonCilium,
[]string{"ip", "route", verb, e.CIDR, "via", e.HostIP},
)
ct.Debugf("Modifying (%s) static route on nodes without Cilium (%v): %v",
verb, withoutCilium,
[]string{"ip", "route", verb, e.CIDR, "via", e.HostIP},
)
if err != nil {
return fmt.Errorf("failed to %s static route: %w", verb, err)
}
}
}

return nil
}

Expand Down
Loading