From 87c686831248f24b486b27cd8958c003e2e5c6be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E7=A5=96=E5=BB=BA?= Date: Sat, 6 May 2023 13:26:22 +0800 Subject: [PATCH] iptables: use the same mode with kube-proxy (#2758) * iptables: use the same mode with kube-proxy (#2535) * build base images for pr on necessary --- .github/workflows/build-x86-image.yaml | 64 +++++- Makefile | 26 ++- dist/images/Dockerfile | 1 + dist/images/Dockerfile.base | 3 - dist/images/iptables-wrapper-installer.sh | 211 ++++++++++++++++++ dist/images/start-cniserver.sh | 3 + dist/images/start-ovs.sh | 3 + go.mod | 2 +- go.sum | 4 +- pkg/daemon/controller_linux.go | 70 +++++- pkg/daemon/gateway_linux.go | 259 +++++++++++++++------- 11 files changed, 532 insertions(+), 114 deletions(-) create mode 100755 dist/images/iptables-wrapper-installer.sh diff --git a/.github/workflows/build-x86-image.yaml b/.github/workflows/build-x86-image.yaml index d788fc6db1c..730e057fb3d 100644 --- a/.github/workflows/build-x86-image.yaml +++ b/.github/workflows/build-x86-image.yaml @@ -25,9 +25,46 @@ env: HELM_VERSION: v3.11.1 jobs: + build-kube-ovn-base: + name: Build kube-ovn-base + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + - uses: docker/setup-buildx-action@v2 + if: github.base_ref != null + - name: Build + run: | + touch .CI_PASSED_VAR + if git diff --name-only HEAD^ HEAD | grep -q ^dist/images/Dockerfile.base$; then + echo "BUILD_BASE=1" > .CI_PASSED_VAR + echo "BUILD_BASE=1" >> "$GITHUB_ENV" + make base-amd64 + make base-tar-amd64 + fi + if git diff --name-only HEAD^ HEAD | grep -q ^dist/images/Dockerfile.base-dpdk$; then + make base-amd64-dpdk + fi + + - name: Upload variable file to artifact + uses: actions/upload-artifact@v3 + with: + name: variables + path: .CI_PASSED_VAR + + - name: Upload base images to artifact + if: env.BUILD_BASE == 1 + uses: actions/upload-artifact@v3 + with: + name: kube-ovn-base + path: image-amd64.tar + build-kube-ovn: name: Build kube-ovn runs-on: ubuntu-22.04 + needs: + - build-kube-ovn-base steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 @@ -62,12 +99,37 @@ jobs: install "$tmp/gosec" /usr/local/bin rm -rf $tmp + - name: Download variable file + uses: actions/download-artifact@v3 + with: + name: variables + + - name: Export passed variables + run: cat .CI_PASSED_VAR >> "$GITHUB_ENV" + + - name: Download base images + if: env.BUILD_BASE == 1 + uses: actions/download-artifact@v3 + with: + name: kube-ovn-base + + - name: Load base images + if: env.BUILD_BASE == 1 + run: docker load --input image-amd64.tar + - name: Build run: | go mod tidy git diff --exit-code make lint - make image-kube-ovn + if [ "x${{ env.BUILD_BASE }}" = "x1" ]; then + TAG=$(cat VERSION) + docker tag kubeovn/kube-ovn-base:$TAG-amd64 kubeovn/kube-ovn-base:$TAG + docker tag kubeovn/kube-ovn-base:$TAG-amd64-no-avx512 kubeovn/kube-ovn-base:$TAG-no-avx512 + make build-kube-ovn + else + make image-kube-ovn + fi make tar-kube-ovn - name: Upload images to artifact diff --git a/Makefile b/Makefile index 9f3cd56248f..5222e01a74a 100644 --- a/Makefile +++ b/Makefile @@ -46,9 +46,15 @@ build-go-arm: CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -ldflags $(GOLDFLAGS) -v ./cmd CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-webhook -ldflags $(GOLDFLAGS) -v ./cmd/webhook +.PHONY: build-kube-ovn +build-kube-ovn: build-go + docker build -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG) -f dist/images/Dockerfile dist/images/ + docker build -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-no-avx512 -f dist/images/Dockerfile.no-avx512 dist/images/ + docker build -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-dpdk -f dist/images/Dockerfile.dpdk dist/images/ + .PHONY: build-dev build-dev: build-go - docker build --build-arg ARCH=amd64 -t $(REGISTRY)/kube-ovn:$(DEV_TAG) -f dist/images/Dockerfile dist/images/ + docker build -t $(REGISTRY)/kube-ovn:$(DEV_TAG) -f dist/images/Dockerfile dist/images/ .PHONY: build-dpdk build-dpdk: @@ -69,9 +75,9 @@ base-arm64: .PHONY: image-kube-ovn image-kube-ovn: build-go - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile dist/images/ - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-no-avx512 -o type=docker -f dist/images/Dockerfile.no-avx512 dist/images/ - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-dpdk -o type=docker -f dist/images/Dockerfile.dpdk dist/images/ + docker buildx build --platform linux/amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile dist/images/ + docker buildx build --platform linux/amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-no-avx512 -o type=docker -f dist/images/Dockerfile.no-avx512 dist/images/ + docker buildx build --platform linux/amd64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG)-dpdk -o type=docker -f dist/images/Dockerfile.dpdk dist/images/ .PHONY: image-debug image-debug: build-go @@ -79,24 +85,24 @@ image-debug: build-go .PHONY: image-vpc-nat-gateway image-vpc-nat-gateway: - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/vpc-nat-gateway:$(RELEASE_TAG) -o type=docker -f dist/images/vpcnatgateway/Dockerfile dist/images/vpcnatgateway + docker buildx build --platform linux/amd64 -t $(REGISTRY)/vpc-nat-gateway:$(RELEASE_TAG) -o type=docker -f dist/images/vpcnatgateway/Dockerfile dist/images/vpcnatgateway .PHONY: image-centos-compile image-centos-compile: - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/centos7-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos7/Dockerfile fastpath/ - # docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/centos8-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos8/Dockerfile fastpath/ + docker buildx build --platform linux/amd64 -t $(REGISTRY)/centos7-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos7/Dockerfile fastpath/ + # docker buildx build --platform linux/amd64 -t $(REGISTRY)/centos8-compile:$(RELEASE_TAG) -o type=docker -f dist/images/compile/centos8/Dockerfile fastpath/ .PHOONY: image-test image-test: build-go - docker buildx build --platform linux/amd64 --build-arg ARCH=amd64 -t $(REGISTRY)/test:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile.test dist/images/ + docker buildx build --platform linux/amd64 -t $(REGISTRY)/test:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile.test dist/images/ .PHONY: release release: lint image-kube-ovn image-vpc-nat-gateway image-centos-compile .PHONY: release-arm release-arm: build-go-arm - docker buildx build --platform linux/arm64 --build-arg ARCH=arm64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile dist/images/ - docker buildx build --platform linux/arm64 --build-arg ARCH=arm64 -t $(REGISTRY)/vpc-nat-gateway:$(RELEASE_TAG) -o type=docker -f dist/images/vpcnatgateway/Dockerfile dist/images/vpcnatgateway + docker buildx build --platform linux/arm64 -t $(REGISTRY)/kube-ovn:$(RELEASE_TAG) -o type=docker -f dist/images/Dockerfile dist/images/ + docker buildx build --platform linux/arm64 -t $(REGISTRY)/vpc-nat-gateway:$(RELEASE_TAG) -o type=docker -f dist/images/vpcnatgateway/Dockerfile dist/images/vpcnatgateway .PHONY: push-dev push-dev: diff --git a/dist/images/Dockerfile b/dist/images/Dockerfile index e819451e247..a20ce5f5ff3 100644 --- a/dist/images/Dockerfile +++ b/dist/images/Dockerfile @@ -9,6 +9,7 @@ COPY grace_stop_ovn_controller /usr/share/ovn/scripts/grace_stop_ovn_controller WORKDIR /kube-ovn +RUN /kube-ovn/iptables-wrapper-installer.sh --no-sanity-check RUN rm -f /usr/bin/nc &&\ rm -f /usr/bin/netcat RUN deluser sync diff --git a/dist/images/Dockerfile.base b/dist/images/Dockerfile.base index 7cac6e37dcb..cb64468424d 100644 --- a/dist/images/Dockerfile.base +++ b/dist/images/Dockerfile.base @@ -83,9 +83,6 @@ RUN apt update && apt upgrade -y && apt install ca-certificates python3 hostname logrotate dnsutils net-tools strongswan strongswan-pki libcharon-extra-plugins \ libcharon-extauth-plugins libstrongswan-extra-plugins libstrongswan-standard-plugins -y --no-install-recommends && \ rm -rf /var/lib/apt/lists/* && \ - cd /usr/sbin && \ - ln -sf /usr/sbin/iptables-legacy iptables && \ - ln -sf /usr/sbin/ip6tables-legacy ip6tables && \ rm -rf /etc/localtime RUN mkdir -p /var/run/openvswitch && \ diff --git a/dist/images/iptables-wrapper-installer.sh b/dist/images/iptables-wrapper-installer.sh new file mode 100755 index 00000000000..8a650434740 --- /dev/null +++ b/dist/images/iptables-wrapper-installer.sh @@ -0,0 +1,211 @@ +#!/bin/sh + +# Copyright 2020 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Usage: +# +# iptables-wrapper-installer.sh [--no-sanity-check] +# +# Installs a wrapper iptables script in a container that will figure out +# whether iptables-legacy or iptables-nft is in use on the host and then +# replaces itself with the correct underlying iptables version. +# +# Unless "--no-sanity-check" is passed, it will first verify that the +# container already contains a suitable version of iptables. + +# NOTE: This can only use POSIX /bin/sh features; the build container +# might not contain bash. + +# original source: +# https://github.com/kubernetes-sigs/iptables-wrappers/blob/master/iptables-wrapper-installer.sh + +set -eu + +# Find iptables binary location +if [ -d /usr/sbin -a -e /usr/sbin/iptables ]; then + sbin="/usr/sbin" +elif [ -d /sbin -a -e /sbin/iptables ]; then + sbin="/sbin" +else + echo "ERROR: iptables is not present in either /usr/sbin or /sbin" 1>&2 + exit 1 +fi + +# Determine how the system selects between iptables-legacy and iptables-nft +if [ -x /usr/sbin/alternatives ]; then + # Fedora/SUSE style alternatives + altstyle="fedora" +elif [ -x /usr/sbin/update-alternatives ]; then + # Debian style alternatives + altstyle="debian" +else + # No alternatives system + altstyle="none" +fi + +if [ "${1:-}" != "--no-sanity-check" ]; then + # Ensure dependencies are installed + if ! version=$("${sbin}/iptables-nft" --version 2> /dev/null); then + echo "ERROR: iptables-nft is not installed" 1>&2 + exit 1 + fi + if ! "${sbin}/iptables-legacy" --version > /dev/null 2>&1; then + echo "ERROR: iptables-legacy is not installed" 1>&2 + exit 1 + fi + + case "${version}" in + *v1.8.[0123]\ *) + echo "ERROR: iptables 1.8.0 - 1.8.3 have compatibility bugs." 1>&2 + echo " Upgrade to 1.8.4 or newer." 1>&2 + exit 1 + ;; + *) + # 1.8.4+ are OK + ;; + esac +fi + +# Start creating the wrapper... +rm -f "${sbin}/iptables-wrapper" +cat > "${sbin}/iptables-wrapper" </dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) +if [ "\${nft_kubelet_rules}" -ne 0 ]; then + mode=nft +else + # Check for kubernetes 1.17-or-later with iptables-legacy. We + # can't pass "-t mangle" to iptables-legacy-save because it would + # cause the kernel to create that table if it didn't already + # exist, which we don't want. So we have to grab all the rules + legacy_kubelet_rules=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep -E '^:(KUBE-IPTABLES-HINT|KUBE-KUBELET-CANARY)' | wc -l) + if [ "\${legacy_kubelet_rules}" -ne 0 ]; then + mode=legacy + else + # With older kubernetes releases there may not be any _specific_ + # rules we can look for, but we assume that some non-containerized process + # (possibly kubelet) will have created _some_ iptables rules. + num_legacy_lines=\$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep '^-' | wc -l) + num_nft_lines=\$( (iptables-nft-save || true; ip6tables-nft-save || true) 2>/dev/null | grep '^-' | wc -l) + if [ "\${num_legacy_lines}" -gt "\${num_nft_lines}" ]; then + mode=legacy + else + mode=nft + fi + fi +fi + +EOF + +# Write out the appropriate alternatives-selection commands +case "${altstyle}" in + fedora) +cat >> "${sbin}/iptables-wrapper" < /dev/null || failed=1 +EOF + ;; + + debian) +cat >> "${sbin}/iptables-wrapper" < /dev/null || failed=1 +update-alternatives --set ip6tables "/usr/sbin/ip6tables-\${mode}" > /dev/null || failed=1 +EOF + ;; + + *) +cat >> "${sbin}/iptables-wrapper" </dev/null || failed=1 +EOF + ;; +esac + +# Write out the post-alternatives-selection error checking and final wrap-up +cat >> "${sbin}/iptables-wrapper" <&2 + # fake it, though this will probably also fail if they aren't root + exec "${sbin}/xtables-\${mode}-multi" "\$0" "\$@" +fi + +# Now re-exec the original command with the newly-selected alternative +exec "\$0" "\$@" +EOF +chmod +x "${sbin}/iptables-wrapper" + +# Now back in the installer script, point the iptables binaries at our +# wrapper +case "${altstyle}" in + fedora) + alternatives \ + --install /usr/sbin/iptables iptables /usr/sbin/iptables-wrapper 100 \ + --slave /usr/sbin/iptables-restore iptables-restore /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/iptables-save iptables-save /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/ip6tables iptables /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/ip6tables-restore iptables-restore /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/ip6tables-save iptables-save /usr/sbin/iptables-wrapper + ;; + + debian) + update-alternatives \ + --install /usr/sbin/iptables iptables /usr/sbin/iptables-wrapper 100 \ + --slave /usr/sbin/iptables-restore iptables-restore /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/iptables-save iptables-save /usr/sbin/iptables-wrapper + update-alternatives \ + --install /usr/sbin/ip6tables ip6tables /usr/sbin/iptables-wrapper 100 \ + --slave /usr/sbin/ip6tables-restore ip6tables-restore /usr/sbin/iptables-wrapper \ + --slave /usr/sbin/ip6tables-save ip6tables-save /usr/sbin/iptables-wrapper + ;; + + *) + for cmd in iptables iptables-save iptables-restore ip6tables ip6tables-save ip6tables-restore; do + rm -f "${sbin}/${cmd}" + ln -s "${sbin}/iptables-wrapper" "${sbin}/${cmd}" + done + ;; +esac + +# Cleanup +rm -f "$0" diff --git a/dist/images/start-cniserver.sh b/dist/images/start-cniserver.sh index 304ec06ee46..0839843a11c 100755 --- a/dist/images/start-cniserver.sh +++ b/dist/images/start-cniserver.sh @@ -41,6 +41,9 @@ do fi done +# update links to point to the iptables binaries +iptables -V + # If nftables not exist do not exit set +e iptables -P FORWARD ACCEPT diff --git a/dist/images/start-ovs.sh b/dist/images/start-ovs.sh index d105c75ba43..607b9c3400f 100755 --- a/dist/images/start-ovs.sh +++ b/dist/images/start-ovs.sh @@ -50,6 +50,9 @@ function quit { } trap quit EXIT +# update links to point to the iptables binaries +iptables -V + # Start ovsdb /usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd --system-id=random # Restrict the number of pthreads ovs-vswitchd creates to reduce the diff --git a/go.mod b/go.mod index 05014c74ea7..873af421df6 100644 --- a/go.mod +++ b/go.mod @@ -12,12 +12,12 @@ require ( github.com/cnf/structhash v0.0.0-20201127153200-e1b16c1ebc08 github.com/containernetworking/cni v1.1.2 github.com/containernetworking/plugins v1.1.1 - github.com/coreos/go-iptables v0.6.0 github.com/docker/docker v20.10.22+incompatible github.com/emicklei/go-restful/v3 v3.10.1 github.com/evanphx/json-patch/v5 v5.6.0 github.com/greenpau/ovsdb v1.0.3 github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0 + github.com/kubeovn/go-iptables v0.0.0-20230322103850-8619a8ab3dca github.com/kubeovn/gonetworkmanager/v2 v2.0.0-20230327064018-0b27f88874f7 github.com/mdlayher/arp v0.0.0-20220512170110-6706a2966875 github.com/moby/sys/mountinfo v0.6.2 diff --git a/go.sum b/go.sum index df4d1638457..2c8a1875ee6 100644 --- a/go.sum +++ b/go.sum @@ -365,8 +365,6 @@ github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= github.com/coreos/go-iptables v0.5.0/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU= -github.com/coreos/go-iptables v0.6.0 h1:is9qnZMPYjLd8LYqmm/qlE+wwEgJIkTYdhV3rfZo4jk= -github.com/coreos/go-iptables v0.6.0/go.mod h1:Qe8Bv2Xik5FyTXwgIbLAnv2sWSBmvWdFETJConOQ//Q= github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= @@ -896,6 +894,8 @@ github.com/kubeovn/arp v0.0.0-20230101053045-8a0772d9c34c h1:AcOKlV+lInNlGO3o3+1 github.com/kubeovn/arp v0.0.0-20230101053045-8a0772d9c34c/go.mod h1:Ce8lvkopTGXfPmeb5AY3/umEOmoFVV3HlCPGfGk0+Y0= github.com/kubeovn/felix v0.0.0-20220325073257-c8a0f705d139 h1:MaLC8/dohKHU8nkfglfE2oikefB6urJG75yZDOcKTRU= github.com/kubeovn/felix v0.0.0-20220325073257-c8a0f705d139/go.mod h1:ulxnUH9cbIOtCH+exhJPeV2mleh+bDv67WKsl/MVU/g= +github.com/kubeovn/go-iptables v0.0.0-20230322103850-8619a8ab3dca h1:fTMjoho2et9nKVOFrjzVEWVd9XD1zzxOYrlxxZpO0fU= +github.com/kubeovn/go-iptables v0.0.0-20230322103850-8619a8ab3dca/go.mod h1:jY1XeGzkx8ASNJ+SqQSxTESNXARkjvt+I6IJOTnzIjw= github.com/kubeovn/gonetworkmanager/v2 v2.0.0-20230327064018-0b27f88874f7 h1:X5/DAYXXe8p3mUz3Z+j0dsgpIUPiNhaq0f7D1Z9/8CY= github.com/kubeovn/gonetworkmanager/v2 v2.0.0-20230327064018-0b27f88874f7/go.mod h1:rNwHas8aX9k/BEz5dwObhRvfV7KEd0MnrTTDd4gQ3D0= github.com/kubeovn/kubevirt-client-go v0.0.0-20221209084839-9c2ed1f0604d h1:sM7V2MhONBa10zYQA1yg/UbPm/Y7JqVqymtgoDiGqMo= diff --git a/pkg/daemon/controller_linux.go b/pkg/daemon/controller_linux.go index e754965f704..c6e95252ea5 100644 --- a/pkg/daemon/controller_linux.go +++ b/pkg/daemon/controller_linux.go @@ -14,7 +14,7 @@ import ( "syscall" "github.com/alauda/felix/ipsets" - "github.com/coreos/go-iptables/iptables" + "github.com/kubeovn/go-iptables/iptables" "github.com/vishvananda/netlink" v1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -30,29 +30,77 @@ import ( // ControllerRuntime represents runtime specific controller members type ControllerRuntime struct { - iptables map[string]*iptables.IPTables - ipsets map[string]*ipsets.IPSets + iptables map[string]*iptables.IPTables + iptablesObsolete map[string]*iptables.IPTables + ipsets map[string]*ipsets.IPSets +} + +func evalCommandSymlinks(cmd string) (string, error) { + path, err := exec.LookPath(cmd) + if err != nil { + return "", fmt.Errorf("failed to search for command %q: %v", cmd, err) + } + file, err := filepath.EvalSymlinks(path) + if err != nil { + return "", fmt.Errorf("failed to read evaluate symbolic links for file %q: %v", path, err) + } + + return file, nil +} + +func isLegacyIptablesMode() (bool, error) { + path, err := evalCommandSymlinks("iptables") + if err != nil { + return false, err + } + pathLegacy, err := evalCommandSymlinks("iptables-legacy") + if err != nil { + return false, err + } + return path == pathLegacy, nil } func (c *Controller) initRuntime() error { - c.ControllerRuntime.iptables = make(map[string]*iptables.IPTables) - c.ControllerRuntime.ipsets = make(map[string]*ipsets.IPSets) + ok, err := isLegacyIptablesMode() + if err != nil { + klog.Errorf("failed to check iptables mode: %v", err) + return err + } + if !ok { + // iptables works in nft mode, we should migrate iptables rules + c.iptablesObsolete = make(map[string]*iptables.IPTables, 2) + } + + c.iptables = make(map[string]*iptables.IPTables) + c.ipsets = make(map[string]*ipsets.IPSets) if c.protocol == kubeovnv1.ProtocolIPv4 || c.protocol == kubeovnv1.ProtocolDual { - iptables, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) + ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv4) if err != nil { return err } - c.ControllerRuntime.iptables[kubeovnv1.ProtocolIPv4] = iptables - c.ControllerRuntime.ipsets[kubeovnv1.ProtocolIPv4] = ipsets.NewIPSets(ipsets.NewIPVersionConfig(ipsets.IPFamilyV4, IPSetPrefix, nil, nil)) + c.iptables[kubeovnv1.ProtocolIPv4] = ipt + if c.iptablesObsolete != nil { + if ipt, err = iptables.NewWithProtocolAndMode(iptables.ProtocolIPv4, "legacy"); err != nil { + return err + } + c.iptablesObsolete[kubeovnv1.ProtocolIPv4] = ipt + } + c.ipsets[kubeovnv1.ProtocolIPv4] = ipsets.NewIPSets(ipsets.NewIPVersionConfig(ipsets.IPFamilyV4, IPSetPrefix, nil, nil)) } if c.protocol == kubeovnv1.ProtocolIPv6 || c.protocol == kubeovnv1.ProtocolDual { - iptables, err := iptables.NewWithProtocol(iptables.ProtocolIPv6) + ipt, err := iptables.NewWithProtocol(iptables.ProtocolIPv6) if err != nil { return err } - c.ControllerRuntime.iptables[kubeovnv1.ProtocolIPv6] = iptables - c.ControllerRuntime.ipsets[kubeovnv1.ProtocolIPv6] = ipsets.NewIPSets(ipsets.NewIPVersionConfig(ipsets.IPFamilyV6, IPSetPrefix, nil, nil)) + c.iptables[kubeovnv1.ProtocolIPv6] = ipt + if c.iptablesObsolete != nil { + if ipt, err = iptables.NewWithProtocolAndMode(iptables.ProtocolIPv6, "legacy"); err != nil { + return err + } + c.iptablesObsolete[kubeovnv1.ProtocolIPv6] = ipt + } + c.ipsets[kubeovnv1.ProtocolIPv6] = ipsets.NewIPSets(ipsets.NewIPVersionConfig(ipsets.IPFamilyV6, IPSetPrefix, nil, nil)) } return nil diff --git a/pkg/daemon/gateway_linux.go b/pkg/daemon/gateway_linux.go index 25b5c3d8088..67b03f97bf0 100644 --- a/pkg/daemon/gateway_linux.go +++ b/pkg/daemon/gateway_linux.go @@ -12,6 +12,7 @@ import ( "syscall" "github.com/alauda/felix/ipsets" + "github.com/kubeovn/go-iptables/iptables" "github.com/vishvananda/netlink" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -300,8 +301,8 @@ func (c *Controller) deletePolicyRouting(family int, gateway string, priority, t return nil } -func (c *Controller) createIptablesRule(protocol string, rule util.IPTableRule) error { - exists, err := c.iptables[protocol].Exists(rule.Table, rule.Chain, rule.Rule...) +func (c *Controller) createIptablesRule(ipt *iptables.IPTables, rule util.IPTableRule) error { + exists, err := ipt.Exists(rule.Table, rule.Chain, rule.Rule...) if err != nil { klog.Errorf("failed to check iptables rule existence: %v", err) return err @@ -314,7 +315,7 @@ func (c *Controller) createIptablesRule(protocol string, rule util.IPTableRule) } klog.Infof(`creating iptables rules: "%s"`, s) - if err = c.iptables[protocol].Insert(rule.Table, rule.Chain, 1, rule.Rule...); err != nil { + if err = ipt.Insert(rule.Table, rule.Chain, 1, rule.Rule...); err != nil { klog.Errorf(`failed to insert iptables rule "%s": %v`, s, err) return err } @@ -322,14 +323,14 @@ func (c *Controller) createIptablesRule(protocol string, rule util.IPTableRule) return nil } -func (c *Controller) updateIptablesChain(protocol, table, chain, parent string, rules []util.IPTableRule) error { - ok, err := c.iptables[protocol].ChainExists(table, chain) +func (c *Controller) updateIptablesChain(ipt *iptables.IPTables, table, chain, parent string, rules []util.IPTableRule) error { + ok, err := ipt.ChainExists(table, chain) if err != nil { klog.Errorf("failed to check existence of iptables chain %s in table %s: %v", chain, table, err) return err } if !ok { - if err = c.iptables[protocol].NewChain(table, chain); err != nil { + if err = ipt.NewChain(table, chain); err != nil { klog.Errorf("failed to create iptables chain %s in table %s: %v", chain, table, err) return err } @@ -342,13 +343,13 @@ func (c *Controller) updateIptablesChain(protocol, table, chain, parent string, Chain: parent, Rule: []string{"-m", "comment", "--comment", comment, "-j", chain}, } - if err = c.createIptablesRule(protocol, rule); err != nil { + if err = c.createIptablesRule(ipt, rule); err != nil { klog.Errorf("failed to create iptables rule: %v", err) return err } // list existing rules - ruleList, err := c.iptables[protocol].List(table, chain) + ruleList, err := ipt.List(table, chain) if err != nil { klog.Errorf("failed to list iptables rules in chain %s/%s: %v", table, chain, err) return err @@ -370,7 +371,7 @@ func (c *Controller) updateIptablesChain(protocol, table, chain, parent string, klog.V(5).Infof("iptables rule %v already exists", rule.Rule) continue } - if err = c.iptables[protocol].Insert(table, chain, i+1, rule.Rule...); err != nil { + if err = ipt.Insert(table, chain, i+1, rule.Rule...); err != nil { klog.Errorf(`failed to insert iptables rule %v: %v`, rule.Rule, err) return err } @@ -378,7 +379,7 @@ func (c *Controller) updateIptablesChain(protocol, table, chain, parent string, added++ } for i := len(existingRules) - 1; i >= len(rules)-added; i-- { - if err = c.iptables[protocol].Delete(table, chain, strconv.Itoa(i+added+1)); err != nil { + if err = ipt.Delete(table, chain, strconv.Itoa(i+added+1)); err != nil { klog.Errorf(`failed to delete iptables rule %v: %v`, existingRules[i], err) return err } @@ -410,49 +411,6 @@ func (c *Controller) setIptables() error { klog.V(3).Infof("centralized subnets nat ips %v", centralGwNatIPs) var ( - v4AbandonedRules = []util.IPTableRule{ - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x40000/0x40000 -j MASQUERADE`)}, - {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn40subnets src -m set --match-set ovn40services dst -j MARK --set-xmark 0x40000/0x40000`)}, - // legacy rules - // nat packets marked by kube-proxy or kube-ovn - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x4000/0x4000 -j MASQUERADE`)}, - // nat service traffic - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn40subnets src -m set --match-set ovn40subnets dst -j MASQUERADE`)}, - // do not nat node port service traffic with external traffic policy set to local - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -m set --match-set ovn40subnets-distributed-gw dst -j RETURN`)}, - // nat node port service traffic with external traffic policy set to local for subnets with centralized gateway - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -j MASQUERADE`)}, - // do not nat reply packets in direct routing - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-p tcp --tcp-flags SYN NONE -m conntrack --ctstate NEW -j RETURN`)}, - // do not nat route traffic - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set ! --match-set ovn40subnets src -m set ! --match-set ovn40other-node src -m set --match-set ovn40subnets-nat dst -j RETURN`)}, - // nat outgoing - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn40subnets-nat src -m set ! --match-set ovn40subnets dst -j MASQUERADE`)}, - // mark packets from pod to service - {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn40subnets src -m set --match-set ovn40services dst -j MARK --set-xmark 0x4000/0x4000`)}, - } - v6AbandonedRules = []util.IPTableRule{ - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x40000/0x40000 -j MASQUERADE`)}, - {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn60subnets src -m set --match-set ovn60services dst -j MARK --set-xmark 0x40000/0x40000`)}, - // legacy rules - // nat packets marked by kube-proxy or kube-ovn - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x4000/0x4000 -j MASQUERADE`)}, - // nat service traffic - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn60subnets src -m set --match-set ovn60subnets dst -j MASQUERADE`)}, - // do not nat node port service traffic with external traffic policy set to local - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -m set --match-set ovn60subnets-distributed-gw dst -j RETURN`)}, - // nat node port service traffic with external traffic policy set to local for subnets with centralized gateway - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -j MASQUERADE`)}, - // do not nat reply packets in direct routing - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-p tcp --tcp-flags SYN NONE -m conntrack --ctstate NEW -j RETURN`)}, - // do not nat route traffic - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set ! --match-set ovn60subnets src -m set ! --match-set ovn60other-node src -m set --match-set ovn60subnets-nat dst -j RETURN`)}, - // nat outgoing - {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn60subnets-nat src -m set ! --match-set ovn60subnets dst -j MASQUERADE`)}, - // mark packets from pod to service - {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn60subnets src -m set --match-set ovn60services dst -j MARK --set-xmark 0x4000/0x4000`)}, - } - v4Rules = []util.IPTableRule{ // mark packets from pod to service {Table: NAT, Chain: OvnPrerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn40subnets src -m set --match-set ovn40services dst -j MARK --set-xmark 0x4000/0x4000`)}, @@ -523,26 +481,27 @@ func (c *Controller) setIptables() error { } for _, protocol := range protocols { - if c.iptables[protocol] == nil { + ipt := c.iptables[protocol] + if ipt == nil { continue } var kubeProxyIpsetProtocol, matchset string - var abandonedRules, iptablesRules []util.IPTableRule + var obsoleteRules, iptablesRules []util.IPTableRule if protocol == kubeovnv1.ProtocolIPv4 { - iptablesRules, abandonedRules = v4Rules, v4AbandonedRules + iptablesRules = v4Rules matchset = "ovn40subnets" } else { - iptablesRules, abandonedRules = v6Rules, v6AbandonedRules + iptablesRules = v6Rules kubeProxyIpsetProtocol, matchset = "6-", "ovn60subnets" } if nodeIP := nodeIPs[protocol]; nodeIP != "" { - abandonedRules = append(abandonedRules, - util.IPTableRule{Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m set --match-set %s dst -j MASQUERADE`, nodeIP, matchset))}, - util.IPTableRule{Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m mark --mark 0x4000/0x4000 -j MASQUERADE`, nodeIP))}, - util.IPTableRule{Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m set ! --match-set %s src -m set --match-set %s dst -j MASQUERADE`, nodeIP, matchset, matchset))}, - ) + obsoleteRules = []util.IPTableRule{ + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m set --match-set %s dst -j MASQUERADE`, nodeIP, matchset))}, + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m mark --mark 0x4000/0x4000 -j MASQUERADE`, nodeIP))}, + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(fmt.Sprintf(`! -s %s -m set ! --match-set %s src -m set --match-set %s dst -j MASQUERADE`, nodeIP, matchset, matchset))}, + } for _, p := range [...]string{"tcp", "udp"} { ipset := fmt.Sprintf("KUBE-%sNODE-PORT-LOCAL-%s", kubeProxyIpsetProtocol, strings.ToUpper(p)) @@ -556,7 +515,7 @@ func (c *Controller) setIptables() error { continue } rule := fmt.Sprintf("-p %s -m addrtype --dst-type LOCAL -m set --match-set %s dst -j MARK --set-xmark 0x80000/0x80000", p, ipset) - abandonedRules = append(abandonedRules, util.IPTableRule{Table: NAT, Chain: Prerouting, Rule: strings.Fields(rule)}) + obsoleteRules = append(obsoleteRules, util.IPTableRule{Table: NAT, Chain: Prerouting, Rule: strings.Fields(rule)}) iptablesRules = append(iptablesRules, util.IPTableRule{Table: NAT, Chain: OvnPrerouting, Rule: strings.Fields(rule)}) } } @@ -574,7 +533,7 @@ func (c *Controller) setIptables() error { } } - if err = c.createIptablesRule(protocol, rule); err != nil { + if err = c.createIptablesRule(ipt, rule); err != nil { klog.Errorf(`failed to create iptables rule "%s": %v`, strings.Join(rule.Rule, " "), err) return err } @@ -597,37 +556,165 @@ func (c *Controller) setIptables() error { natPostroutingRules = append(natPostroutingRules[:n-1], rule, natPostroutingRules[n-1]) } - if err = c.updateIptablesChain(protocol, NAT, OvnPrerouting, Prerouting, natPreroutingRules); err != nil { + if err = c.updateIptablesChain(ipt, NAT, OvnPrerouting, Prerouting, natPreroutingRules); err != nil { klog.Errorf("failed to update chain %s/%s: %v", NAT, OvnPrerouting) return err } - if err = c.updateIptablesChain(protocol, NAT, OvnPostrouting, Postrouting, natPostroutingRules); err != nil { + if err = c.updateIptablesChain(ipt, NAT, OvnPostrouting, Postrouting, natPostroutingRules); err != nil { klog.Errorf("failed to update chain %s/%s: %v", NAT, OvnPostrouting) return err } - // delete unused iptables rule when nat gw with designative ip has been changed in centralized subnet - if err = c.deleteLegacySnatRules(protocol, NAT, Postrouting); err != nil { - klog.Errorf("failed to delete legacy iptables rule for SNAT: %v", err) + if err = c.cleanObsoleteIptablesRules(protocol, obsoleteRules); err != nil { + klog.Errorf("failed to clean legacy iptables rules: %v", err) return err } + } + return nil +} - // delete abandoned iptables rules - for _, rule := range abandonedRules { - exists, err := c.iptables[protocol].Exists(rule.Table, rule.Chain, rule.Rule...) - if err != nil { - klog.Errorf("failed to check existence of iptables rule: %v", err) - return err - } - if exists { - klog.Infof("deleting abandoned iptables rule: %s", strings.Join(rule.Rule, " ")) - if err := c.iptables[protocol].Delete(rule.Table, rule.Chain, rule.Rule...); err != nil { - klog.Errorf("failed to delete iptables rule %s: %v", strings.Join(rule.Rule, " "), err) - return err - } - } +func deleteIptablesRule(ipt *iptables.IPTables, rule util.IPTableRule) error { + if err := ipt.DeleteIfExists(rule.Table, rule.Chain, rule.Rule...); err != nil { + klog.Errorf("failed to delete iptables rule %q: %v", strings.Join(rule.Rule, " "), err) + return err + } + return nil +} + +func clearObsoleteIptablesChain(ipt *iptables.IPTables, table, chain, parent string) error { + exists, err := ipt.ChainExists(table, chain) + if err != nil { + klog.Error(err) + return err + } + if !exists { + return nil + } + + rule := fmt.Sprintf(`-m comment --comment "kube-ovn %s rules" -j %s`, strings.ToLower(parent), chain) + if err = deleteIptablesRule(ipt, util.IPTableRule{Table: table, Chain: parent, Rule: util.DoubleQuotedFields(rule)}); err != nil { + klog.Error(err) + return err + } + if err = ipt.ClearAndDeleteChain(table, chain); err != nil { + klog.Errorf("failed to delete iptables chain %q in table %s: %v", chain, table, err) + return err + } + return nil +} + +func (c *Controller) cleanObsoleteIptablesRules(protocol string, rules []util.IPTableRule) error { + if c.iptablesObsolete == nil || c.iptablesObsolete[protocol] == nil { + return nil + } + + var ( + v4ObsoleteRules = []util.IPTableRule{ + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x40000/0x40000 -j MASQUERADE`)}, + {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn40subnets src -m set --match-set ovn40services dst -j MARK --set-xmark 0x40000/0x40000`)}, + // legacy rules + // nat packets marked by kube-proxy or kube-ovn + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x4000/0x4000 -j MASQUERADE`)}, + // nat service traffic + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn40subnets src -m set --match-set ovn40subnets dst -j MASQUERADE`)}, + // do not nat node port service traffic with external traffic policy set to local + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -m set --match-set ovn40subnets-distributed-gw dst -j RETURN`)}, + // nat node port service traffic with external traffic policy set to local for subnets with centralized gateway + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -j MASQUERADE`)}, + // do not nat reply packets in direct routing + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-p tcp --tcp-flags SYN NONE -m conntrack --ctstate NEW -j RETURN`)}, + // do not nat route traffic + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set ! --match-set ovn40subnets src -m set ! --match-set ovn40other-node src -m set --match-set ovn40subnets-nat dst -j RETURN`)}, + // nat outgoing + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn40subnets-nat src -m set ! --match-set ovn40subnets dst -j MASQUERADE`)}, + // mark packets from pod to service + {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn40subnets src -m set --match-set ovn40services dst -j MARK --set-xmark 0x4000/0x4000`)}, + // Input Accept + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn40subnets src -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn40subnets dst -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn40services src -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn40services dst -j ACCEPT`)}, + // Forward Accept + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn40subnets src -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn40subnets dst -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn40services src -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn40services dst -j ACCEPT`)}, + // Output unmark to bypass kernel nat checksum issue https://github.com/flannel-io/flannel/issues/1279 + {Table: "filter", Chain: "OUTPUT", Rule: strings.Fields(`-p udp -m udp --dport 6081 -j MARK --set-xmark 0x0`)}, + } + v6ObsoleteRules = []util.IPTableRule{ + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x40000/0x40000 -j MASQUERADE`)}, + {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn60subnets src -m set --match-set ovn60services dst -j MARK --set-xmark 0x40000/0x40000`)}, + // legacy rules + // nat packets marked by kube-proxy or kube-ovn + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x4000/0x4000 -j MASQUERADE`)}, + // nat service traffic + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn60subnets src -m set --match-set ovn60subnets dst -j MASQUERADE`)}, + // do not nat node port service traffic with external traffic policy set to local + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -m set --match-set ovn60subnets-distributed-gw dst -j RETURN`)}, + // nat node port service traffic with external traffic policy set to local for subnets with centralized gateway + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m mark --mark 0x80000/0x80000 -j MASQUERADE`)}, + // do not nat reply packets in direct routing + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-p tcp --tcp-flags SYN NONE -m conntrack --ctstate NEW -j RETURN`)}, + // do not nat route traffic + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set ! --match-set ovn60subnets src -m set ! --match-set ovn60other-node src -m set --match-set ovn60subnets-nat dst -j RETURN`)}, + // nat outgoing + {Table: NAT, Chain: Postrouting, Rule: strings.Fields(`-m set --match-set ovn60subnets-nat src -m set ! --match-set ovn60subnets dst -j MASQUERADE`)}, + // mark packets from pod to service + {Table: "mangle", Chain: Prerouting, Rule: strings.Fields(`-i ovn0 -m set --match-set ovn60subnets src -m set --match-set ovn60services dst -j MARK --set-xmark 0x4000/0x4000`)}, + // Input Accept + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn60subnets src -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn60subnets dst -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn60services src -j ACCEPT`)}, + {Table: "filter", Chain: "INPUT", Rule: strings.Fields(`-m set --match-set ovn60services dst -j ACCEPT`)}, + // Forward Accept + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn60subnets src -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn60subnets dst -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn60services src -j ACCEPT`)}, + {Table: "filter", Chain: "FORWARD", Rule: strings.Fields(`-m set --match-set ovn60services dst -j ACCEPT`)}, + // Output unmark to bypass kernel nat checksum issue https://github.com/flannel-io/flannel/issues/1279 + {Table: "filter", Chain: "OUTPUT", Rule: strings.Fields(`-p udp -m udp --dport 6081 -j MARK --set-xmark 0x0`)}, + } + ) + + var obsoleteRules []util.IPTableRule + if protocol == kubeovnv1.ProtocolIPv4 { + obsoleteRules = v4ObsoleteRules + } else { + obsoleteRules = v6ObsoleteRules + } + + ipt := c.iptablesObsolete[protocol] + for _, rule := range obsoleteRules { + if err := deleteIptablesRule(ipt, rule); err != nil { + klog.Error(err) + return err + } + } + for _, rule := range rules { + if err := deleteIptablesRule(ipt, rule); err != nil { + klog.Error(err) + return err } } + + // delete unused iptables rule when nat gw with designative ip has been changed in centralized subnet + if err := c.deleteObsoleteSnatRules(ipt, NAT, Postrouting); err != nil { + klog.Errorf("failed to delete legacy iptables rule for SNAT: %v", err) + return err + } + + if err := clearObsoleteIptablesChain(ipt, NAT, OvnPrerouting, Prerouting); err != nil { + return err + } + if err := clearObsoleteIptablesChain(ipt, NAT, OvnPostrouting, Postrouting); err != nil { + return err + } + + delete(c.iptablesObsolete, protocol) + if len(c.iptablesObsolete) == 0 { + c.iptablesObsolete = nil + } return nil } @@ -891,8 +978,8 @@ func (c *Controller) updateMssRuleByProtocol(protocol string, MssMangleRule util } } -func (c *Controller) deleteLegacySnatRules(protocol, table, chain string) error { - rules, err := c.iptables[protocol].List(table, chain) +func (c *Controller) deleteObsoleteSnatRules(ipt *iptables.IPTables, table, chain string) error { + rules, err := ipt.List(table, chain) if err != nil { klog.Errorf("failed to list iptables rules in table %v chain %v, %+v", table, chain, err) return err @@ -906,7 +993,7 @@ func (c *Controller) deleteLegacySnatRules(protocol, table, chain string) error // "-A POSTROUTING -s 100.168.10.0/24 -m set ! --match-set ovn40subnets dst -j SNAT --to-source 172.17.0.3" rule := rule[4+len(chain):] spec := util.DoubleQuotedFields(rule) - if err = c.iptables[protocol].Delete(table, chain, spec...); err != nil { + if err = ipt.Delete(table, chain, spec...); err != nil { klog.Errorf(`failed to delete iptables rule "%s": %v`, rule, err) return err }