From 6aada80d67de7780779e31108ae988cf7e2c017a Mon Sep 17 00:00:00 2001 From: abdallahyas Date: Tue, 13 Jul 2021 08:56:52 +0000 Subject: [PATCH] Support Jenkins CI This patch adds a Jenkins file example, it also modifies the E2E kind script to support a new mode of operation: the bash service. The modification was done to support running the scripts in a limited privilege shell. --- ci/README.md | 23 ++ ci/admin-list.yaml | 5 + ci/examples/jenkins/README.md | 5 + .../jenkins/sriov-network-operator-ci.yaml | 92 +++++ doc/testing-kind.md | 35 ++ hack/run-e2e-test-kind.sh | 57 ++- hack/teardown-e2e-kind-cluster.sh | 2 + hack/vf-netns-switcher.sh | 369 ++++++++++++++++++ hack/vf-switcher.service | 12 + 9 files changed, 590 insertions(+), 10 deletions(-) create mode 100644 ci/README.md create mode 100644 ci/admin-list.yaml create mode 100644 ci/examples/jenkins/README.md create mode 100644 ci/examples/jenkins/sriov-network-operator-ci.yaml create mode 100755 hack/vf-netns-switcher.sh create mode 100644 hack/vf-switcher.service diff --git a/ci/README.md b/ci/README.md new file mode 100644 index 0000000000..9f8fc096bd --- /dev/null +++ b/ci/README.md @@ -0,0 +1,23 @@ +## CI configurations and examples +This folder holds vendors CIes configurations and examples. Configurations are used to control the vendors CIes behaviors, and examples are used as a reference for other vendors to be able to setup a CI of their own. + +### Admin list +The admin list contains the list of github users and organizations that have permission to trigger the vendors CIes. Only trusted users who have merge permissions should be on the list. The vendors should be responsible for how the admin list on their CIes is updated, but to keep everything organized the vendors CIes should at least update their admin list in response to a PR comment with the following phrase `/update-admins`. + +### CI Examples +The examples folder contains configuration examples for vendor CIes. 
It can be used as a reference for vendors to set up their CIes. For more information on an example refer to that folder's README. + +### CIes trigger convention +Vendor CI trigger phrases should follow this convention: + +``` +/test-<test-type>-<vendor>-<sub-test> +/skip-<test-type>-<vendor>-<sub-test> +``` + + * `test-type`: The type of test to conduct on the vendor's setups, for example: E2E. + * `vendor`: The vendor that implemented the CI. + * `sub-test`: In case there are many tests for the `test-type`, this field specifies which sub-test to run. + +In addition to the convention, all vendors CIes should be triggered on the general phrase `/test-all` + diff --git a/ci/admin-list.yaml b/ci/admin-list.yaml new file mode 100644 index 0000000000..0d5503abbb --- /dev/null +++ b/ci/admin-list.yaml @@ -0,0 +1,5 @@ +admin-list: + - mellanox-ci +org-list: + - Mellanox + diff --git a/ci/examples/jenkins/README.md b/ci/examples/jenkins/README.md new file mode 100644 index 0000000000..c95b812e9d --- /dev/null +++ b/ci/examples/jenkins/README.md @@ -0,0 +1,5 @@ +## Jenkins CI examples +This folder holds examples for jenkins CI. + +### sriov-network-operator-ci.yaml +This file holds an example jenkins-job-builder configuration that would be triggered on PRs by the admin list, and would simply run the `hack/run-e2e-test-kind.sh` script with `system-service` netns device switcher. 
diff --git a/ci/examples/jenkins/sriov-network-operator-ci.yaml b/ci/examples/jenkins/sriov-network-operator-ci.yaml new file mode 100644 index 0000000000..21d3d307b8 --- /dev/null +++ b/ci/examples/jenkins/sriov-network-operator-ci.yaml @@ -0,0 +1,92 @@ +- project: + name: sriov-network-operator-github-ci + jobs: + - 'sriov-network-operator-ci': + project: sriov-network-operator + disabled_var: false + concurrent: false + node: + git-site: https://github.com + git-root: k8snetworkplumbingwg + git-project: sriov-network-operator + +- job-template: + name: 'sriov-network-operator-ci' + node: '{node}' + builders: + - inject: + properties-content: | + KUBECONFIG=/etc/kubernetes/admin.conf + INTERFACES_SWITCHER=system-service + - run-e2e-test + concurrent: false + description: + disabled: false + project-type: freestyle + properties: + - build-discarder: + artifact-days-to-keep: 60 + artifact-num-to-keep: 100 + days-to-keep: 60 + num-to-keep: 100 + - github: + url: '{git-site}/{git-root}/{git-project}' + scm: + - git: + branches: ["${{sha1}}"] + credentials-id: '{credentials-id}' + name: '{git-project}' + refspec: +refs/pull/*:refs/remotes/origin/pr/* + url: '{git-site}/{git-root}/{git-project}' + wipe-workspace: true + triggers: + - github-pull-request: + admin-list: + - mellanox-ci + allow-whitelist-orgs-as-admins: true + org-list: + - Mellanox + auth-id: '{auth-id}' + auto-close-on-fail: false + build-desc-template: null + cron: H/5 * * * * + github-hooks: false + only-trigger-phrase: true + cancel-builds-on-update: true + permit-all: false + status-url: --none-- + success-status: "Build Passed" + failure-status: "Build Failed, comment `/test-e2e`, /test-e2e-nvidia-all, or `/test-all` to retrigger" + error-status: "Build Failed, comment `/test-e2e`, /test-e2e-nvidia-all, or `/test-all` to retrigger" + status-context: '{project} CI' + trigger-phrase: ".*/test-(all|e2e|e2e-nvidia-all(,| |$)).*" + white-list: + - '*' + white-list-target-branches: + - master + - github 
+ wrappers: + - timeout: + timeout: 120 + fail: true + - timestamps + +- builder: + name: run-e2e-test + builders: + - shell: | + #!/bin/bash + status=0 + ./hack/teardown-e2e-kind-cluster.sh + sleep 5 + + # This line is vendor specific, it should be changed according to hardware. + mlnx_pci=$(lspci | grep Mellanox | grep -Ev 'MT27500|MT27520|Virt' | head -n 1 | awk '{print $1}') + ./hack/run-e2e-test-kind.sh 0000:${mlnx_pci} + let status=$status+$? + + ./hack/teardown-e2e-kind-cluster.sh + sleep 5 + + exit $status + diff --git a/doc/testing-kind.md b/doc/testing-kind.md index e5571132c3..a7e389f56f 100644 --- a/doc/testing-kind.md +++ b/doc/testing-kind.md @@ -1,5 +1,21 @@ ## E2E test with KinD +Kubernetes IN Docker (KIND) is a tool to deploy Kubernetes inside Docker containers. It is used to test multi nodes scenarios on a single baremetal node. +To run the E2E tests inside a KIND cluster, `./hack/run-e2e-test-kind.sh` can be used. The script performs the following operations: + + * Deploys a 2 node KIND cluster (master and worker) + * Moves the specified SR-IOV capable PCI net device to KIND worker namespace + * Deploys the operator + * Runs E2E tests + +There are two modes of moving the specified SR-IOV capable PCI net device to the KIND worker namespace: + + * `test-suite` (default): In this mode, the E2E test suite handle the PF and its VFs switching to the test namespace. + * `system-service` mode: In this mode a dedicated system service is used to switch the PF and VFs to the test namespace. + +The mode can be selected using the `INTERFACES_SWITCHER` environment variable, or by passing the mode to the `./hack/run-e2e-test-kind.sh` script using the `--device-netns-switcher` flag. + ### How to test +#### Device netns switcher mode `test-suite` To execute E2E tests, a SR-IOV Physical Function device is required and will be added to a KinD workers network namespace. 
``` $ git clone https://github.com/k8snetworkplumbingwg/sriov-network-operator.git @@ -10,6 +26,25 @@ $ sudo ./hack/run-e2e-test-kind.sh $TEST_PCI_DEVICE ``` Note: Test device will remain in KinD worker node until cluster is terminated. +#### Device netns switcher mode `system-service` +The `system-service` mode uses a linux service to handle the interface switching. To prepare the service, the following needs to be done as root: +``` +cp ./hack/vf-netns-switcher.sh /usr/bin/ +cp ./hack/vf-switcher.service /etc/systemd/system/ +systemctl daemon-reload +``` +For the service to work properly the `jq` tool is needed. + +To run the E2E tests do: +``` +$ git clone https://github.com/k8snetworkplumbingwg/sriov-network-operator.git +$ cd sriov-network-operator/ +$ source hack/get-e2e-kind-tools.sh +$ export KUBECONFIG=/etc/kubernetes/admin.conf +$ export INTERFACES_SWITCHER=system-service +$ ./hack/run-e2e-test-kind.sh +``` + ### How to repeat test using existing KinD cluster Export test PCI device used to set up KinD cluster and export KinD worker network namespace path: ``` diff --git a/hack/run-e2e-test-kind.sh b/hack/run-e2e-test-kind.sh index 1001f76ca1..12e5977421 100755 --- a/hack/run-e2e-test-kind.sh +++ b/hack/run-e2e-test-kind.sh @@ -4,11 +4,34 @@ here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")" root="$(readlink --canonicalize "$here/..")" export SRIOV_NETWORK_OPERATOR_IMAGE="${SRIOV_NETWORK_OPERATOR_IMAGE:-sriov-network-operator:latest}" export SRIOV_NETWORK_CONFIG_DAEMON_IMAGE="${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE:-origin-sriov-network-config-daemon:latest}" +export KUBECONFIG="${KUBECONFIG:-${HOME}/.kube/config}" +INTERFACES_SWITCHER="${INTERFACES_SWITCHER:-"test-suite"}" +SUPPORTED_INTERFACE_SWTICHER_MODES=("test-suite" "system-service") RETRY_MAX=10 INTERVAL=10 TIMEOUT=300 -MULTUS_CNI_DS="https://raw.githubusercontent.com/intel/multus-cni/master/images/multus-daemonset.yml" -test_pf_pci_addr="$1" 
+MULTUS_CNI_DS="https://raw.githubusercontent.com/intel/multus-cni/master/deployments/multus-daemonset.yml" + +while test $# -gt 0; do + case "$1" in + --device-netns-switcher) + INTERFACES_SWITCHER="$2" + if [[ ! "${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" =~ "${INTERFACES_SWITCHER}" ]]; then + echo "Error: unsupported interface switching mode: ${INTERFACES_SWITCHER}!" + echo "Supported modes are: ${SUPPORTED_INTERFACE_SWTICHER_MODES[@]}" + exit 1 + fi + shift + shift + ;; + *) + if [[ -z "$test_pf_pci_addr" ]];then + test_pf_pci_addr=$1 + fi + shift + ;; + esac +done check_requirements() { for cmd in docker kind kubectl ip; do @@ -50,13 +73,14 @@ retry() { echo "## checking requirements" check_requirements echo "## delete any existing cluster, deploy control & data plane cluster with KinD" -retry kind delete cluster && cat < /etc/vf-switcher/vf-switcher.yaml +[ + { + "netns": "${kind_container}", + "pfs": [ + "${pf}" + ] + } +] +EOF + sudo systemctl restart vf-switcher.service +else + echo "## retrieving netns path from container" + netns_path="$(sudo docker inspect --format '{{ .NetworkSettings.SandboxKey }}' "${kind_container}")" + echo "## exporting test device '${test_pf_pci_addr}' and test netns path '${netns_path}'" + export TEST_PCI_DEVICE="${test_pf_pci_addr}" + export TEST_NETNS_PATH="${netns_path}" +fi echo "## disabling webhooks" export ENABLE_ADMISSION_CONTROLLER=false echo "## deploying SRIOV Network Operator" diff --git a/hack/teardown-e2e-kind-cluster.sh b/hack/teardown-e2e-kind-cluster.sh index 102e601550..feb12ff37c 100755 --- a/hack/teardown-e2e-kind-cluster.sh +++ b/hack/teardown-e2e-kind-cluster.sh @@ -7,3 +7,5 @@ if ! 
command -v kind &> /dev/null; then fi kind delete cluster +sudo systemctl stop vf-switcher.service + diff --git a/hack/vf-netns-switcher.sh b/hack/vf-netns-switcher.sh new file mode 100755 index 0000000000..9c6285ad15 --- /dev/null +++ b/hack/vf-netns-switcher.sh @@ -0,0 +1,369 @@ +#!/bin/bash + +conf_file="" + +declare -a netnses + +declare -A pfs +declare -A pcis +declare -A pf_port_names +declare -A pf_switch_ids + +TIMEOUT="${TIMEOUT:-2}" +POLL_INTERVAL="${POLL_INTERVAL:-1}" + +while test $# -gt 0; do + case "$1" in + + --netns | -n) + input=$2 + local_netns=$(cut -s -d ':' -f 1 <<< $input) + local_pfs=$(cut -s -d ':' -f 2 <<< $input) + input="" + + if [[ -z "$local_netns" ]];then + echo "Error: flag --netns specified but netns is empty, please \ +provide it in the form --netns :, !" + echo "Exiting!" + exit 1 + fi + + if [[ -z "$local_pfs" ]];then + echo "Error: flag --netns specified but pfs is empty, please \ +provide it in the form --netns :, !" + echo "Exiting!" + exit 1 + fi + + netnses+=("$local_netns") + + pfs["$local_netns"]="$(tr , " " <<< $local_pfs)" + + local_netns="" + local_pfs="" + + shift + shift + ;; + + --conf-file | -c) + conf_file=$2 + if [[ ! -f "$conf_file" ]];then + echo "Error: flag --conf-file specified but file $conf_file \ +not found!" + exit 1 + fi + + shift + shift + ;; + + --help | -h) + echo " +vf-netns-switcher.sh --netns :, [--conf-file <>]: + + --netns | -n The netns and its interfaces to switch the interfaces PFs and VFs to. \ +It must be of the form :,. This flag can be repeated to specify more netnses. + + --conf-file | -c A file to read confs from, this will override cli flags. Conf file should be of the form: + [ + { + "netns": , + "pfs": [ + "pf1", + "pf2" + ] + }, + { + "netns": , + "pfs": [ + "pf3", + "pf4" + ] + } + ] + +" + exit 0 + ;; + + *) + echo "Error: invalid option: $1!" + echo "Exiting..." 
+ exit 1 + esac +done + +get_pcis_from_pfs(){ + local worker_netns="$1" + shift + local interfaces="$@" + for interface in $interfaces; do + pcis["$interface"]="$(get_pci_from_net_name "$interface" "$worker_netns")" + done +} + +get_pci_from_net_name(){ + local interface_name=$1 + local worker_netns="$2" + + if [[ -z "$(ip l show $interface_name)" ]];then + if [[ -n "$(docker exec -t ${worker_netns} ip l show $interface_name)" ]];then + ip netns exec ${worker_netns} bash -c "basename \$(readlink /sys/class/net/${interface_name}/device)" + return 0 + fi + echo "" + return 1 + fi + basename $(readlink /sys/class/net/${interface_name}/device) +} + +netns_create(){ + local worker_netns="$1" + + if [[ ! -e /var/run/netns/$worker_netns ]];then + local pid="$(docker inspect -f '{{.State.Pid}}' $worker_netns)" + + if [[ -z "$pid" ]];then + return 1 + fi + + mkdir -p /var/run/netns/ + rm -rf /var/run/netns/$worker_netns + ln -sf /proc/$pid/ns/net "/var/run/netns/$worker_netns" + + if [[ -z "$(ip netns | grep $worker_netns)" ]];then + return 1 + fi + fi + return 0 +} + +switch_pfs(){ + local worker_netns="$1" + shift + local interfaces="$@" + + echo "Switching \"$interfaces\" into $worker_netns ..." + + for pf in $interfaces;do + switch_pf "$pf" "$worker_netns" + done +} + +switch_pf(){ + local pf_name="$1" + local worker_netns="$2" + + if [[ -z "$(ip netns | grep ${worker_netns})" ]];then + echo "Error: Namespace $worker_netns not found!" + return 1 + fi + + if [[ -z "$(ip l show ${pf_name})" ]];then + if [[ -z "$(docker exec -t ${worker_netns} ip l show ${pf_name})" ]];then + echo "Error: Interface $pf_name not found..." + return 1 + fi + + echo "PF ${pf_name} already in namespace $worker_netns!" + else + if ! ip l set dev $pf_name netns $worker_netns;then + echo "Error: unable to set $pf_name namespace to $worker_netns!" + return 1 + fi + fi + + if ! docker exec -t ${worker_netns} ip l set $pf_name up;then + echo "Error: unable to set $pf_name to up!" 
+ return 1 + fi + +} + +switch_vf(){ + local vf_name="$1" + local worker_netns="$2" + + if [[ -z "$(ip l show $vf_name)" ]];then + return 1 + fi + + if ! ip link set "$vf_name" netns "$worker_netns"; then # a failed move must be reported to the caller, not swallowed + return 1 + fi + if ! timeout "$TIMEOUT"s bash -c "until ip netns exec $worker_netns ip link show $vf_name > /dev/null; do sleep $POLL_INTERVAL; done"; then # wait until the VF is visible inside the netns + return 1 + fi + return 0 +} + +switch_netns_vfs(){ + local worker_netns="$1" + + for pf in ${pfs["$worker_netns"]};do + echo "Switching interface $pf vfs into $worker_netns...." + switch_interface_vfs "$pf" "$worker_netns" "${pcis[$pf]}" + done +} + +get_pf_switch_dev_info(){ + local worker_netns="$1" + shift + local interfaces="$@" + for interface in $interfaces; do + interface_pci_address="${pcis[$interface]}" + if grep -q 'siwtchdev' <(devlink dev eswitch show pci/$interface_pci_address ); then # NOTE(review): 'siwtchdev' looks like a typo of 'switchdev', so this never matches; confirm the intended (possibly negated) condition before correcting it + continue + fi + pf_port_names["$interface"]="$(cat /sys/class/net/${interface}/phys_port_name)" + pf_switch_ids["$interface"]="$(cat /sys/class/net/${interface}/phys_switch_id)" + done +} + +switch_netns_vf_representors(){ + local worker_netns="$1" + for pf in ${pfs["$worker_netns"]};do + echo "Switching pf $pf vf representors into $worker_netns ..." + switch_interface_vf_representors "$pf" "$worker_netns" + done +} + +switch_interface_vf_representors(){ + local pf_name="$1" + local worker_netns=$2 + + if [[ -z "${pf_switch_ids[$pf_name]}" ]] || [[ -z ${pf_port_names[$pf_name]:1} ]];then + echo "$pf_name does not have pf_switch_id or pf_port_name, assuming not switchdev..." 
+ return 0 + fi + + for interface in $(ls /sys/class/net);do + phys_switch_id=$(cat /sys/class/net/$interface/phys_switch_id) + if [[ "$phys_switch_id" != "${pf_switch_ids[$pf_name]}" ]]; then + continue + fi + phys_port_name=$(cat /sys/class/net/$interface/phys_port_name) + phys_port_name_pf_index=${phys_port_name%vf*} + phys_port_name_pf_index=${phys_port_name_pf_index#pf} + if [[ "$phys_port_name_pf_index" != "${pf_port_names[$pf_name]:1}" ]]; then + continue + fi + echo "Switching VF representor $interface of PF $pf_name to netns $worker_netns" + switch_vf $interface $worker_netns + done +} + +switch_interface_vfs(){ + local pf_name="$1" + local worker_netns="$2" + local pci="$3" + + vfs_list=$(ls /sys/bus/pci/devices/$pci | grep virtfn) + + if [[ -z "${vfs_list}" ]];then + echo "Warning: No VFs found for interface $pf_name!!" + return 0 + fi + + for vf in $vfs_list;do + local vf_interface="$(ls /sys/bus/pci/devices/$pci/$vf/net)" + + if [[ -n "$vf_interface" ]];then + echo "Switching $vf_interface to namespace $worker_netns..." + sleep 2 + if ! switch_vf "$vf_interface" "$worker_netns";then + echo "Error: could not switch $vf_interface to namespace $worker_netns!" + else + echo "Successfully switched $vf_interface to namespace $worker_netns" + fi + fi + done +} + +read_confs(){ + local conf_file="$1" + + let number_of_netns=$(jq length "${conf_file}")-1 + + for index in $(seq 0 $number_of_netns);do + netnses+=("$(jq -r .[${index}].netns $conf_file)") + let number_of_pfs=$(jq .[$index].pfs $conf_file | jq length)-1 + for pf_index in $(seq 0 $number_of_pfs);do + pfs[${netnses[-1]}]+="$(jq -r .[$index].pfs[$pf_index] $conf_file) " + done + done +} + +variables_check(){ + local status=0 + + check_empty_var "netnses" + let status=$status+$? + check_empty_var "pfs" + let status=$status+$? + + return $status +} + +check_empty_var(){ + local var_name="$1" + + if [[ -z "${!var_name[@]}" ]];then + echo "Error: $var_name is empty..." 
+ return 1 + fi + + return 0 +} + +main(){ + while true;do + for netns in ${netnses[@]};do + switch_pfs "$netns" "${pfs[$netns]}" + sleep 2 + switch_netns_vfs "$netns" + sleep 2 + switch_netns_vf_representors "$netns" + done + sleep $TIMEOUT + done +} + +if [[ -n "$conf_file" ]];then + unset netnses + unset pfs + + declare -a netnses + declare -A pfs + + read_confs "$conf_file" +fi + +variables_check +let status=$? +if [[ "$status" != "0" ]];then + echo "Error: empty var..." + exit $status +fi + +for netns in ${netnses[@]};do + netns_create "$netns" + let status=$status+$? + if [[ "$status" != "0" ]];then + echo "Error: failed to create netns..." + exit $status + fi +done + +for netns in ${netnses[@]};do + get_pcis_from_pfs "$netns" "${pfs[$netns]}" + get_pf_switch_dev_info "$netns" "${pfs[$netns]}" +done + +if [[ "${#pcis[@]}" == "0" ]];then + echo "Error: could not get pci address of interface $pf!!" + exit 1 +fi + +main diff --git a/hack/vf-switcher.service b/hack/vf-switcher.service new file mode 100644 index 0000000000..4872d26172 --- /dev/null +++ b/hack/vf-switcher.service @@ -0,0 +1,12 @@ +[Unit] +Description=Switch all VFs of the configured driver to the specified namespace +After=network.target + +[Service] +Type=simple +ExecStart=/usr/bin/vf-netns-switcher.sh -c /etc/vf-switcher/vf-switcher.yaml +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=network-online.target