From 4fbdf0423d393210b3801588d9934710ce06f1b8 Mon Sep 17 00:00:00 2001
From: liornoy
Date: Sun, 27 Feb 2022 15:16:25 +0200
Subject: [PATCH] Add SR-IOV gather script

this script is gathering the following:
* openshift-sriov-network-operator namespace.
* the following resources:
  - sriovnetworknodepolicies.
  - sriovnetworknodestates.
  - sriovnetworkpoolconfigs.
  - sriovnetworks.
  - sriovoperatorconfigs.
  - sriovibnetworks.
* the following logs from the sriov-config-daemon:
  - /etc/sno-initial-node-state.json. (*)
  - /proc/cmdline.
  - dmesg.
  - ip link.
  - /var/log/multus.log (if exists).
  - netns and output of `ip a` from every ns.
  - NIC firmware and driver from ethtool.

(*) Need to update the path to /tmp/sno-initial-node-state.json after:
https://github.com/k8snetworkplumbingwg/sriov-network-operator/pull/262
---
 collection-scripts/gather       |   3 +
 collection-scripts/gather_sriov | 136 ++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100755 collection-scripts/gather_sriov

diff --git a/collection-scripts/gather b/collection-scripts/gather
index 17ce0596b..4300fbada 100755
--- a/collection-scripts/gather
+++ b/collection-scripts/gather
@@ -85,5 +85,8 @@ oc adm inspect --dest-dir must-gather --rotated-pod-logs "${group_resources_text
 # Gather NMState
 /usr/bin/gather_nmstate
 
+# Gather SR-IOV resources
+/usr/bin/gather_sriov
+
 # force disk flush to ensure that all data gathered is accessible in the copy container
 sync
diff --git a/collection-scripts/gather_sriov b/collection-scripts/gather_sriov
new file mode 100755
index 000000000..2feab3c67
--- /dev/null
+++ b/collection-scripts/gather_sriov
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+BASE_COLLECTION_PATH="must-gather"
+SRIOV_NS="$(oc get subs -A -o template --template '{{range .items}}{{if eq .spec.name "sriov-network-operator"}}{{.metadata.namespace}}{{end}}{{end}}')"
+SRIOV_LOG_PATH="${BASE_COLLECTION_PATH}/namespaces/openshift-sriov-network-operator"
+
+# Nothing to collect when no sriov-network-operator subscription is found.
+if [ -z "${SRIOV_NS}" ]; then
+    echo "INFO: SR-IOV not detected. Skipping."
+    exit 0
+fi
+
+# resource list
+resources=()
+
+# sriov network operator namespace
+resources+=(ns/openshift-sriov-network-operator)
+
+# sriovnetwork.openshift.io
+resources+=(sriovnetworknodepolicies sriovnetworknodestates sriovnetworkpoolconfigs sriovnetworks sriovoperatorconfigs sriovibnetworks)
+
+# run the collection of resources using must-gather
+for resource in "${resources[@]}"; do
+    oc adm inspect --dest-dir "${BASE_COLLECTION_PATH}" --all-namespaces "${resource}"
+done
+
+# Pods to collect from: the script arguments if any, otherwise every config daemon pod.
+CONFIG_DAEMON_PODS="${*:-$(oc -n openshift-sriov-network-operator get pods -l app=sriov-network-config-daemon -o jsonpath='{.items[*].metadata.name}')}"
+PIDS=()
+
+# gather_netns_ip_a stores the `ip netns` listing of the host behind the given
+# config daemon pod, then runs `ip netns exec <ns> ip a` for every namespace.
+# Arguments: $1 - config daemon pod name.
+function gather_netns_ip_a(){
+    local pod="${1}"
+    local pod_log_path="${SRIOV_LOG_PATH}/pods/${pod}"
+    local netns_log_path="${pod_log_path}/netns"
+    local netns_ip_a_log_path="${pod_log_path}/netns_ip_a"
+    local ns out
+
+    # The redirections below fail if the pod directory does not exist yet.
+    mkdir -p "${pod_log_path}"
+
+    oc exec -n openshift-sriov-network-operator "${pod}" -c sriov-network-config-daemon -- chroot /host /bin/bash -c "ip netns" > "${netns_log_path}" 2>&1
+
+    while IFS= read -r ns; do
+        # `ip netns` may print "<name> (id: <n>)"; only the name is valid for `ip netns exec`.
+        ns="${ns%% *}"
+        if [ -z "${ns}" ]; then
+            continue
+        fi
+        out="$(oc exec -n openshift-sriov-network-operator "${pod}" -c sriov-network-config-daemon -- chroot /host \
+            /bin/bash -c "ip netns exec ${ns} ip a" 2>/dev/null)"
+        if [ -z "${out}" ]; then
+            continue
+        fi
+        echo "> ip netns exec ${ns} ip a " >> "${netns_ip_a_log_path}" &&
+        echo "${out}" >> "${netns_ip_a_log_path}"
+    done < "${netns_log_path}"
+}
+
+# gather_ethtool stores the `ethtool -i` output of every interface, except lo,
+# reported by `ip -o link show` on the host behind the given config daemon pod.
+# Arguments: $1 - config daemon pod name.
+function gather_ethtool(){
+    local pod="${1}"
+    local pod_log_path="${SRIOV_LOG_PATH}/pods/${pod}"
+    local ethtool_log_path="${pod_log_path}/ethtool"
+    local out interfaces interface
+
+    # The redirections below fail if the pod directory does not exist yet.
+    mkdir -p "${pod_log_path}"
+
+    # Get ip -o link show output.
+    out="$(oc exec -n openshift-sriov-network-operator "${pod}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "ip -o link show 2>/dev/null")"
+
+    # Keep only the interface name, dropping anything after "@".
+    interfaces="$(echo "${out}" | awk -F': ' '{print $2}' | cut -d '@' -f 1)"
+
+    # Run ethtool for each interface except for lo.
+    while IFS= read -r interface; do
+        if [ -z "${interface}" ] || [ "${interface}" = "lo" ]; then
+            continue
+        fi
+        echo "> ethtool -i ${interface}" >> "${ethtool_log_path}"
+        oc exec -n openshift-sriov-network-operator "${pod}" -c sriov-network-config-daemon -- chroot /host \
+            /bin/bash -c "ethtool -i ${interface}" >> "${ethtool_log_path}"
+    done <<< "${interfaces}"
+}
+
+# The pod list is one whitespace-separated string, so it is split into words on purpose.
+for CONFIG_DAEMON_POD in ${CONFIG_DAEMON_PODS}; do
+
+    CONFIG_DAEMON_POD_LOG_PATH="${SRIOV_LOG_PATH}/pods/${CONFIG_DAEMON_POD}"
+    SNO_INITIAL_NODE_STATE_LOG_PATH="${CONFIG_DAEMON_POD_LOG_PATH}/sno-initial-node-state.json"
+    KERNEL_CMDLINE_LOG_PATH="${CONFIG_DAEMON_POD_LOG_PATH}/kernel-cmdline"
+    IP_LINK_LOG_PATH="${CONFIG_DAEMON_POD_LOG_PATH}/ip_link"
+    DMESG_LOG_PATH="${CONFIG_DAEMON_POD_LOG_PATH}/dmesg"
+    MULTUS_LOG_PATH="${CONFIG_DAEMON_POD_LOG_PATH}/multus-log"
+
+    # Make sure the pod directory exists before anything is redirected into it.
+    mkdir -p "${CONFIG_DAEMON_POD_LOG_PATH}"
+
+    # Collect sno-initial-node-state.json.
+    oc exec -n openshift-sriov-network-operator "${CONFIG_DAEMON_POD}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "cat /etc/sno-initial-node-state.json" > "${SNO_INITIAL_NODE_STATE_LOG_PATH}" & PIDS+=("$!")
+
+    # Collect kernel cmdline.
+    oc exec -n openshift-sriov-network-operator "${CONFIG_DAEMON_POD}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "cat /proc/cmdline" > "${KERNEL_CMDLINE_LOG_PATH}" & PIDS+=("$!")
+
+    # Collect ip link.
+    oc exec -n openshift-sriov-network-operator "${CONFIG_DAEMON_POD}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "ip link" > "${IP_LINK_LOG_PATH}" 2>&1 & PIDS+=("$!")
+
+    # Collect dmesg.
+    oc exec -n openshift-sriov-network-operator "${CONFIG_DAEMON_POD}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "journalctl -k" > "${DMESG_LOG_PATH}" & PIDS+=("$!")
+
+    # Collect /var/log/multus.log if it exists; no file is written when the cat fails.
+    out=$(oc exec -n openshift-sriov-network-operator "${CONFIG_DAEMON_POD}" -c sriov-network-config-daemon -- chroot /host \
+        /bin/bash -c "cat /var/log/multus.log" 2>/dev/null) && echo "$out" 1> "${MULTUS_LOG_PATH}" & PIDS+=("$!")
+
+    gather_netns_ip_a "${CONFIG_DAEMON_POD}" & PIDS+=("$!")
+
+    gather_ethtool "${CONFIG_DAEMON_POD}" & PIDS+=("$!")
+
+done
+
+echo "INFO: Waiting for sriov info collection to complete ..."
+wait "${PIDS[@]}"
+echo "INFO: sriov info collection complete."
+
+# force disk flush to ensure that all data gathered are written
+sync