From 38427e19fdf5b2b8abbf08cc05dc51f637df801c Mon Sep 17 00:00:00 2001
From: hzma
Date: Wed, 20 Apr 2022 18:07:42 +0800
Subject: [PATCH] add env-check

---
Notes (not part of the commit message):
  * Line breaks were restored from a whitespace-collapsed copy of this patch.
    Indentation of context lines is reconstructed, so apply with
    'git apply --ignore-whitespace' or 'patch -l' if strict matching fails.
  * env-check.sh: sysctl values are read defensively and compared quoted,
    process checks use pgrep, netstat/nmap are run once, the CNI directory is
    globbed instead of parsing ls, and the log directory is created up front.

 dist/images/Dockerfile.base |   2 +-
 dist/images/env-check.sh    | 101 ++++++++++++++++++++++++++++++++++++
 dist/images/install.sh      |  22 ++++++++-
 3 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100755 dist/images/env-check.sh

diff --git a/dist/images/Dockerfile.base b/dist/images/Dockerfile.base
index bab9aefe3c7..cf34ad13dec 100644
--- a/dist/images/Dockerfile.base
+++ b/dist/images/Dockerfile.base
@@ -49,7 +49,7 @@ ARG DEBIAN_FRONTEND=noninteractive
 RUN apt update && apt upgrade -y && apt install ca-certificates python3 hostname libunwind8 netbase \
         ethtool iproute2 ncat libunbound-dev procps libatomic1 kmod iptables \
         tcpdump ipset curl uuid-runtime openssl inetutils-ping arping ndisc6 \
-        logrotate libjemalloc2 dnsutils -y --no-install-recommends && \
+        logrotate libjemalloc2 dnsutils net-tools nmap -y --no-install-recommends && \
         rm -rf /var/lib/apt/lists/* && \
         cd /usr/sbin && \
         ln -sf /usr/sbin/iptables-legacy iptables && \
diff --git a/dist/images/env-check.sh b/dist/images/env-check.sh
new file mode 100755
index 00000000000..a2b00892a89
--- /dev/null
+++ b/dist/images/env-check.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+#
+# Kube-OVN node environment check.
+#
+# Meant to be run inside a kube-ovn-cni pod (see 'env-check' in install.sh).
+# It inspects settings that commonly break pod networking and appends every
+# finding to the log file below. It only reports; it never changes the system.
+
+# Best effort: a failing probe must not abort the remaining checks.
+set +e
+
+checkresult=/var/log/kube-ovn/env-check.log
+# Redirecting into the log fails when its directory is missing.
+mkdir -p "${checkresult%/*}"
+
+# Append one line to the result log.
+log() {
+  printf '%s\n' "$*" >> "$checkresult"
+}
+
+# Print the content of a /proc/sys entry, or nothing when it is unreadable.
+read_sysctl() {
+  [[ -r "$1" ]] && cat "$1"
+}
+
+# Report any process whose command line contains the given keyword.
+# pgrep never matches itself, whereas counting 'ps -ef | grep' lines depends
+# on whether the grep process happens to show up in the listing.
+check_process() {
+  local keyword=$1
+  if pgrep -f -- "$keyword" > /dev/null 2>&1; then
+    log "Found pid with '*${keyword}*' name, make sure it has no effect on traffics"
+  fi
+}
+
+echo "Start environment check" > "$checkresult"
+
+log "1) check cni configuration"
+cni_dir=/etc/cni/net.d
+if [[ ! -e "$cni_dir" ]]; then
+  log "Directory /etc/cni/net.d does not exist, please check kube-ovn-cni pod status"
+else
+  # Glob instead of parsing 'ls'; nullglob makes an empty directory a no-op.
+  shopt -s nullglob
+  for path in "$cni_dir"/*; do
+    file=${path##*/}
+    if [[ "$file" != *kube-ovn.conflist* ]]; then
+      log "Check files in /etc/cni/net.d, make sure if the config file $file should be deleted"
+    fi
+  done
+  shopt -u nullglob
+fi
+
+log "2) check system ipv4 config"
+# Quoted comparisons: tcp_tw_recycle was removed in Linux 4.12, so the value may
+# be empty and an unquoted '[ $recycle == 1 ]' fails with "unary operator expected".
+probe_mtu=$(read_sysctl /proc/sys/net/ipv4/tcp_mtu_probing)
+if [[ "$probe_mtu" == 0 ]]; then
+  log "The 'tcp_mtu_probing' config may affect traffic, make sure if /proc/sys/net/ipv4/tcp_mtu_probing should be set to 1"
+fi
+recycle=$(read_sysctl /proc/sys/net/ipv4/tcp_tw_recycle)
+if [[ "$recycle" == 1 ]]; then
+  log "The 'tcp_tw_recycle' config affects nodeport service, make sure change /proc/sys/net/ipv4/tcp_tw_recycle to 0"
+fi
+
+log "3) check checksum value"
+# Run netstat once; a non-zero status is still reported as "not found".
+if result=$(netstat -s); then
+  if [[ "$result" == *InCsumErrors* ]]; then
+    log "Found 'InCsumErrors' para after exec 'netstat -s' cmd, check if the value is increasing, maybe should exec cmd 'ethtool -K ETH tx off' to disable checksum, where 'ETH' is the nic used for traffics"
+  fi
+else
+  log "The netstat cmd not found, maybe can be installed mannully and exec 'netstat -s' to check if there is 'InCsumErrors'"
+  log "If there's 'InCsumErrors' and the value is increasing, should exec cmd 'ethtool -K ETH tx off' to disable checksum, where 'ETH' is the nic used for traffics"
+fi
+
+log "4) check dns config"
+# Only 'search'/'domain' lines carry search names, so ignore the other lines.
+if grep -Eq '^[[:space:]]*(search|domain)[[:space:]].*\.com' /etc/resolv.conf; then
+  log "There's *.com in dns search name, make sure the config /etc/resolv.conf is right"
+fi
+
+log "5) check firewall config"
+result=$(systemctl status firewalld)
+if [[ "$result" == *running* ]]; then
+  log "The firewalld is running, make sure it has no effect on traffics across nodes"
+fi
+
+for keyword in security qax safe defence vmsec; do
+  check_process "$keyword"
+done
+
+log "6) check geneve 6081 connection"
+# Scan once; a non-zero status is still reported as "not found".
+if result=$(nmap -sU 127.0.0.1 -p 6081); then
+  if [[ "$result" != *open* ]]; then
+    log "The 6081 port for geneve encapsulation may be not available, please check if ovs-ovn pod is health"
+  fi
+else
+  log "The nmap cmd not found, maybe can be installed mannully and exec 'nmap -sU 127.0.0.1 -p 6081' to check port connection"
+fi
diff --git a/dist/images/install.sh b/dist/images/install.sh
index 2f9e1867f29..ac05874753e 100755
--- a/dist/images/install.sh
+++ b/dist/images/install.sh
@@ -2748,6 +2748,7 @@ showHelp(){
   echo " diagnose {all|node} [nodename] diagnose connectivity of all nodes or a specific node"
   echo " tuning {install-fastpath|local-install-fastpath|remove-fastpath|install-stt|local-install-stt|remove-stt} {centos7|centos8}} [kernel-devel-version] deploy kernel optimisation components to the system"
   echo " reload restart all kube-ovn components"
+  echo " env-check check the environment configuration"
 }
 
 tcpdump(){
@@ -3100,7 +3101,7 @@ getOvnCentralPod(){
   if [ -z "$VERSION" ]; then
     echo "kubeovn version not exists"
     exit 1
-   fi
+  fi
   KUBE_OVN_VERSION=$VERSION
 }
 
@@ -3408,6 +3409,22 @@ reload(){
   kubectl rollout status deployment/kube-ovn-monitor -n kube-system
 }
 
+env-check(){
+  set +e
+  echo "************************************************************************"
+  echo "The result will be saved to /var/log/kube-ovn/env-check.log on each node"
+  echo "************************************************************************"
+
+  KUBE_OVN_NS=kube-system
+  podNames=$(kubectl get pod -n "$KUBE_OVN_NS" | grep kube-ovn-cni | awk '{print $1}')
+  for pod in $podNames
+  do
+    kubectl exec -it -n "$KUBE_OVN_NS" "$pod" -- bash /kube-ovn/env-check.sh
+  done
+  echo "*************************************************************************"
+  echo "Finish environment check, check result in /var/log/kube-ovn/env-check.log"
+}
+
 if [ $# -lt 1 ]; then
   showHelp
   exit 0
@@ -3445,6 +3462,9 @@ case $subcommand in
   tuning)
     tuning "$@"
     ;;
+  env-check)
+    env-check
+    ;;
   *)
     showHelp
     ;;