From bc3f5540766c5125952e2d4e836459c482ddb574 Mon Sep 17 00:00:00 2001 From: sudhar-krishnakumar Date: Wed, 12 Feb 2025 18:24:34 -0800 Subject: [PATCH] Better error handling in utils, and reduce logging in GetDevices (#340) * Change to use Go Dial api for ssh, for better error messages, when ACC to IMC connection is down. Make utils apis more go based. Per redhat JIRA: Also reduce excessive logging in GetDevices(which gets called periodically by DPU). Fix debug erorr messages, if ssh fails from ACC to IMC, We will throw error message in RunCmdOnImc --- ipu-plugin/ipuplugin/cmd/rootcmd.go | 2 +- ipu-plugin/pkg/ipuplugin/deviceplugin.go | 2 +- ipu-plugin/pkg/ipuplugin/ipuplugin.go | 2 +- ipu-plugin/pkg/ipuplugin/lifecycleservice.go | 9 +- .../pkg/ipuplugin/networkfunctionservice.go | 13 +-- ipu-plugin/pkg/utils/utils.go | 101 +++++++++++++----- 6 files changed, 86 insertions(+), 43 deletions(-) diff --git a/ipu-plugin/ipuplugin/cmd/rootcmd.go b/ipu-plugin/ipuplugin/cmd/rootcmd.go index c8f64d76..725114bb 100644 --- a/ipu-plugin/ipuplugin/cmd/rootcmd.go +++ b/ipu-plugin/ipuplugin/cmd/rootcmd.go @@ -371,7 +371,7 @@ func getPluginMode() string { func convertNameToIpAndPort(p4rtName string) (string, error) { - p4rtIp := "localhost" + p4rtIp := "127.0.0.1" ip, err := net.LookupIP(p4rtName) if err != nil { log.Errorf("Couldn't resolve Name %s to IP: err->%s", p4rtName, err) diff --git a/ipu-plugin/pkg/ipuplugin/deviceplugin.go b/ipu-plugin/pkg/ipuplugin/deviceplugin.go index 2ebd2203..dbec387d 100644 --- a/ipu-plugin/pkg/ipuplugin/deviceplugin.go +++ b/ipu-plugin/pkg/ipuplugin/deviceplugin.go @@ -247,7 +247,7 @@ func discoverHostDevices(mode string) (map[string]*pb.Device, error) { for _, file := range files { deviceCodeByte, err := os.ReadFile(filepath.Join(sysClassNet, file.Name(), "device/device")) if err != nil { - fmt.Printf("Error: %s\n", err) + continue } device_code := strings.TrimSpace(string(deviceCodeByte)) diff --git a/ipu-plugin/pkg/ipuplugin/ipuplugin.go b/ipu-plugin/pkg/ipuplugin/ipuplugin.go index a88331b8..66082689 100644 --- a/ipu-plugin/pkg/ipuplugin/ipuplugin.go +++ b/ipu-plugin/pkg/ipuplugin/ipuplugin.go @@ -176,7 +176,7 @@ func (s *server) Stop() { vfMacList, err := utils.GetVfMacList() if err != nil { - log.Errorf("Unable to reach the IMC %v", err) + log.Errorf("Stop: Error->%v", err) } if len(vfMacList) == 0 || (len(vfMacList) == 1 && vfMacList[0] == "") { log.Errorf("No VFs initialized on the host") diff --git a/ipu-plugin/pkg/ipuplugin/lifecycleservice.go b/ipu-plugin/pkg/ipuplugin/lifecycleservice.go index a165989a..eadbed74 100644 --- a/ipu-plugin/pkg/ipuplugin/lifecycleservice.go +++ b/ipu-plugin/pkg/ipuplugin/lifecycleservice.go @@ -1103,8 +1103,8 @@ func (e *ExecutableHandlerImpl) SetupAccApfs() error { AccApfMacList, err = utils.GetAccApfMacList() if err != nil { - log.Errorf("unable to reach the IMC %v", err) - return fmt.Errorf("unable to reach the IMC %v", err) + log.Errorf("SetupAccApfs: Error-> %v", err) + return fmt.Errorf("SetupAccApfs: Error-> %v", err) } if len(AccApfMacList) != ApfNumber { @@ -1127,11 +1127,10 @@ func CheckAndAddPeerToPeerP4Rules(p types.P4RTClient) { if !PeerToPeerP4RulesAdded { vfMacList, err := utils.GetVfMacList() if err != nil { - log.Errorf("CheckAndAddPeerToPeerP4Rules: unable to reach the IMC %v", err) + log.Errorf("CheckAndAddPeerToPeerP4Rules: Error-> %v", err) return } - //with use of strings.split in utils, we can get list of length 1 with empty string. - if len(vfMacList) == 0 || (len(vfMacList) == 1 && vfMacList[0] == "") { + if len(vfMacList) == 0 { log.Infof("No VFs initialized on the host yet") } else { log.Infof("AddPeerToPeerP4Rules, path->%s, vfMacList->%v", p.GetBin(), vfMacList) diff --git a/ipu-plugin/pkg/ipuplugin/networkfunctionservice.go b/ipu-plugin/pkg/ipuplugin/networkfunctionservice.go index ba954ead..66548a3b 100644 --- a/ipu-plugin/pkg/ipuplugin/networkfunctionservice.go +++ b/ipu-plugin/pkg/ipuplugin/networkfunctionservice.go @@ -45,11 +45,8 @@ func NewNetworkFunctionService(ports map[string]*types.BridgePortInfo, brCtlr ty func (s *NetworkFunctionServiceServer) CreateNetworkFunction(ctx context.Context, in *pb.NFRequest) (*pb.Empty, error) { vfMacList, err := utils.GetVfMacList() if err != nil { - return nil, status.Errorf(codes.Internal, "Unable to reach the IMC %v", err) - } - - if len(vfMacList) == 0 { - return nil, status.Error(codes.Internal, "No NFs initialized on the host") + log.Errorf("CreateNetworkFunction: Error-> %v", err) + return nil, status.Errorf(codes.Internal, "Error-> %v", err) } CheckAndAddPeerToPeerP4Rules(s.p4rtClient) @@ -78,12 +75,10 @@ func (s *NetworkFunctionServiceServer) DeleteNetworkFunction(ctx context.Context vfMacList, err := utils.GetVfMacList() if err != nil { - return nil, status.Errorf(codes.Internal, "Unable to reach the IMC %v", err) + log.Errorf("DeleteNetworkFunction: Error-> %v", err) + return nil, status.Errorf(codes.Internal, "Error-> %v", err) } - if len(vfMacList) == 0 { - return nil, status.Error(codes.Internal, "No NFs initialized on the host") - } if err := s.bridgeCtlr.DeletePort(AccIntfNames[NF_IN_PR_INTF_INDEX]); err != nil { log.Errorf("failed to delete port to bridge: %v, for interface->%v", err, AccIntfNames[NF_IN_PR_INTF_INDEX]) return nil, fmt.Errorf("failed to add port to bridge: %v, for interface->%v", err, AccIntfNames[NF_IN_PR_INTF_INDEX]) diff --git a/ipu-plugin/pkg/utils/utils.go b/ipu-plugin/pkg/utils/utils.go index 827e1d97..0bc1c33a 100644 --- a/ipu-plugin/pkg/utils/utils.go +++ b/ipu-plugin/pkg/utils/utils.go @@ -26,6 +26,7 @@ import ( "github.com/intel/ipu-opi-plugins/ipu-plugin/pkg/types" log "github.com/sirupsen/logrus" + "golang.org/x/crypto/ssh" ) const ( @@ -33,6 +34,7 @@ const ( pbPythonEnvVar = "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python" hostToImcIpAddr = "100.0.0.100" accToImcIpAddr = "192.168.0.1" + imcAddress = "192.168.0.1:22" ) var execCommand = exec.Command @@ -119,48 +121,95 @@ func ExecuteScript(script string) (string, error) { } func ImcQueryfindVsiGivenMacAddr(mode string, mac string) (string, error) { - var ipAddr string - if mode == types.HostMode { - ipAddr = hostToImcIpAddr - } else if mode == types.IpuMode { - ipAddr = accToImcIpAddr + if (mode == types.HostMode) || (mode != types.IpuMode) { + log.Errorf("ImcQueryfindVsiGivenMacAddr: invalid mode-%v. access from host to IMC, not supported", mode) + return "", fmt.Errorf("ImcQueryfindVsiGivenMacAddr: invalid mode-%v. access from host to IMC, not supported", mode) } - runCommand := fmt.Sprintf(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@"%s" "/usr/bin/cli_client -cq" \ - | awk '{if(($17 == "%s")) {print $8}}'`, ipAddr, mac) + commands := fmt.Sprintf(`set -o pipefail && cli_client -cq | awk '{if(($17 == "%s")) {print $8}}'`, mac) + outputBytes, err := RunCmdOnImc(commands) - output, err := ExecuteScript(runCommand) - output = strings.TrimSpace(string(output)) - - if err != nil || output == "" { - log.Errorf("unable to reach IMC %v or null output->%v", err, output) - return "", fmt.Errorf("unable to reach IMC %v or null output->%v", err, output) + //Handle case where command ran without error, but empty output, due to config issue. + if (err != nil) || (len(outputBytes) == 0) { + log.Errorf("ImcQueryfindVsiGivenMacAddr: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) + return "", fmt.Errorf("ImcQueryfindVsiGivenMacAddr: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) } - return output, nil + + outputStr := strings.TrimSpace(string(outputBytes)) + log.Infof("ImcQueryfindVsiGivenMacAddr: %s, len(output)-%v", outputStr, len(outputStr)) + + return outputStr, err } -func GetVfMacList() ([]string, error) { - // reach out to the IMC to get the mac addresses of the VFs - output, err := ExecuteScript(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@192.168.0.1 "/usr/bin/cli_client -cq" \ - | awk '{if(($4 == "0x0") && ($6 == "yes")) {print $17}}'`) +func RunCmdOnImc(cmd string) ([]byte, error) { + + config := &ssh.ClientConfig{ + User: "root", + Auth: []ssh.AuthMethod{ + ssh.Password(""), + }, + HostKeyCallback: ssh.InsecureIgnoreHostKey(), + } + // Connect to the remote server. + client, err := ssh.Dial("tcp", imcAddress, config) + if err != nil { + log.Errorf("failed to dial remote server(%s): %s", imcAddress, err) + return nil, fmt.Errorf("failed to dial remote server(%s): %s", imcAddress, err) + } + defer client.Close() + + // Start a session. + session, err := client.NewSession() + if err != nil { + log.Errorf("failed to create ssh session: %s", err) + return nil, fmt.Errorf("failed to create ssh session: %s", err) + } + defer session.Close() + // Run a command on the remote server and capture the output. + outputBytes, err := session.CombinedOutput(cmd) if err != nil { - return nil, fmt.Errorf("unable to reach the IMC %v", err) + log.Errorf("cmd error: %s", err) + return nil, fmt.Errorf("cmd error: %s", err) } - return strings.Split(strings.TrimSpace(output), "\n"), nil + return outputBytes, nil + } func GetAccApfMacList() ([]string, error) { - // reach out to the IMC to get the mac addresses of the VFs - output, err := ExecuteScript(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@192.168.0.1 "/usr/bin/cli_client -cq" \ - | awk '{if(($2 == "0x4") && ($4 == "0x4")) {print $17}}'`) + commands := `set -o pipefail && cli_client -cq | awk '{if(($2 == "0x4") && ($4 == "0x4")) {print $17}}'` + outputBytes, err := RunCmdOnImc(commands) + + var outputStr []string + //Handle case where command ran without error, but empty output, due to config issue. + if (err != nil) || (len(outputBytes) == 0) { + log.Errorf("GetAccApfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) + return outputStr, fmt.Errorf("GetAccApfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) + } - if err != nil { - return nil, fmt.Errorf("unable to reach the IMC %v", err) + outputStr = strings.Split(strings.TrimSpace(string(outputBytes)), "\n") + log.Infof("GetAccApfMacList: %s, len(output)-%v", outputStr, len(outputStr)) + + return outputStr, err +} + +func GetVfMacList() ([]string, error) { + // reach out to the IMC to get the mac addresses of the VFs + commands := `set -o pipefail && cli_client -cq | awk '{if(($4 == "0x0") && ($6 == "yes")) {print $17}}'` + outputBytes, err := RunCmdOnImc(commands) + + var outputStr []string + //Handle case where command ran without error, but empty output, due to config issue. + if (err != nil) || (len(outputBytes) == 0) { + log.Errorf("GetVfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) + return outputStr, fmt.Errorf("GetVfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes)) } - return strings.Split(strings.TrimSpace(output), "\n"), nil + outputStr = strings.Split(strings.TrimSpace(string(outputBytes)), "\n") + log.Infof("GetVfMacList: %s, len(output)-%v", outputStr, len(outputStr)) + + return outputStr, err } // Taken from the IPDK k8s-infra-offload project instead of including the full project as a module