Skip to content

Commit

Permalink
Better error handling in utils, and reduce logging in GetDevices (#340)
Browse files Browse the repository at this point in the history
* Change to use Go Dial api for ssh, for better error
messages, when ACC to IMC connection is down.
Make utils apis more go based.
Per redhat JIRA: Also reduce excessive logging in GetDevices(which gets
called periodically by DPU).
Fix debug erorr messages, if ssh fails from ACC to IMC,
We will throw error message in RunCmdOnImc
  • Loading branch information
sudhar-krishnakumar authored Feb 13, 2025
1 parent 56622a7 commit bc3f554
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 43 deletions.
2 changes: 1 addition & 1 deletion ipu-plugin/ipuplugin/cmd/rootcmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ func getPluginMode() string {

func convertNameToIpAndPort(p4rtName string) (string, error) {

p4rtIp := "localhost"
p4rtIp := "127.0.0.1"
ip, err := net.LookupIP(p4rtName)
if err != nil {
log.Errorf("Couldn't resolve Name %s to IP: err->%s", p4rtName, err)
Expand Down
2 changes: 1 addition & 1 deletion ipu-plugin/pkg/ipuplugin/deviceplugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func discoverHostDevices(mode string) (map[string]*pb.Device, error) {
for _, file := range files {
deviceCodeByte, err := os.ReadFile(filepath.Join(sysClassNet, file.Name(), "device/device"))
if err != nil {
fmt.Printf("Error: %s\n", err)
continue
}

device_code := strings.TrimSpace(string(deviceCodeByte))
Expand Down
2 changes: 1 addition & 1 deletion ipu-plugin/pkg/ipuplugin/ipuplugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ func (s *server) Stop() {

vfMacList, err := utils.GetVfMacList()
if err != nil {
log.Errorf("Unable to reach the IMC %v", err)
log.Errorf("Stop: Error->%v", err)
}
if len(vfMacList) == 0 || (len(vfMacList) == 1 && vfMacList[0] == "") {
log.Errorf("No VFs initialized on the host")
Expand Down
9 changes: 4 additions & 5 deletions ipu-plugin/pkg/ipuplugin/lifecycleservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -1103,8 +1103,8 @@ func (e *ExecutableHandlerImpl) SetupAccApfs() error {
AccApfMacList, err = utils.GetAccApfMacList()

if err != nil {
log.Errorf("unable to reach the IMC %v", err)
return fmt.Errorf("unable to reach the IMC %v", err)
log.Errorf("SetupAccApfs: Error-> %v", err)
return fmt.Errorf("SetupAccApfs: Error-> %v", err)
}

if len(AccApfMacList) != ApfNumber {
Expand All @@ -1127,11 +1127,10 @@ func CheckAndAddPeerToPeerP4Rules(p types.P4RTClient) {
if !PeerToPeerP4RulesAdded {
vfMacList, err := utils.GetVfMacList()
if err != nil {
log.Errorf("CheckAndAddPeerToPeerP4Rules: unable to reach the IMC %v", err)
log.Errorf("CheckAndAddPeerToPeerP4Rules: Error-> %v", err)
return
}
//with use of strings.split in utils, we can get list of length 1 with empty string.
if len(vfMacList) == 0 || (len(vfMacList) == 1 && vfMacList[0] == "") {
if len(vfMacList) == 0 {
log.Infof("No VFs initialized on the host yet")
} else {
log.Infof("AddPeerToPeerP4Rules, path->%s, vfMacList->%v", p.GetBin(), vfMacList)
Expand Down
13 changes: 4 additions & 9 deletions ipu-plugin/pkg/ipuplugin/networkfunctionservice.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,8 @@ func NewNetworkFunctionService(ports map[string]*types.BridgePortInfo, brCtlr ty
func (s *NetworkFunctionServiceServer) CreateNetworkFunction(ctx context.Context, in *pb.NFRequest) (*pb.Empty, error) {
vfMacList, err := utils.GetVfMacList()
if err != nil {
return nil, status.Errorf(codes.Internal, "Unable to reach the IMC %v", err)
}

if len(vfMacList) == 0 {
return nil, status.Error(codes.Internal, "No NFs initialized on the host")
log.Errorf("CreateNetworkFunction: Error-> %v", err)
return nil, status.Errorf(codes.Internal, "Error-> %v", err)
}

CheckAndAddPeerToPeerP4Rules(s.p4rtClient)
Expand Down Expand Up @@ -78,12 +75,10 @@ func (s *NetworkFunctionServiceServer) DeleteNetworkFunction(ctx context.Context
vfMacList, err := utils.GetVfMacList()

if err != nil {
return nil, status.Errorf(codes.Internal, "Unable to reach the IMC %v", err)
log.Errorf("DeleteNetworkFunction: Error-> %v", err)
return nil, status.Errorf(codes.Internal, "Error-> %v", err)
}

if len(vfMacList) == 0 {
return nil, status.Error(codes.Internal, "No NFs initialized on the host")
}
if err := s.bridgeCtlr.DeletePort(AccIntfNames[NF_IN_PR_INTF_INDEX]); err != nil {
log.Errorf("failed to delete port to bridge: %v, for interface->%v", err, AccIntfNames[NF_IN_PR_INTF_INDEX])
return nil, fmt.Errorf("failed to add port to bridge: %v, for interface->%v", err, AccIntfNames[NF_IN_PR_INTF_INDEX])
Expand Down
101 changes: 75 additions & 26 deletions ipu-plugin/pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ import (

"github.com/intel/ipu-opi-plugins/ipu-plugin/pkg/types"
log "github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
)

const (
vsiToVportOffset = 16
pbPythonEnvVar = "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python"
hostToImcIpAddr = "100.0.0.100"
accToImcIpAddr = "192.168.0.1"
imcAddress = "192.168.0.1:22"
)

var execCommand = exec.Command
Expand Down Expand Up @@ -119,48 +121,95 @@ func ExecuteScript(script string) (string, error) {
}

func ImcQueryfindVsiGivenMacAddr(mode string, mac string) (string, error) {
var ipAddr string
if mode == types.HostMode {
ipAddr = hostToImcIpAddr
} else if mode == types.IpuMode {
ipAddr = accToImcIpAddr
if (mode == types.HostMode) || (mode != types.IpuMode) {
log.Errorf("ImcQueryfindVsiGivenMacAddr: invalid mode-%v. access from host to IMC, not supported", mode)
return "", fmt.Errorf("ImcQueryfindVsiGivenMacAddr: invalid mode-%v. access from host to IMC, not supported", mode)
}

runCommand := fmt.Sprintf(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@"%s" "/usr/bin/cli_client -cq" \
| awk '{if(($17 == "%s")) {print $8}}'`, ipAddr, mac)
commands := fmt.Sprintf(`set -o pipefail && cli_client -cq | awk '{if(($17 == "%s")) {print $8}}'`, mac)
outputBytes, err := RunCmdOnImc(commands)

output, err := ExecuteScript(runCommand)
output = strings.TrimSpace(string(output))

if err != nil || output == "" {
log.Errorf("unable to reach IMC %v or null output->%v", err, output)
return "", fmt.Errorf("unable to reach IMC %v or null output->%v", err, output)
//Handle case where command ran without error, but empty output, due to config issue.
if (err != nil) || (len(outputBytes) == 0) {
log.Errorf("ImcQueryfindVsiGivenMacAddr: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
return "", fmt.Errorf("ImcQueryfindVsiGivenMacAddr: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
}
return output, nil

outputStr := strings.TrimSpace(string(outputBytes))
log.Infof("ImcQueryfindVsiGivenMacAddr: %s, len(output)-%v", outputStr, len(outputStr))

return outputStr, err
}

func GetVfMacList() ([]string, error) {
// reach out to the IMC to get the mac addresses of the VFs
output, err := ExecuteScript(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 [email protected] "/usr/bin/cli_client -cq" \
| awk '{if(($4 == "0x0") && ($6 == "yes")) {print $17}}'`)
func RunCmdOnImc(cmd string) ([]byte, error) {

config := &ssh.ClientConfig{
User: "root",
Auth: []ssh.AuthMethod{
ssh.Password(""),
},
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
}
// Connect to the remote server.
client, err := ssh.Dial("tcp", imcAddress, config)
if err != nil {
log.Errorf("failed to dial remote server(%s): %s", imcAddress, err)
return nil, fmt.Errorf("failed to dial remote server(%s): %s", imcAddress, err)
}
defer client.Close()

// Start a session.
session, err := client.NewSession()
if err != nil {
log.Errorf("failed to create ssh session: %s", err)
return nil, fmt.Errorf("failed to create ssh session: %s", err)
}
defer session.Close()

// Run a command on the remote server and capture the output.
outputBytes, err := session.CombinedOutput(cmd)
if err != nil {
return nil, fmt.Errorf("unable to reach the IMC %v", err)
log.Errorf("cmd error: %s", err)
return nil, fmt.Errorf("cmd error: %s", err)
}

return strings.Split(strings.TrimSpace(output), "\n"), nil
return outputBytes, nil

}

func GetAccApfMacList() ([]string, error) {
// reach out to the IMC to get the mac addresses of the VFs
output, err := ExecuteScript(`ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 [email protected] "/usr/bin/cli_client -cq" \
| awk '{if(($2 == "0x4") && ($4 == "0x4")) {print $17}}'`)
commands := `set -o pipefail && cli_client -cq | awk '{if(($2 == "0x4") && ($4 == "0x4")) {print $17}}'`
outputBytes, err := RunCmdOnImc(commands)

var outputStr []string
//Handle case where command ran without error, but empty output, due to config issue.
if (err != nil) || (len(outputBytes) == 0) {
log.Errorf("GetAccApfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
return outputStr, fmt.Errorf("GetAccApfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
}

if err != nil {
return nil, fmt.Errorf("unable to reach the IMC %v", err)
outputStr = strings.Split(strings.TrimSpace(string(outputBytes)), "\n")
log.Infof("GetAccApfMacList: %s, len(output)-%v", outputStr, len(outputStr))

return outputStr, err
}

func GetVfMacList() ([]string, error) {
// reach out to the IMC to get the mac addresses of the VFs
commands := `set -o pipefail && cli_client -cq | awk '{if(($4 == "0x0") && ($6 == "yes")) {print $17}}'`
outputBytes, err := RunCmdOnImc(commands)

var outputStr []string
//Handle case where command ran without error, but empty output, due to config issue.
if (err != nil) || (len(outputBytes) == 0) {
log.Errorf("GetVfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
return outputStr, fmt.Errorf("GetVfMacList: Error %v, from RunCmdOnImc (OR) empty (output)-%v", err, len(outputBytes))
}

return strings.Split(strings.TrimSpace(output), "\n"), nil
outputStr = strings.Split(strings.TrimSpace(string(outputBytes)), "\n")
log.Infof("GetVfMacList: %s, len(output)-%v", outputStr, len(outputStr))

return outputStr, err
}

// Taken from the IPDK k8s-infra-offload project instead of including the full project as a module
Expand Down

0 comments on commit bc3f554

Please sign in to comment.