Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check VBM node type automatically and support csi rund 3.0 protocol #1177

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/multi/Dockerfile.multi
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ LABEL maintainers="Alibaba Cloud Authors" description="Alibaba Cloud CSI Plugin"

ARG TARGETARCH
RUN --mount=type=cache,target=/var/cache/dnf,sharing=locked,id=dnf-cache-$TARGETARCH \
dnf install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio && \
dnf install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio lsof && \
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone

FROM base as build-util-linux
Expand Down
2 changes: 1 addition & 1 deletion build/multi/Dockerfile.multi.asi
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
FROM registry.eu-west-1.aliyuncs.com/acs/alinux:3-update as base
LABEL maintainers="Alibaba Cloud Authors" description="Alibaba Cloud CSI Plugin"

RUN yum install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio && \
RUN yum install -y ca-certificates file tzdata nfs-utils xfsprogs e4fsprogs pciutils iputils strace cloud-utils-growpart gdisk nc telnet tar cpio lsof && \
yum clean all
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' >/etc/timezone

Expand Down
4 changes: 4 additions & 0 deletions pkg/cloud/metadata/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,17 @@ var (
"alibabacloud.com/ecs-instance-id",
"sigma.ali/ecs-instance-id",
}
VmocLabels = []string{
"rm.alibaba-inc.com/vbm",
}
)

// MetadataLabels maps each MetadataKey to the ordered list of label names
// that may carry the value for that key (for example, VmocType is looked up
// via the names in VmocLabels).
// NOTE(review): these are presumably Kubernetes node labels consulted in list
// order — confirm against the code that reads this table.
var MetadataLabels = map[MetadataKey][]string{
RegionID: RegionIDLabels,
ZoneID: ZoneIDLabels,
InstanceType: InstanceTypeLabels,
InstanceID: InstanceIdLabels,
VmocType: VmocLabels,
}

func init() {
Expand Down
15 changes: 9 additions & 6 deletions pkg/cloud/metadata/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@ import (
type MetadataKey int

const (
RegionID MetadataKey = iota
ZoneID MetadataKey = iota
InstanceID MetadataKey = iota
InstanceType MetadataKey = iota
AccountID MetadataKey = iota
ClusterID MetadataKey = iota
RegionID MetadataKey = iota
ZoneID
InstanceID
InstanceType
AccountID
ClusterID
VmocType
)

func (k MetadataKey) String() string {
Expand All @@ -38,6 +39,8 @@ func (k MetadataKey) String() string {
return "AccountID"
case ClusterID:
return "ClusterID"
case VmocType:
return "VmocType"
default:
return fmt.Sprintf("MetadataKey(%d)", k)
}
Expand Down
115 changes: 105 additions & 10 deletions pkg/disk/bdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -78,6 +79,13 @@ func IsNoSuchDeviceErr(err error) bool {
return strings.Contains(strings.ToLower(err.Error()), "no such device")
}

func IsNoSuchFileErr(err error) bool {
if err == nil {
return false
}
return strings.Contains(strings.ToLower(err.Error()), "no such file or directory")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't check the error string; it is subject to change. Use errors.Is instead.

}

// IohubSriovBind io hub bind
func IohubSriovBind(bdf string) error {
return ioutil.WriteFile(iohubSriovAction+"bind", []byte(bdf), 0600)
Expand Down Expand Up @@ -491,6 +499,10 @@ const (
DFBus // 1
)

// dfBusDevicePathPattern is the glob pattern that matches the "type" file of
// every dfvirtio* device under the dragonfly bus in sysfs.
const dfBusDevicePathPattern = "/sys/bus/dragonfly/devices/dfvirtio*/type"

func (_type MachineType) BusName() string {
busNames := [...]string{
BDFTypeBus,
Expand All @@ -504,40 +516,89 @@ func (_type MachineType) BusName() string {
return busNames[_type]
}

func (_type MachineType) BusPrefix() string {
busPrefixes := [...]string{
func (_type MachineType) BusRegex() (*regexp.Regexp, error) {
busRegexes := [...]*regexp.Regexp{
BDFTypeDevice,
DFBusTypeDevice,
}

if _type < BDF || _type > DFBus {
return fmt.Sprintf("Unknown(%d)", _type)
return nil, fmt.Errorf("Unknown(%d)", _type)
}

return busPrefixes[_type]
return busRegexes[_type], nil
}

// Driver abstracts the kernel-driver operations that can be performed on a
// single bus device (identified by its device number) backing a disk.
type Driver interface {
// CurentDriver returns the name of the driver currently attached to the
// device. NOTE(review): implementation not visible here — confirm; the
// method name is spelled "Curent" and is kept for API compatibility.
CurentDriver() (string, error)
// UnbindDriver detaches the device from its current driver.
// NOTE(review): implementation not fully visible here — confirm.
UnbindDriver() error
// BindDriver attaches the device to targetDriver.
BindDriver(targetDriver string) error
// GetDeviceNumber returns the device number this Driver operates on.
// NOTE(review): implementation not visible here — confirm.
GetDeviceNumber() string
// GetPCIDeviceDriverType classifies the device from its `lspci -n` output
// and returns either PCITypeVIRTIO or PCITypeNVME.
GetPCIDeviceDriverType() string
// CheckVFIOUsage resolves the device's iommu_group and returns an error
// when `lsof` reports any output for the matching /dev/vfio/<group> file.
CheckVFIOUsage() error
}

func NewDeviceDriver(blockDevice, deviceNumber string, _type MachineType, extras map[string]string) (Driver, error) {
func NewDeviceDriver(volumeId, blockDevice, deviceNumber string, _type MachineType, extras map[string]string) (Driver, error) {
d := &driver{
blockDevice: blockDevice,
deviceNumber: deviceNumber,
machineType: _type,
extras: extras,
}
if d.deviceNumber == "" {
deviceNumber, err := DefaultDeviceManager.GetDeviceNumberFromBlockDevice(blockDevice, d.machineType.BusPrefix())
deviceNumberFromDevice := ""
if blockDevice != "" {
klog.Infof("NewDeviceDriver: start to get deviceNumber from device: %s", blockDevice)
busRegex, err := d.machineType.BusRegex()
if err != nil {
klog.Errorf("NewDeviceDriver: get device number from block device err: %v", err)
klog.Errorf("NewDeviceDriver: get bus type: %v", err)
return nil, err
}
d.deviceNumber = deviceNumber
deviceNumberFromDevice, err = DefaultDeviceManager.GetDeviceNumberFromBlockDevice(blockDevice, busRegex)
if err != nil {
klog.Errorf("NewDeviceDriver: get device number from block device err: %v", err)
if deviceNumber == "" {
return nil, err
}
}
}
if deviceNumberFromDevice != "" {
if deviceNumber != "" && deviceNumberFromDevice != deviceNumber {
klog.Warningf("NewDeviceDriver: newGeneratedDeviceNumber: %s is different from the one from exists file: %s, override with new deviceNumber", deviceNumberFromDevice, deviceNumber)
}
d.deviceNumber = deviceNumberFromDevice
}
if d.deviceNumber != "" {
return d, nil
}
if _type == DFBus {
matchesFile, err := filepath.Glob(dfBusDevicePathPattern)
if err != nil {
return nil, fmt.Errorf("Failed to list DFbus type files path. err: %v", err)
}
for _, path := range matchesFile {
body, err := os.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("Dfbus read type file %q failed: %v", path, err)
}
infos := strings.Split(string(body), " ")
if len(infos) != 2 {
return nil, fmt.Errorf("Dfbus type file format error")
}
if infos[0] != "block" {
continue
}
if infos[1] == strings.TrimPrefix(volumeId, "d-") {
DFNumber := filepath.Base(filepath.Dir(path))
d.deviceNumber = DFNumber
return d, nil
}
}
} else {
output, err := utils.CommandOnNode("xdragon-bdf", "--nvme", "-id=%s", volumeId).CombinedOutput()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: new usage of nsenter. We should try to remove nsenter usage.

if err != nil {
return nil, fmt.Errorf("Failed to excute bdf command: %s, err: %v", volumeId, err)
}
d.deviceNumber = string(output)
}
return d, nil
}
Expand Down Expand Up @@ -570,5 +631,39 @@ func (d *driver) UnbindDriver() error {
}

func (d *driver) BindDriver(targetDriver string) error {
return utilsio.WriteTrunc(unix.AT_FDCWD, filepath.Join(sysPrefix, "sys/bus", d.machineType.BusName(), "drivers", targetDriver, "bind"), []byte(d.deviceNumber))
err := utilsio.WriteTrunc(unix.AT_FDCWD, filepath.Join(sysPrefix, "sys/bus", d.machineType.BusName(), "devices", d.deviceNumber, "driver_override"), []byte(targetDriver))
if err != nil {
return err
}
if d.machineType == BDF {
return utilsio.WriteTrunc(unix.AT_FDCWD, filepath.Join(sysPrefix, "sys/bus", d.machineType.BusName(), "drivers_probe"), []byte(d.deviceNumber))
}
return nil
}

// GetPCIDeviceDriverType classifies the PCI device identified by
// d.deviceNumber using the numeric output of `lspci -s <device> -n`.
//
// It returns PCITypeVIRTIO when the trimmed output ends with "1001" (the
// virtio block device ID) and PCITypeNVME in every other case. Because the
// signature has no error return, a failing lspci invocation is logged and the
// function still falls back to PCITypeNVME, matching the previous behavior.
func (d *driver) GetPCIDeviceDriverType() string {
	raw, err := exec.Command("lspci", "-s", d.deviceNumber, "-n").CombinedOutput()
	// Convert once to a string so structured logging prints readable text
	// instead of a byte slice.
	output := strings.TrimSpace(string(raw))
	if err != nil {
		// Best effort: surface the failure in the log rather than dropping it.
		klog.ErrorS(err, "GetDeviceDriverType: lspci failed", "deviceNumber", d.deviceNumber, "output", output)
	}
	klog.InfoS("GetDeviceDriverType: get driver type output", "deviceNumber", d.deviceNumber, "output", output)
	// #define PCI_DEVICE_ID_VIRTIO_BLOCK 0x1001
	// #define PCI_DEVICE_ID_ALIBABA_NVME 0x5004
	if strings.HasSuffix(output, "1001") {
		return PCITypeVIRTIO
	}
	return PCITypeNVME
}

func (d *driver) CheckVFIOUsage() error {
actualPath, err := filepath.EvalSymlinks(filepath.Join(sysPrefix, "sys/bus", d.machineType.BusName(), "devices", d.deviceNumber, "iommu_group"))
if err != nil {
return err
}
klog.V(5).InfoS("CheckVFIOUsage: eval symlink success", "path", actualPath)
groupNumber := filepath.Base(actualPath)
// the command returns -1 if nothing is returned
output, _ := exec.Command("lsof", filepath.Join("/dev/vfio", groupNumber)).CombinedOutput()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is expensive; I would strongly recommend not using it. Instead, ask the user to lock this file so that we can use the lock to detect usage, or ask the user to write a PID file so that we can simply check whether the process is alive.

if strings.TrimSpace(string(output)) != "" {
return errors.Errorf("CheckVFIOUsage: device: %s is still be in used, output: %s", d.deviceNumber, output)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Convert output to a string before logging; otherwise it will be printed as a list of numbers.

}
return nil
}
10 changes: 7 additions & 3 deletions pkg/disk/device_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"

utilsio "github.com/kubernetes-sigs/alibaba-cloud-csi-driver/pkg/utils/io"
Expand Down Expand Up @@ -253,7 +254,7 @@ func (m *DeviceManager) WriteSysfs(devicePath, name string, value []byte) error
return nil
}

func (m *DeviceManager) GetDeviceNumberFromBlockDevice(blockDevice, busPrefix string) (string, error) {
func (m *DeviceManager) GetDeviceNumberFromBlockDevice(blockDevice string, busRegex *regexp.Regexp) (string, error) {

major, minor, err := m.DevTmpFS.DevFor(blockDevice)
if err != nil {
Expand All @@ -266,11 +267,14 @@ func (m *DeviceManager) GetDeviceNumberFromBlockDevice(blockDevice, busPrefix st
}
for {
klog.Infof("NewDeviceDriver: get symlink dir: %s", dirEntry)
if dirEntry == ".." || dirEntry == "." {
if dirEntry == ".." || dirEntry == "." || dirEntry == "/" {
return "", fmt.Errorf("NewDeviceDriver: not found device number, blockDevice: %s", blockDevice)
}
parentDir := filepath.Base(filepath.Dir(dirEntry))
if strings.HasPrefix(parentDir, busPrefix) {

matched := busRegex.MatchString(parentDir)
klog.Infof("NewDeviceDriver: busPrefix: %s, parentDir: %s, matched: %v", busRegex.String(), parentDir, matched)
if matched {
return parentDir, nil
} else {
dirEntry = filepath.Dir(dirEntry)
Expand Down
Loading