Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for generic PCI hotplug/hotremove #1111

Merged
merged 5 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions internal/server/device/pci.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) {
saveData["last_state.pci.slot.name"] = pciDev.SlotName
saveData["last_state.pci.driver"] = pciDev.Driver

pciIOMMUGroup, err := pcidev.DeviceIOMMUGroup(saveData["last_state.pci.slot.name"])
if err != nil {
return nil, err
}

err = pcidev.DeviceDriverOverride(pciDev, "vfio-pci")
if err != nil {
return nil, fmt.Errorf("Failed to override IOMMU group driver: %w", err)
Expand All @@ -82,6 +87,7 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) {
[]deviceConfig.RunConfigItem{
{Key: "devName", Value: d.name},
{Key: "pciSlotName", Value: saveData["last_state.pci.slot.name"]},
{Key: "pciIOMMUGroup", Value: fmt.Sprintf("%d", pciIOMMUGroup)},
}...)

err = d.volatileSet(saveData)
Expand All @@ -92,6 +98,11 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) {
return &runConf, nil
}

// CanHotPlug returns whether the device can be managed whilst the instance is running.
// PCI devices unconditionally report true here.
func (d *pci) CanHotPlug() bool {
return true
}

// Stop is run when the device is removed from the instance.
func (d *pci) Stop() (*deviceConfig.RunConfig, error) {
runConf := deviceConfig.RunConfig{
Expand Down
165 changes: 145 additions & 20 deletions internal/server/instance/drivers/driver_qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -2168,14 +2168,15 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf
if runConf != nil {
// If instance is running and then live attach device.
if instanceRunning {
// Attach network interface if requested.
// Attach NIC to running instance.
if len(runConf.NetworkInterface) > 0 {
err = d.deviceAttachNIC(dev.Name(), configCopy, runConf.NetworkInterface)
if err != nil {
return nil, err
}
}

// Attach disk to running instance.
for _, mount := range runConf.Mounts {
if mount.FSType == "9p" {
err = d.deviceAttachPath(dev.Name(), configCopy, mount)
Expand All @@ -2190,13 +2191,22 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf
}
}

// Attach USB to running instance.
for _, usbDev := range runConf.USBDevice {
err = d.deviceAttachUSB(usbDev)
if err != nil {
return nil, err
}
}

// Attach PCI to running instance.
if len(runConf.PCIDevice) > 0 {
err = d.deviceAttachPCI(dev.Name(), configCopy, runConf.PCIDevice)
if err != nil {
return nil, err
}
}

// If running, run post start hooks now (if not, they will be run
// once the instance is started).
err = d.runHooks(runConf.PostHooks)
Expand Down Expand Up @@ -2470,6 +2480,83 @@ func (d *qemu) deviceAttachNIC(deviceName string, configCopy map[string]string,
return nil
}

// deviceAttachPCI live attaches a generic PCI device to the instance.
//
// pciConfig is scanned for the "devName", "pciSlotName" and "pciIOMMUGroup" items; any other
// keys are ignored. The host device at pciSlotName is passed to QEMU using the vfio-pci driver
// and plugged into the bus port whose index matches the position of deviceName in the sorted
// expanded devices list (the same ordering used when allocating boot time devices).
//
// An error is returned if the QMP monitor cannot be reached, the architecture does not use a
// PCI or PCIe bus, the slot name (or, when an unprivileged user is configured, the IOMMU group)
// is missing, the vfio group device cannot be chowned, or QEMU rejects the device.
func (d *qemu) deviceAttachPCI(deviceName string, configCopy map[string]string, pciConfig []deviceConfig.RunConfigItem) error {
	// Connect to the QMP monitor of the running instance.
	monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler())
	if err != nil {
		return err
	}

	// Get the device config.
	var devName, pciSlotName, pciIOMMUGroup string
	for _, pciItem := range pciConfig {
		switch pciItem.Key {
		case "devName":
			devName = pciItem.Value
		case "pciSlotName":
			pciSlotName = pciItem.Value
		case "pciIOMMUGroup":
			pciIOMMUGroup = pciItem.Value
		}
	}

	// PCIe and PCI require a port device name to hotplug the PCI device into.
	_, qemuBus, err := d.qemuArchConfig(d.architecture)
	if err != nil {
		return err
	}

	if !slices.Contains([]string{"pcie", "pci"}, qemuBus) {
		return fmt.Errorf("Attempting PCI passthrough on a non-PCI system")
	}

	// Without a host slot there is nothing to pass through; fail early rather than
	// handing QEMU an empty "host" property.
	if pciSlotName == "" {
		return fmt.Errorf("No PCI slot name supplied")
	}

	// Iterate through all the instance devices in the same sorted order as is used when allocating the
	// boot time devices in order to find the PCI bus slot device we would have used at boot time.
	// Then attempt to use that same device, assuming it is available.
	pciDevID := qemuPCIDeviceIDStart
	for _, dev := range d.expandedDevices.Sorted() {
		if dev.Name == deviceName {
			break // Found our device.
		}

		pciDevID++
	}

	pciDeviceName := fmt.Sprintf("%s%d", busDevicePortPrefix, pciDevID)
	d.logger.Debug("Using PCI bus device to hotplug PCI device into", logger.Ctx{"device": deviceName, "port": pciDeviceName})

	escapedDeviceName := linux.PathNameEncode(devName)
	qemuDev := map[string]string{
		"bus":    pciDeviceName,
		"addr":   "00.0",
		"driver": "vfio-pci",
		"id":     fmt.Sprintf("%s%s", qemuDeviceIDPrefix, escapedDeviceName),
		"host":   pciSlotName,
	}

	// When an unprivileged user is configured, hand ownership of the vfio group device
	// to the unprivileged UID before asking QEMU to open it.
	if d.state.OS.UnprivUser != "" {
		if pciIOMMUGroup == "" {
			return fmt.Errorf("No PCI IOMMU group supplied")
		}

		vfioGroupFile := fmt.Sprintf("/dev/vfio/%s", pciIOMMUGroup)
		err = os.Chown(vfioGroupFile, int(d.state.OS.UnprivUID), -1)
		if err != nil {
			return fmt.Errorf("Failed to chown vfio group device %q: %w", vfioGroupFile, err)
		}
	}

	err = monitor.AddDevice(qemuDev)
	if err != nil {
		return fmt.Errorf("Failed setting up device %q: %w", devName, err)
	}

	return nil
}

// deviceStop loads a new device and calls its Stop() function.
func (d *qemu) deviceStop(dev device.Device, instanceRunning bool, _ string) error {
configCopy := dev.Config()
Expand Down Expand Up @@ -2525,6 +2612,14 @@ func (d *qemu) deviceStop(dev device.Device, instanceRunning bool, _ string) err
}
}
}

// Detach generic PCI device from running instance.
if configCopy["type"] == "pci" {
err = d.deviceDetachPCI(dev.Name())
if err != nil {
return err
}
}
}

if runConf != nil {
Expand All @@ -2546,24 +2641,6 @@ func (d *qemu) deviceDetachNIC(deviceName string) error {
return err
}

// pciDeviceExists checks if the deviceID exists as a bridged PCI device.
pciDeviceExists := func(deviceID string) (bool, error) {
pciDevs, err := monitor.QueryPCI()
if err != nil {
return false, err
}

for _, pciDev := range pciDevs {
for _, bridgeDev := range pciDev.Bridge.Devices {
if bridgeDev.DevID == deviceID {
return true, nil
}
}
}

return false, nil
}

escapedDeviceName := linux.PathNameEncode(deviceName)
deviceID := fmt.Sprintf("%s%s", qemuDeviceIDPrefix, escapedDeviceName)
netDevID := fmt.Sprintf("%s%s", qemuNetDevIDPrefix, escapedDeviceName)
Expand All @@ -2589,7 +2666,7 @@ func (d *qemu) deviceDetachNIC(deviceName string) error {
waitDuration := time.Duration(time.Second * time.Duration(10))
waitUntil := time.Now().Add(waitDuration)
for {
devExists, err := pciDeviceExists(deviceID)
devExists, err := monitor.CheckPCIDevice(deviceID)
if err != nil {
return fmt.Errorf("Failed getting PCI devices to check for NIC detach: %w", err)
}
Expand All @@ -2610,6 +2687,54 @@ func (d *qemu) deviceDetachNIC(deviceName string) error {
return nil
}

// deviceDetachPCI detaches a generic PCI device from a running instance.
//
// Removal is requested through the QMP monitor. When the architecture uses a PCI or PCIe bus,
// the monitor is then polled every 2 seconds until CheckPCIDevice no longer reports the device;
// if it is still reported once 10 seconds have elapsed an error is returned.
func (d *qemu) deviceDetachPCI(deviceName string) error {
	// Connect to the instance's QMP monitor.
	mon, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler())
	if err != nil {
		return err
	}

	qemuID := fmt.Sprintf("%s%s", qemuDeviceIDPrefix, linux.PathNameEncode(deviceName))

	// Ask QEMU to unplug the device.
	err = mon.RemoveDevice(qemuID)
	if err != nil {
		return fmt.Errorf("Failed removing PCI device: %w", err)
	}

	_, busName, err := d.qemuArchConfig(d.architecture)
	if err != nil {
		return err
	}

	// Only PCI and PCIe buses are polled for completion of the removal.
	if busName != "pcie" && busName != "pci" {
		return nil
	}

	const (
		detachTimeout = 10 * time.Second
		pollInterval  = 2 * time.Second
	)

	// Wait until the device is actually removed (or we timeout waiting).
	deadline := time.Now().Add(detachTimeout)
	for {
		stillPresent, err := mon.CheckPCIDevice(qemuID)
		if err != nil {
			return fmt.Errorf("Failed getting PCI devices to check for detach: %w", err)
		}

		if !stillPresent {
			return nil
		}

		if time.Now().After(deadline) {
			return fmt.Errorf("Failed to detach PCI device after %v", detachTimeout)
		}

		d.logger.Debug("Waiting for PCI device to be detached", logger.Ctx{"device": deviceName})
		time.Sleep(pollInterval)
	}
}

// monitorPath returns the location of the "qemu.monitor" entry inside the instance's run path.
func (d *qemu) monitorPath() string {
	runDir := d.RunPath()

	return filepath.Join(runDir, "qemu.monitor")
}
Expand Down
18 changes: 18 additions & 0 deletions internal/server/instance/drivers/qmp/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -1069,3 +1069,21 @@ func (m *Monitor) SetBlockThrottle(id string, bytesRead int, bytesWrite int, iop

return nil
}

// CheckPCIDevice reports whether a device with the given deviceID is currently listed
// among the devices attached behind any PCI bridge returned by QueryPCI.
// Only bridge children are inspected. Any error from QueryPCI is returned as-is with a false result.
func (m *Monitor) CheckPCIDevice(deviceID string) (bool, error) {
	devices, err := m.QueryPCI()
	if err != nil {
		return false, err
	}

	for _, parent := range devices {
		for _, child := range parent.Bridge.Devices {
			if child.DevID != deviceID {
				continue
			}

			return true, nil
		}
	}

	return false, nil
}
Loading