From e3a564c6eab05691b631587adaf6e5f7a1098dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Mon, 12 Aug 2024 01:39:09 -0400 Subject: [PATCH 1/5] incusd/device/pci: Allow hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber --- internal/server/device/pci.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/internal/server/device/pci.go b/internal/server/device/pci.go index a18b5df2a0c..2fb90d4eff0 100644 --- a/internal/server/device/pci.go +++ b/internal/server/device/pci.go @@ -73,6 +73,11 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) { saveData["last_state.pci.slot.name"] = pciDev.SlotName saveData["last_state.pci.driver"] = pciDev.Driver + pciIOMMUGroup, err := pcidev.DeviceIOMMUGroup(saveData["last_state.pci.slot.name"]) + if err != nil { + return nil, err + } + err = pcidev.DeviceDriverOverride(pciDev, "vfio-pci") if err != nil { return nil, fmt.Errorf("Failed to override IOMMU group driver: %w", err) @@ -82,6 +87,7 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) { []deviceConfig.RunConfigItem{ {Key: "devName", Value: d.name}, {Key: "pciSlotName", Value: saveData["last_state.pci.slot.name"]}, + {Key: "pciIOMMUGroup", Value: fmt.Sprintf("%d", pciIOMMUGroup)}, }...) err = d.volatileSet(saveData) @@ -92,6 +98,11 @@ func (d *pci) Start() (*deviceConfig.RunConfig, error) { return &runConf, nil } +// CanHotPlug returns whether the device can be managed whilst the instance is running. +func (d *pci) CanHotPlug() bool { + return true +} + // Stop is run when the device is removed from the instance. 
func (d *pci) Stop() (*deviceConfig.RunConfig, error) { runConf := deviceConfig.RunConfig{ From bed8715809342875bde90c891f3b65cb85302ec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Mon, 12 Aug 2024 02:11:39 -0400 Subject: [PATCH 2/5] incusd/instance/qmp: Add CheckPCIDevice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber --- .../server/instance/drivers/qmp/commands.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/internal/server/instance/drivers/qmp/commands.go b/internal/server/instance/drivers/qmp/commands.go index 2ca6ef0b488..53a79903797 100644 --- a/internal/server/instance/drivers/qmp/commands.go +++ b/internal/server/instance/drivers/qmp/commands.go @@ -1069,3 +1069,21 @@ func (m *Monitor) SetBlockThrottle(id string, bytesRead int, bytesWrite int, iop return nil } + +// CheckPCIDevice checks if the deviceID exists as a bridged PCI device. +func (m *Monitor) CheckPCIDevice(deviceID string) (bool, error) { + pciDevs, err := m.QueryPCI() + if err != nil { + return false, err + } + + for _, pciDev := range pciDevs { + for _, bridgeDev := range pciDev.Bridge.Devices { + if bridgeDev.DevID == deviceID { + return true, nil + } + } + } + + return false, nil +} From 55173b2f7ef92aa92036e5d4b98c2499d02b0c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Mon, 12 Aug 2024 02:12:55 -0400 Subject: [PATCH 3/5] incusd/instance/qemu: Use monitor.CheckPCIDevice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber --- .../server/instance/drivers/driver_qemu.go | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/internal/server/instance/drivers/driver_qemu.go b/internal/server/instance/drivers/driver_qemu.go index 77a807a3dc1..07a1bbe10e0 100644 --- a/internal/server/instance/drivers/driver_qemu.go +++ 
b/internal/server/instance/drivers/driver_qemu.go @@ -2546,24 +2546,6 @@ func (d *qemu) deviceDetachNIC(deviceName string) error { return err } - // pciDeviceExists checks if the deviceID exists as a bridged PCI device. - pciDeviceExists := func(deviceID string) (bool, error) { - pciDevs, err := monitor.QueryPCI() - if err != nil { - return false, err - } - - for _, pciDev := range pciDevs { - for _, bridgeDev := range pciDev.Bridge.Devices { - if bridgeDev.DevID == deviceID { - return true, nil - } - } - } - - return false, nil - } - escapedDeviceName := linux.PathNameEncode(deviceName) deviceID := fmt.Sprintf("%s%s", qemuDeviceIDPrefix, escapedDeviceName) netDevID := fmt.Sprintf("%s%s", qemuNetDevIDPrefix, escapedDeviceName) @@ -2589,7 +2571,7 @@ func (d *qemu) deviceDetachNIC(deviceName string) error { waitDuration := time.Duration(time.Second * time.Duration(10)) waitUntil := time.Now().Add(waitDuration) for { - devExists, err := pciDeviceExists(deviceID) + devExists, err := monitor.CheckPCIDevice(deviceID) if err != nil { return fmt.Errorf("Failed getting PCI devices to check for NIC detach: %w", err) } From fbc727afa4d8dace8639aafdb444d9f1d0c6ed9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Mon, 12 Aug 2024 02:40:32 -0400 Subject: [PATCH 4/5] incusd/instance/qemu: Tweak comments on deviceStart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber --- internal/server/instance/drivers/driver_qemu.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/server/instance/drivers/driver_qemu.go b/internal/server/instance/drivers/driver_qemu.go index 07a1bbe10e0..1f548d63d37 100644 --- a/internal/server/instance/drivers/driver_qemu.go +++ b/internal/server/instance/drivers/driver_qemu.go @@ -2168,7 +2168,7 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf if runConf != nil { // If instance is running and then live 
attach device. if instanceRunning { - // Attach network interface if requested. + // Attach NIC to running instance. if len(runConf.NetworkInterface) > 0 { err = d.deviceAttachNIC(dev.Name(), configCopy, runConf.NetworkInterface) if err != nil { @@ -2176,6 +2176,7 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf } } + // Attach disk to running instance. for _, mount := range runConf.Mounts { if mount.FSType == "9p" { err = d.deviceAttachPath(dev.Name(), configCopy, mount) @@ -2190,6 +2191,7 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf } } + // Attach USB to running instance. for _, usbDev := range runConf.USBDevice { err = d.deviceAttachUSB(usbDev) if err != nil { From 67daa5290277c6f4981e3b9d2b4ad9e2dc190534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= Date: Mon, 12 Aug 2024 02:41:13 -0400 Subject: [PATCH 5/5] incusd/instance/qemu: Add hotplug support for generic PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber --- .../server/instance/drivers/driver_qemu.go | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/internal/server/instance/drivers/driver_qemu.go b/internal/server/instance/drivers/driver_qemu.go index 1f548d63d37..b757a7ef835 100644 --- a/internal/server/instance/drivers/driver_qemu.go +++ b/internal/server/instance/drivers/driver_qemu.go @@ -2199,6 +2199,14 @@ func (d *qemu) deviceStart(dev device.Device, instanceRunning bool) (*deviceConf } } + // Attach PCI to running instance. + if len(runConf.PCIDevice) > 0 { + err = d.deviceAttachPCI(dev.Name(), configCopy, runConf.PCIDevice) + if err != nil { + return nil, err + } + } + // If running, run post start hooks now (if not, they will be run // once the instance is started). 
err = d.runHooks(runConf.PostHooks) @@ -2472,6 +2480,83 @@ func (d *qemu) deviceAttachNIC(deviceName string, configCopy map[string]string, return nil } +// deviceAttachPCI live attaches a generic PCI device to the instance. +func (d *qemu) deviceAttachPCI(deviceName string, configCopy map[string]string, pciConfig []deviceConfig.RunConfigItem) error { + reverter := revert.New() + defer reverter.Fail() + + // Connect to the QMP monitor. + monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler()) + if err != nil { + return err + } + + // Get the device config. + var devName, pciSlotName, pciIOMMUGroup string + for _, pciItem := range pciConfig { + if pciItem.Key == "devName" { + devName = pciItem.Value + } else if pciItem.Key == "pciSlotName" { + pciSlotName = pciItem.Value + } else if pciItem.Key == "pciIOMMUGroup" { + pciIOMMUGroup = pciItem.Value + } + } + + // PCIe and PCI require a port device name to hotplug the device into. + _, qemuBus, err := d.qemuArchConfig(d.architecture) + if err != nil { + return err + } + + if !slices.Contains([]string{"pcie", "pci"}, qemuBus) { + return fmt.Errorf("Attempting PCI passthrough on a non-PCI system") + } + + qemuDev := make(map[string]string) + escapedDeviceName := linux.PathNameEncode(devName) + + // Iterate through all the instance devices in the same sorted order as is used when allocating the + // boot time devices in order to find the PCI bus slot device we would have used at boot time. + // Then attempt to use that same device, assuming it is available. + pciDevID := qemuPCIDeviceIDStart + for _, dev := range d.expandedDevices.Sorted() { + if dev.Name == deviceName { + break // Found our device. 
+ } + + pciDevID++ + } + + pciDeviceName := fmt.Sprintf("%s%d", busDevicePortPrefix, pciDevID) + d.logger.Debug("Using PCI bus device to hotplug NIC into", logger.Ctx{"device": deviceName, "port": pciDeviceName}) + + qemuDev["bus"] = pciDeviceName + qemuDev["addr"] = "00.0" + qemuDev["driver"] = "vfio-pci" + qemuDev["id"] = fmt.Sprintf("%s%s", qemuDeviceIDPrefix, escapedDeviceName) + qemuDev["host"] = pciSlotName + + if d.state.OS.UnprivUser != "" { + if pciIOMMUGroup == "" { + return fmt.Errorf("No PCI IOMMU group supplied") + } + + vfioGroupFile := fmt.Sprintf("/dev/vfio/%s", pciIOMMUGroup) + err := os.Chown(vfioGroupFile, int(d.state.OS.UnprivUID), -1) + if err != nil { + return fmt.Errorf("Failed to chown vfio group device %q: %w", vfioGroupFile, err) + } + } + + err = monitor.AddDevice(qemuDev) + if err != nil { + return fmt.Errorf("Failed setting up device %q: %w", devName, err) + } + + return nil +} + // deviceStop loads a new device and calls its Stop() function. func (d *qemu) deviceStop(dev device.Device, instanceRunning bool, _ string) error { configCopy := dev.Config() @@ -2527,6 +2612,14 @@ func (d *qemu) deviceStop(dev device.Device, instanceRunning bool, _ string) err } } } + + // Detach generic PCI device from running instance. + if configCopy["type"] == "pci" { + err = d.deviceDetachPCI(dev.Name()) + if err != nil { + return err + } + } } if runConf != nil { @@ -2594,6 +2687,54 @@ func (d *qemu) deviceDetachNIC(deviceName string) error { return nil } +// deviceDetachPCI detaches a generic PCI device from a running instance. +func (d *qemu) deviceDetachPCI(deviceName string) error { + // Connect to the QMP monitor. + monitor, err := qmp.Connect(d.monitorPath(), qemuSerialChardevName, d.getMonitorEventHandler()) + if err != nil { + return err + } + + escapedDeviceName := linux.PathNameEncode(deviceName) + deviceID := fmt.Sprintf("%s%s", qemuDeviceIDPrefix, escapedDeviceName) + + // Request removal of device. 
+ err = monitor.RemoveDevice(deviceID) + if err != nil { + return fmt.Errorf("Failed removing PCI device: %w", err) + } + + _, qemuBus, err := d.qemuArchConfig(d.architecture) + if err != nil { + return err + } + + if slices.Contains([]string{"pcie", "pci"}, qemuBus) { + // Wait until the device is actually removed (or we timeout waiting). + waitDuration := time.Duration(time.Second * time.Duration(10)) + waitUntil := time.Now().Add(waitDuration) + for { + devExists, err := monitor.CheckPCIDevice(deviceID) + if err != nil { + return fmt.Errorf("Failed getting PCI devices to check for detach: %w", err) + } + + if !devExists { + break + } + + if time.Now().After(waitUntil) { + return fmt.Errorf("Failed to detach PCI device after %v", waitDuration) + } + + d.logger.Debug("Waiting for PCI device to be detached", logger.Ctx{"device": deviceName}) + time.Sleep(time.Second * time.Duration(2)) + } + } + + return nil +} + func (d *qemu) monitorPath() string { return filepath.Join(d.RunPath(), "qemu.monitor") }