From 39ce89d080ec4adf484a018f57a9815bab0d6df4 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Mon, 2 Oct 2023 11:39:13 -0400 Subject: [PATCH 1/3] virtual: get real PCI address for each device found We can't rely on the PCI address from the metadata so we will lookup the real PCI address for the NIC that matches the MAC address. Libvirt/QEMU cannot guarantee that the address specified in the XML will match the address seen by the guest. This is a well known limitation: https://libvirt.org/pci-addresses.html When using the q35 machine type, it highlights this issue due to the change from using PCI to PCI-E bus for virtual devices. With that said, the PCI value in Nova Metadata is a best effort hint due to the limitations mentioned above. Therefore we will lookup the real PCI address for the NIC that matches the MAC address. --- pkg/utils/utils_virtual.go | 61 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/pkg/utils/utils_virtual.go b/pkg/utils/utils_virtual.go index 193e67d53..b6ea15a97 100644 --- a/pkg/utils/utils_virtual.go +++ b/pkg/utils/utils_virtual.go @@ -8,10 +8,12 @@ import ( "os" "path/filepath" "strconv" + "strings" "github.com/golang/glog" "github.com/hashicorp/go-retryablehttp" "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/net" dputils "github.com/k8snetworkplumbingwg/sriov-network-device-plugin/pkg/utils" @@ -53,12 +55,15 @@ const ( ospMetaDataBaseURL = "http://169.254.169.254/openstack/2018-08-27" ospHostNetworkDataFile = ospHostMetaDataDir + "/network_data.json" ospHostMetaDataFile = ospHostMetaDataDir + "/meta_data.json" - ospNetworkDataFile = ospMetaDataDir + "/network_data.json" - ospMetaDataFile = ospMetaDataDir + "/meta_data.json" ospNetworkDataURL = ospMetaDataBaseURL + "/network_data.json" ospMetaDataURL = ospMetaDataBaseURL + "/meta_data.json" ) +var ( + ospNetworkDataFile = ospMetaDataDir + "/network_data.json" + ospMetaDataFile = ospMetaDataDir + "/meta_data.json" +) + // OSPMetaDataDevice -- Device structure within meta_data.json type OSPMetaDataDevice struct { Vlan int `json:"vlan,omitempty"` @@ -117,7 +122,39 @@ func GetOpenstackData(useHostPath bool) (metaData *OSPMetaData, networkData *OSP metaData, networkData, err = getOpenstackDataFromConfigDrive(useHostPath) if err != nil { metaData, networkData, err = getOpenstackDataFromMetadataService() + if err != nil { + return metaData, networkData, fmt.Errorf("GetOpenStackData(): error getting OpenStack data: %w", err) + } } + + // We can't rely on the PCI address from the metadata so we will lookup the real PCI address + // for the NIC that matches the MAC address. + // + // Libvirt/QEMU cannot guarantee that the address specified in the XML will match the address seen by the guest. + // This is a well known limitation: https://libvirt.org/pci-addresses.html + // When using the q35 machine type, it highlights this issue due to the change from using PCI to PCI-E bus for virtual devices. + // + // With that said, the PCI value in Nova Metadata is a best effort hint due to the limitations mentioned above. Therefore + // we will lookup the real PCI address for the NIC that matches the MAC address. + netInfo, err := ghw.Network() + if err != nil { + return metaData, networkData, fmt.Errorf("GetOpenStackData(): error getting network info: %w", err) + } + for i, device := range metaData.Devices { + realPCIAddr, err := getPCIAddressFromMACAddress(device.Mac, netInfo.NICs) + if err != nil { + // If we can't find the PCI address, we will just print a warning, return the data as is with no error. + // In the future, we'll want to drain the node if sno-initial-node-state.json doesn't exist when daemon is restarted and when we have SR-IOV + // allocated devices already. + glog.Warningf("GetOpenstackData(): error getting PCI address for device %s: %v", device.Mac, err) + return metaData, networkData, nil + } + if realPCIAddr != device.Address { + glog.V(2).Infof("GetOpenstackData(): PCI address for device %s does not match Nova metadata value %s, it'll be overwritten with %s", device.Mac, device.Address, realPCIAddr) + metaData.Devices[i].Address = realPCIAddr + } + } + return metaData, networkData, err } @@ -205,6 +242,26 @@ func getOpenstackDataFromMetadataService() (metaData *OSPMetaData, networkData * return metaData, networkData, nil } +// getPCIAddressFromMACAddress returns the PCI address of a device given its MAC address +func getPCIAddressFromMACAddress(macAddress string, nics []*net.NIC) (string, error) { + var pciAddress string + for _, nic := range nics { + if strings.EqualFold(nic.MacAddress, macAddress) { + if pciAddress == "" { + pciAddress = *nic.PCIAddress + } else { + return "", fmt.Errorf("more than one device found with MAC address %s is unsupported", macAddress) + } + } + } + + if pciAddress != "" { + return pciAddress, nil + } + + return "", fmt.Errorf("no device found with MAC address %s", macAddress) +} + // CreateOpenstackDevicesInfo create the openstack device info map func CreateOpenstackDevicesInfo(metaData *OSPMetaData, networkData *OSPNetworkData) (OSPDevicesInfo, error) { glog.Infof("CreateOpenstackDevicesInfo()") From 55baf5a3cb9d5638bd07dfcc34f6a95432dc0aef Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Wed, 18 Oct 2023 10:33:51 -0400 Subject: [PATCH 2/3] utils_virtual: unit tests Co-Authored-By: Andrea Panattoni Co-Authored-By: Emilien Macchi --- pkg/utils/testdata/meta_data.json | 25 ++++++++++ pkg/utils/testdata/network_data.json | 71 ++++++++++++++++++++++++++++ pkg/utils/utils_virtual_test.go | 58 +++++++++++++++++++++++ 3 files changed, 154 insertions(+) create mode 100644 pkg/utils/testdata/meta_data.json create mode 100644 pkg/utils/testdata/network_data.json create mode 100644 pkg/utils/utils_virtual_test.go diff --git a/pkg/utils/testdata/meta_data.json b/pkg/utils/testdata/meta_data.json new file mode 100644 index 000000000..684e1ca78 --- /dev/null +++ b/pkg/utils/testdata/meta_data.json @@ -0,0 +1,25 @@ +{ + "uuid": "7114a452-2e00-4ade-856a-42d4dc7c894f", + "name": "1etsl9fpzrhocpnfv-7bhdj-worker-0-fnz68", + "launch_index": 0, + "availability_zone": "worker", + "project_id": "00552bf9217648d7a5714fbd25f92df2", + "devices": [ + { + "vlan": 177, + "vf_trusted": true, + "type": "nic", + "mac": "fa:16:3e:00:00:00", + "bus": "pci", + "address": "0000:04:00.0" + }, + { + "vlan": 178, + "vf_trusted": true, + "type": "nic", + "mac": "fa:16:3e:11:11:11", + "bus": "pci", + "address": "0000:05:00.0" + } + ] +} diff --git a/pkg/utils/testdata/network_data.json b/pkg/utils/testdata/network_data.json new file mode 100644 index 000000000..738fda734 --- /dev/null +++ b/pkg/utils/testdata/network_data.json @@ -0,0 +1,71 @@ +{ + "links": [ + { + "id": "tapa046a03c-49", + "vif_id": "a046a03c-4996-4212-8f23-3b8f197b03b3", + "type": "ovs", + "mtu": 1500, + "ethernet_mac_address": "fa:16:3e:9c:9f:89" + }, + { + "id": "tapdc2a9cd3-f7", + "vif_id": "dc2a9cd3-f7f5-40df-9d52-328f86e6011b", + "type": "hw_veb", + "mtu": 9000, + "ethernet_mac_address": "fa:16:3e:00:00:00" + }, + { + "id": "tapce5054e4-c6", + "vif_id": "ce5054e4-c65a-4c28-843c-155ab8fed825", + "type": "hw_veb", + "mtu": 9000, + "ethernet_mac_address": "fa:16:3e:11:11:11" + } + ], + "networks": [ + { + "id": "network0", + "type": "ipv4_dhcp", + "link": "tapa046a03c-49", + "network_id": "5765e37b-0a13-49d2-a598-537178ce254f" + }, + { + "id": "network1", + "type": "ipv4", + "link": "tapdc2a9cd3-f7", + "ip_address": "192.168.177.4", + "netmask": "255.255.255.0", + "routes": [ + { + "network": "0.0.0.0", + "netmask": "0.0.0.0", + "gateway": "192.168.177.1" + } + ], + "network_id": "b3ba899a-e06c-49da-93c5-c992048390b2", + "services": [] + }, + { + "id": "network2", + "type": "ipv4", + "link": "tapce5054e4-c6", + "ip_address": "192.168.178.107", + "netmask": "255.255.255.0", + "routes": [ + { + "network": "0.0.0.0", + "netmask": "0.0.0.0", + "gateway": "192.168.178.1" + } + ], + "network_id": "a81317cb-aa3d-4675-99cf-aa049f964a3c", + "services": [] + } + ], + "services": [ + { + "type": "dns", + "address": "10.1.2.3" + } + ] +} diff --git a/pkg/utils/utils_virtual_test.go b/pkg/utils/utils_virtual_test.go new file mode 100644 index 000000000..6890d7304 --- /dev/null +++ b/pkg/utils/utils_virtual_test.go @@ -0,0 +1,58 @@ +package utils + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "github.com/jaypipes/ghw" + "github.com/jaypipes/ghw/pkg/net" + "github.com/jaypipes/ghw/pkg/option" + "k8s.io/utils/pointer" +) + +func TestUtils(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Utils") +} + +var _ = Describe("Virtual", func() { + + Context("GetOpenstackData", func() { + It("PCI address replacement based on MAC address", func() { + ospNetworkDataFile = "./testdata/network_data.json" + ospMetaDataFile = "./testdata/meta_data.json" + DeferCleanup(func() { + ospNetworkDataFile = ospMetaDataDir + "/network_data.json" + ospMetaDataFile = ospMetaDataDir + "/meta_data.json" + }) + + ghw.Network = func(opts ...*option.Option) (*net.Info, error) { + return &net.Info{ + NICs: []*net.NIC{{ + MacAddress: "fa:16:3e:00:00:00", + PCIAddress: pointer.String("0000:04:00.0"), + }, { + MacAddress: "fa:16:3e:11:11:11", + PCIAddress: pointer.String("0000:99:99.9"), + }}, + }, nil + } + + DeferCleanup(func() { + ghw.Network = net.New + }) + + metaData, _, err := GetOpenstackData(false) + Expect(err).ToNot(HaveOccurred()) + + Expect(metaData.Devices).To(HaveLen(2)) + Expect(metaData.Devices[0].Mac).To(Equal("fa:16:3e:00:00:00")) + Expect(metaData.Devices[0].Address).To(Equal("0000:04:00.0")) + Expect(metaData.Devices[1].Mac).To(Equal("fa:16:3e:11:11:11")) + Expect(metaData.Devices[1].Address).To(Equal("0000:99:99.9")) + + }) + }) +}) From 2a1bbec5485746eac06382c50af43cd92c7cc061 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Thu, 5 Oct 2023 16:01:10 -0400 Subject: [PATCH 3/3] Add Virtio 1.0 network device as supported https://devicehunt.com/view/type/pci/vendor/1AF4/device/1041 https://qemu.readthedocs.io/en/master/specs/pci-ids.html Modern QEMU brings up virtio 1.0 network device, we need to support that for DPDK. --- deploy/configmap.yaml | 1 + deployment/sriov-network-operator/templates/configmap.yaml | 1 + doc/supported-hardware.md | 2 ++ pkg/daemon/daemon.go | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index 3de5f749f..b21b07ea6 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -32,6 +32,7 @@ data: Broadcom_bnxt_BCM75508_2x100G: "14e4 1750 1806" Qlogic_qede_QL45000_50G: "1077 1654 1664" Red_Hat_Virtio_network_device: "1af4 1000 1000" + Red_Hat_Virtio_1_0_network_device: "1af4 1041 1041" Marvell_OCTEON_TX2_CN96XX: "177d b200 b203" Marvell_OCTEON_TX2_CN98XX: "177d b100 b103" Marvell_OCTEON_Fusion_CNF95XX: "177d b600 b603" diff --git a/deployment/sriov-network-operator/templates/configmap.yaml b/deployment/sriov-network-operator/templates/configmap.yaml index 3de5f749f..b21b07ea6 100644 --- a/deployment/sriov-network-operator/templates/configmap.yaml +++ b/deployment/sriov-network-operator/templates/configmap.yaml @@ -32,6 +32,7 @@ data: Broadcom_bnxt_BCM75508_2x100G: "14e4 1750 1806" Qlogic_qede_QL45000_50G: "1077 1654 1664" Red_Hat_Virtio_network_device: "1af4 1000 1000" + Red_Hat_Virtio_1_0_network_device: "1af4 1041 1041" Marvell_OCTEON_TX2_CN96XX: "177d b200 b203" Marvell_OCTEON_TX2_CN98XX: "177d b100 b103" Marvell_OCTEON_Fusion_CNF95XX: "177d b600 b603" diff --git a/doc/supported-hardware.md b/doc/supported-hardware.md index fde3662d7..446190905 100644 --- a/doc/supported-hardware.md +++ b/doc/supported-hardware.md @@ -70,6 +70,8 @@ The following table depicts the supported SR-IOV hardware features of each suppo | Marvell OCTEON Fusion CNF95XX | V | V | X | | Marvell OCTEON 10 CN10XXX | V | V | X | | Marvell OCTEON Fusion CNF105XX | V | V | X | +| Red_Hat_Virtio_network_device | X | V | X | +| Red_Hat_Virtio_1_0_network_device | X | V | X | # Adding new Hardware diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 5feb37e12..207685180 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -1138,7 +1138,7 @@ func (dn *Daemon) prepareNMUdevRule() error { // we should not destroy the cluster if the operator is installed there supportedVfIds := []string{} for _, vfID := range sriovnetworkv1.GetSupportedVfIds() { - if vfID == "0x1000" { + if vfID == "0x1000" || vfID == "0x1041" { continue } supportedVfIds = append(supportedVfIds, vfID)