diff --git a/README.md b/README.md
index f509ec182..50d4cb88c 100644
--- a/README.md
+++ b/README.md
@@ -121,9 +121,9 @@ metadata:
 spec:
   interfaces:
   - deviceType: vfio-pci
-      mtu: 1500
-      numVfs: 4
-      pciAddress: 0000:86:00.0
+    mtu: 1500
+    numVfs: 4
+    pciAddress: 0000:86:00.0
 status:
   interfaces:
   - deviceID: "1583"
@@ -198,6 +198,19 @@ In a virtual deployment:
 - The numVfs parameter has no effect as there is always 1 VF
 - The deviceType field depends upon whether the underlying device/driver is [native-bifurcating or non-bifurcating](https://doc.dpdk.org/guides/howto/flow_bifurcation.html) For example, the supported Mellanox devices support native-bifurcating drivers and therefore deviceType should be netdevice (default). The supported Intel devices are non-bifurcating and should be set to vfio-pci.
 
+#### Multiple policies
+
+When multiple SriovNetworkNodePolicy CRs are present, the `priority` field
+(0 is the highest priority) is used to resolve any conflicts. Conflicts occur
+only when the same PF is referenced by multiple policies. The final desired
+configuration is saved in `SriovNetworkNodeState.spec.interfaces`.
+
+Policies are processed in order of priority (lowest priority first), then by `name`
+(starting from `a`). Policies with the same **priority** or **non-overlapping
+VF groups** (when the #-notation is used in the pfName field) are merged; otherwise
+only the highest priority policy is applied. When same-priority policies have
+overlapping VF groups, only the last processed policy is applied.
+
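+As an illustration (the PF name, VF ranges, and resource names below are made
+up), the following two same-priority policies partition the VFs of a single PF
+into two non-overlapping pools; because their VF groups do not overlap, both
+policies are merged into `SriovNetworkNodeState.spec.interfaces`:
+
+```yaml
+apiVersion: sriovnetwork.openshift.io/v1
+kind: SriovNetworkNodePolicy
+metadata:
+  name: policy-netdev
+  namespace: sriov-network-operator
+spec:
+  resourceName: netres
+  nodeSelector:
+    feature.node.kubernetes.io/network-sriov.capable: "true"
+  priority: 10
+  numVfs: 8
+  nicSelector:
+    pfNames: ["ens803f0#0-3"]
+  deviceType: netdevice
+---
+apiVersion: sriovnetwork.openshift.io/v1
+kind: SriovNetworkNodePolicy
+metadata:
+  name: policy-dpdk
+  namespace: sriov-network-operator
+spec:
+  resourceName: dpdkres
+  nodeSelector:
+    feature.node.kubernetes.io/network-sriov.capable: "true"
+  priority: 10
+  numVfs: 8
+  nicSelector:
+    pfNames: ["ens803f0#4-7"]
+  deviceType: vfio-pci
+```
+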
 ## Components and design
 
 This operator is split into 2 components:
diff --git a/api/v1/helper.go b/api/v1/helper.go
index 3dee6681d..965e2d69a 100644
--- a/api/v1/helper.go
+++ b/api/v1/helper.go
@@ -252,12 +252,12 @@ func UniqueAppend(inSlice []string, strings ...string) []string {
 }
 
 // Apply policy to SriovNetworkNodeState CR
-func (p *SriovNetworkNodePolicy) Apply(state *SriovNetworkNodeState, merge bool) {
+func (p *SriovNetworkNodePolicy) Apply(state *SriovNetworkNodeState, equalPriority bool) error {
     s := p.Spec.NicSelector
     if s.Vendor == "" && s.DeviceID == "" && len(s.RootDevices) == 0 &&
         len(s.PfNames) == 0 && len(s.NetFilter) == 0 {
         // Empty NicSelector match none
-        return
+        return nil
     }
     for _, iface := range state.Status.Interfaces {
         if s.Selected(&iface) {
@@ -268,55 +268,76 @@ func (p *SriovNetworkNodePolicy) Apply(state *SriovNetworkNodeState, merge bool)
                 Name: iface.Name,
                 LinkType: p.Spec.LinkType,
                 EswitchMode: p.Spec.EswitchMode,
+                NumVfs: p.Spec.NumVfs,
             }
-            var group *VfGroup
             if p.Spec.NumVfs > 0 {
-                result.NumVfs = p.Spec.NumVfs
-                group, _ = p.generateVfGroup(&iface)
+                group, err := p.generateVfGroup(&iface)
+                if err != nil {
+                    return err
+                }
+                result.VfGroups = []VfGroup{*group}
                 found := false
                 for i := range state.Spec.Interfaces {
                     if state.Spec.Interfaces[i].PciAddress == result.PciAddress {
                         found = true
-                        // merge PF configurations when:
-                        // 1. SR-IOV partition is configured
-                        // 2. SR-IOV partition policies have the same priority
-                        result = state.Spec.Interfaces[i].mergePfConfigs(result, merge)
-                        result.VfGroups = state.Spec.Interfaces[i].mergeVfGroups(group)
+                        state.Spec.Interfaces[i].mergeConfigs(&result, equalPriority)
                         state.Spec.Interfaces[i] = result
                         break
                     }
                 }
                 if !found {
-                    result.VfGroups = []VfGroup{*group}
                     state.Spec.Interfaces = append(state.Spec.Interfaces, result)
                 }
             }
         }
     }
+    return nil
 }
 
-func (iface Interface) mergePfConfigs(input Interface, merge bool) Interface {
-    if merge {
-        if input.Mtu < iface.Mtu {
-            input.Mtu = iface.Mtu
-        }
-        if input.NumVfs < iface.NumVfs {
-            input.NumVfs = iface.NumVfs
+// mergeConfigs merges configs from multiple policies, where the last one has
+// the highest priority. This merge applies when: 1. SR-IOV partitioning is
+// configured with the #-notation in pfName, and 2. the VF groups are
+// non-overlapping or the SR-IOV policies have the same priority.
+func (iface Interface) mergeConfigs(input *Interface, equalPriority bool) {
+    m := false
+    // merge VF groups (input.VfGroups already contains the highest priority):
+    // - skip a group with the same ResourceName,
+    // - skip overlapping groups (keep only the highest priority one)
+    for _, gr := range iface.VfGroups {
+        if gr.ResourceName == input.VfGroups[0].ResourceName || gr.isVFRangeOverlapping(input.VfGroups[0]) {
+            continue
         }
+        m = true
+        input.VfGroups = append(input.VfGroups, gr)
+    }
+
+    if !equalPriority && !m {
+        return
+    }
+
+    // for the MTU and NumVfs configuration take the highest value
+    if input.Mtu < iface.Mtu {
+        input.Mtu = iface.Mtu
+    }
+    if input.NumVfs < iface.NumVfs {
+        input.NumVfs = iface.NumVfs
     }
-    return input
 }
 
-func (iface Interface) mergeVfGroups(input *VfGroup) []VfGroup {
-    groups := iface.VfGroups
-    for i := range groups {
-        if groups[i].ResourceName == input.ResourceName {
-            groups[i] = *input
-            return groups
-        }
+func (gr VfGroup) isVFRangeOverlapping(group VfGroup) bool {
+    rngSt, rngEnd, err := parseRange(gr.VfRange)
+    if err != nil {
+        return false
+    }
+    rngSt2, rngEnd2, err := parseRange(group.VfRange)
+    if err != nil {
+        return false
+    }
+    // compare starting from the range with the lower start index
+    if rngSt < rngSt2 {
+        return IndexInRange(rngSt2, gr.VfRange) || IndexInRange(rngEnd2, gr.VfRange)
    }
-    groups = append(groups, *input)
-    return groups
+    return IndexInRange(rngSt, group.VfRange) || IndexInRange(rngEnd, group.VfRange)
 }
 
 func (p *SriovNetworkNodePolicy) generateVfGroup(iface *InterfaceExt) (*VfGroup, error) {
@@ -327,6 +348,7 @@ func (p *SriovNetworkNodePolicy) generateVfGroup(iface *InterfaceExt,
     for _, selector := range p.Spec.NicSelector.PfNames {
         pfName, rngStart, rngEnd, err = ParsePFName(selector)
         if err != nil {
+            log.Error(err, "Unable to parse PF Name.")
             return nil, err
         }
         if pfName == iface.Name {
diff --git a/api/v1/helper_test.go b/api/v1/helper_test.go
index a8dc9b0ed..2ea80d7ab 100644
--- a/api/v1/helper_test.go
+++ b/api/v1/helper_test.go
@@ -9,7 +9,10 @@ import (
     "path/filepath"
     "testing"
 
+    "github.com/google/go-cmp/cmp"
+
     v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 var update = flag.Bool("updategolden", false, "update .golden files")
@@ -19,6 +22,77 @@ func init() {
     v1.MANIFESTS_PATH = "../../bindata/manifests/cni-config"
 }
 
+func newNodeState() *v1.SriovNetworkNodeState {
+    return &v1.SriovNetworkNodeState{
+        Spec: v1.SriovNetworkNodeStateSpec{},
+        Status: v1.SriovNetworkNodeStateStatus{
+            Interfaces: []v1.InterfaceExt{
+                {
+                    VFs: []v1.VirtualFunction{
+                        {},
+                    },
DeviceID: "158b", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f0", + PciAddress: "0000:86:00.0", + Vendor: "8086", + NumVfs: 4, + TotalVfs: 64, + }, + { + VFs: []v1.VirtualFunction{ + {}, + }, + DeviceID: "158b", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f1", + PciAddress: "0000:86:00.1", + Vendor: "8086", + NumVfs: 4, + TotalVfs: 64, + }, + { + VFs: []v1.VirtualFunction{ + {}, + }, + DeviceID: "1015", + Driver: "i40e", + Mtu: 1500, + Name: "ens803f2", + PciAddress: "0000:86:00.2", + Vendor: "8086", + NumVfs: 4, + TotalVfs: 64, + }, + }, + }, + } +} + +func newNodePolicy() *v1.SriovNetworkNodePolicy { + return &v1.SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: v1.SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: v1.SriovNetworkNicSelector{ + PfNames: []string{"ens803f1"}, + RootDevices: []string{"0000:86:00.1"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 2, + Priority: 99, + ResourceName: "p1res", + }, + } +} + func TestRendering(t *testing.T) { testtable := []struct { tname string @@ -126,3 +200,415 @@ func TestIBRendering(t *testing.T) { }) } } + +func TestSriovNetworkNodePolicyApply(t *testing.T) { + testtable := []struct { + tname string + currentState *v1.SriovNetworkNodeState + policy *v1.SriovNetworkNodePolicy + expectedInterfaces v1.Interfaces + equalP bool + expectedErr bool + }{ + { + tname: "starting config", + currentState: newNodeState(), + policy: newNodePolicy(), + equalP: false, + expectedInterfaces: []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 2, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: "netdevice", + ResourceName: "p1res", + VfRange: "0-1", + PolicyName: "p1", + }, + }, + }, + }, + }, + { + tname: "one policy present different pf", + currentState: func() *v1.SriovNetworkNodeState { + st := newNodeState() + st.Spec.Interfaces = []v1.Interface{ + { + Name: "ens803f0", + NumVfs: 2, + PciAddress: "0000:86:00.0", + VfGroups: []v1.VfGroup{ + { + DeviceType: "netdevice", + ResourceName: "prevres", + VfRange: "0-1", + PolicyName: "p2", + }, + }, + }, + } + return st + }(), + policy: newNodePolicy(), + equalP: false, + expectedInterfaces: []v1.Interface{ + { + Name: "ens803f0", + NumVfs: 2, + PciAddress: "0000:86:00.0", + VfGroups: []v1.VfGroup{ + { + DeviceType: "netdevice", + ResourceName: "prevres", + VfRange: "0-1", + PolicyName: "p2", + }, + }, + }, + { + Name: "ens803f1", + NumVfs: 2, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: "netdevice", + ResourceName: "p1res", + VfRange: "0-1", + PolicyName: "p1", + }, + }, + }, + }, + }, + { + // policy overwrites (applied last has higher priority) what is inside the + // SriovNetworkNodeState + tname: "one policy present same pf different priority", + currentState: func() *v1.SriovNetworkNodeState { + st := newNodeState() + st.Spec.Interfaces = []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 3, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: "vfio-pci", + ResourceName: "vfiores", + VfRange: "0-1", + PolicyName: "p2", + }, + }, + }, + } + return st + }(), + policy: newNodePolicy(), + equalP: false, + expectedInterfaces: []v1.Interface{ + { + Name: "ens803f1", + NumVfs: 2, + PciAddress: "0000:86:00.1", + VfGroups: []v1.VfGroup{ + { + DeviceType: "netdevice", + ResourceName: "p1res", + VfRange: "0-1", + PolicyName: "p1", + }, + }, + }, + }, + }, + { + // policy with same priority, but there 
+            // policy with same priority, but there is a VfRange overlap, so only
+            // the last applied group stays; NumVfs and MTU are still merged
+            tname: "one policy present same pf same priority",
+            currentState: func() *v1.SriovNetworkNodeState {
+                st := newNodeState()
+                st.Spec.Interfaces = []v1.Interface{
+                    {
+                        Name: "ens803f1",
+                        NumVfs: 3,
+                        PciAddress: "0000:86:00.1",
+                        Mtu: 2000,
+                        VfGroups: []v1.VfGroup{
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "vfiores",
+                                VfRange: "0-1",
+                                PolicyName: "p2",
+                            },
+                        },
+                    },
+                }
+                return st
+            }(),
+            policy: newNodePolicy(),
+            equalP: true,
+            expectedInterfaces: []v1.Interface{
+                {
+                    Mtu: 2000,
+                    Name: "ens803f1",
+                    NumVfs: 3,
+                    PciAddress: "0000:86:00.1",
+                    VfGroups: []v1.VfGroup{
+                        {
+                            DeviceType: "netdevice",
+                            ResourceName: "p1res",
+                            VfRange: "0-1",
+                            PolicyName: "p1",
+                        },
+                    },
+                },
+            },
+        },
+        {
+            // policies with the same priority whose VfRanges do not overlap are fully merged
+            tname: "one policy present same pf same priority partitioning",
+            currentState: func() *v1.SriovNetworkNodeState {
+                st := newNodeState()
+                st.Spec.Interfaces = []v1.Interface{
+                    {
+                        Name: "ens803f1",
+                        NumVfs: 5,
+                        PciAddress: "0000:86:00.1",
+                        VfGroups: []v1.VfGroup{
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "vfiores",
+                                VfRange: "2-4",
+                                PolicyName: "p2",
+                            },
+                        },
+                    },
+                }
+                return st
+            }(),
+            policy: newNodePolicy(),
+            equalP: true,
+            expectedInterfaces: []v1.Interface{
+                {
+                    Name: "ens803f1",
+                    NumVfs: 5,
+                    PciAddress: "0000:86:00.1",
+                    VfGroups: []v1.VfGroup{
+                        {
+                            DeviceType: "netdevice",
+                            ResourceName: "p1res",
+                            VfRange: "0-1",
+                            PolicyName: "p1",
+                        },
+                        {
+                            DeviceType: "vfio-pci",
+                            ResourceName: "vfiores",
+                            VfRange: "2-4",
+                            PolicyName: "p2",
+                        },
+                    },
+                },
+            },
+        },
+        {
+            // a same-priority policy that overwrites the 2 groups present in the
+            // SriovNetworkNodeState because their VfRanges overlap
+            tname: "two policy present same pf same priority overlap",
+            currentState: func() *v1.SriovNetworkNodeState {
+                st := newNodeState()
+                st.Spec.Interfaces = []v1.Interface{
+                    {
+                        Name: "ens803f1",
+                        NumVfs: 2,
+                        PciAddress: "0000:86:00.1",
+                        VfGroups: []v1.VfGroup{
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "vfiores1",
+                                VfRange: "0-0",
+                                PolicyName: "p2",
+                            },
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "vfiores2",
+                                VfRange: "1-1",
+                                PolicyName: "p3",
+                            },
+                        },
+                    },
+                }
+                return st
+            }(),
+            policy: newNodePolicy(),
+            equalP: true,
+            expectedInterfaces: []v1.Interface{
+                {
+                    Name: "ens803f1",
+                    NumVfs: 2,
+                    PciAddress: "0000:86:00.1",
+                    VfGroups: []v1.VfGroup{
+                        {
+                            DeviceType: "netdevice",
+                            ResourceName: "p1res",
+                            VfRange: "0-1",
+                            PolicyName: "p1",
+                        },
+                    },
+                },
+            },
+        },
+        {
+            // a same-priority policy that overwrites the group present in the
+            // SriovNetworkNodeState because of the same ResourceName
+            tname: "one policy present same pf same priority same ResourceName",
+            currentState: func() *v1.SriovNetworkNodeState {
+                st := newNodeState()
+                st.Spec.Interfaces = []v1.Interface{
+                    {
+                        Name: "ens803f1",
+                        NumVfs: 4,
+                        PciAddress: "0000:86:00.1",
+                        VfGroups: []v1.VfGroup{
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "p1res",
+                                VfRange: "2-3",
+                                PolicyName: "p2",
+                            },
+                        },
+                    },
+                }
+                return st
+            }(),
+            policy: newNodePolicy(),
+            equalP: true,
+            expectedInterfaces: []v1.Interface{
+                {
+                    Name: "ens803f1",
+                    NumVfs: 4,
+                    PciAddress: "0000:86:00.1",
+                    VfGroups: []v1.VfGroup{
+                        {
+                            DeviceType: "netdevice",
+                            ResourceName: "p1res",
+                            VfRange: "0-1",
+                            PolicyName: "p1",
+                        },
+                    },
+                },
+            },
+        },
+        {
+            // policies with different priorities that have non-overlapping VF groups
+            // are merged
+            tname: "one policy present same pf diff priority no overlap VFs",
+            currentState: func() *v1.SriovNetworkNodeState {
+                st := newNodeState()
+                st.Spec.Interfaces = []v1.Interface{
+                    {
+                        Name: "ens803f1",
+                        NumVfs: 4,
+                        PciAddress: "0000:86:00.1",
+                        VfGroups: []v1.VfGroup{
+                            {
+                                DeviceType: "vfio-pci",
+                                ResourceName: "p2res",
+                                VfRange: "2-3",
+                                PolicyName: "p2",
+                            },
+                        },
+                    },
+                }
+                return st
+            }(),
+            policy: newNodePolicy(),
+            equalP: false,
+            expectedInterfaces: []v1.Interface{
+                {
+                    Name: "ens803f1",
+                    NumVfs: 4,
+                    PciAddress: "0000:86:00.1",
+                    VfGroups: []v1.VfGroup{
+                        {
+                            DeviceType: "netdevice",
+                            ResourceName: "p1res",
+                            VfRange: "0-1",
+                            PolicyName: "p1",
+                        },
+                        {
+                            DeviceType: "vfio-pci",
+                            ResourceName: "p2res",
+                            VfRange: "2-3",
+                            PolicyName: "p2",
+                        },
+                    },
+                },
+            },
+        },
+        {
+            tname: "no selectors",
+            currentState: newNodeState(),
+            policy: &v1.SriovNetworkNodePolicy{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "p1",
+                },
+                Spec: v1.SriovNetworkNodePolicySpec{
+                    DeviceType: "netdevice",
+                    NicSelector: v1.SriovNetworkNicSelector{
+                        PfNames: []string{},
+                        RootDevices: []string{},
+                    },
+                    NodeSelector: map[string]string{
+                        "feature.node.kubernetes.io/network-sriov.capable": "true",
+                    },
+                    NumVfs: 2,
+                    Priority: 99,
+                    ResourceName: "p1res",
+                },
+            },
+            equalP: false,
+            expectedInterfaces: nil,
+        },
+        {
+            tname: "bad pf partition",
+            currentState: newNodeState(),
+            policy: &v1.SriovNetworkNodePolicy{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name: "p1",
+                },
+                Spec: v1.SriovNetworkNodePolicySpec{
+                    DeviceType: "netdevice",
+                    NicSelector: v1.SriovNetworkNicSelector{
+                        PfNames: []string{"ens803f0#a-c"},
+                        RootDevices: []string{},
+                    },
+                    NodeSelector: map[string]string{
+                        "feature.node.kubernetes.io/network-sriov.capable": "true",
+                    },
+                    NumVfs: 2,
+                    Priority: 99,
+                    ResourceName: "p1res",
+                },
+            },
+            equalP: false,
+            expectedInterfaces: nil,
+            expectedErr: true,
+        },
+    }
+    for _, tc := range testtable {
+        t.Run(tc.tname, func(t *testing.T) {
+            err := tc.policy.Apply(tc.currentState, tc.equalP)
+            if tc.expectedErr && err == nil {
+                t.Errorf("Apply expecting error.")
+            } else if !tc.expectedErr && err != nil {
+                t.Errorf("Apply error:\n%s", err)
+            }
+            if diff := cmp.Diff(tc.expectedInterfaces, tc.currentState.Spec.Interfaces); diff != "" {
+                t.Errorf("SriovNetworkNodeState spec diff (-want +got):\n%s", diff)
+            }
+        })
+    }
+}
diff --git a/controllers/sriovnetworknodepolicy_controller.go b/controllers/sriovnetworknodepolicy_controller.go
index 0bc4cead7..39571c0a6 100644
--- a/controllers/sriovnetworknodepolicy_controller.go
+++ b/controllers/sriovnetworknodepolicy_controller.go
@@ -299,7 +299,10 @@ func (r *SriovNetworkNodePolicyReconciler) syncSriovNetworkNodeState(np *sriovne
             // Merging only for policies with the same priority (ppp == p.Spec.Priority)
             // This boolean flag controls merging of PF configuration (e.g. mtu, numvfs etc)
             // when VF partition is configured.
-            p.Apply(newVersion, bool(ppp == p.Spec.Priority))
+            err = p.Apply(newVersion, ppp == p.Spec.Priority)
+            if err != nil {
+                return err
+            }
             // record the evaluated policy priority for next loop
             ppp = p.Spec.Priority
         }
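
For context, the reconciler above is expected to feed policies into `Apply` in the order the README describes: lowest priority first, with ties broken by name, so that the highest-priority policy (and, on ties, the last-processed one) wins. The following is only an illustrative sketch of that ordering, not the operator's actual reconciler code; `applyPoliciesInOrder`, the package name, and the `-1` sentinel are made up for the example, while the `Apply` signature and fields come from the diff above.

```go
package example

import (
	"sort"

	sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
)

// applyPoliciesInOrder is an illustrative helper (not part of the operator).
// It sorts policies so that lower-priority ones (larger Priority value) come
// first, breaks ties by name, and applies them one by one, asking Apply to
// merge PF-level settings only when the previous policy had the same priority.
func applyPoliciesInOrder(policies []sriovnetworkv1.SriovNetworkNodePolicy, state *sriovnetworkv1.SriovNetworkNodeState) error {
	sort.Slice(policies, func(i, j int) bool {
		if policies[i].Spec.Priority != policies[j].Spec.Priority {
			// Priority 0 is the highest, so larger values are processed first.
			return policies[i].Spec.Priority > policies[j].Spec.Priority
		}
		return policies[i].Name < policies[j].Name
	})

	prevPriority := -1 // sentinel; assumes real priorities are non-negative
	for i := range policies {
		equalPriority := policies[i].Spec.Priority == prevPriority
		if err := policies[i].Apply(state, equalPriority); err != nil {
			return err
		}
		prevPriority = policies[i].Spec.Priority
	}
	return nil
}
```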