From 28a6141f396c7ed5bacb9cba9db3bb05a420c44d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Ojosnegros=20Manch=C3=B3n?= Date: Wed, 1 Feb 2023 12:41:09 +0100 Subject: [PATCH] topology-updater:compute pod set fingerprint Add an option to compute the fingerprint of the current pod set on each node. Report this new fingerprint using an annotation in NRT object. --- cmd/nfd-topology-updater/main.go | 1 + go.mod | 2 + go.sum | 4 ++ .../nfd-topology-updater.go | 20 +++++++-- pkg/resourcemonitor/podresourcesscanner.go | 37 ++++++++++++++- .../podresourcesscanner_test.go | 45 +++++++++++++++++-- pkg/resourcemonitor/types.go | 2 + 7 files changed, 102 insertions(+), 9 deletions(-) diff --git a/cmd/nfd-topology-updater/main.go b/cmd/nfd-topology-updater/main.go index 514c985530..bc50c979a2 100644 --- a/cmd/nfd-topology-updater/main.go +++ b/cmd/nfd-topology-updater/main.go @@ -143,6 +143,7 @@ func initFlags(flagset *flag.FlagSet) (*topology.Args, *resourcemonitor.Args) { "Pod Resource Socket path to use.") flagset.StringVar(&args.ConfigFile, "config", "/etc/kubernetes/node-feature-discovery/nfd-topology-updater.conf", "Config file to use.") + flagset.BoolVar(&resourcemonitorArgs.PodSetFingerprint, "pods-fingerprint", false, "If enable, compute and report the pod set fingerprint") klog.InitFlags(flagset) diff --git a/go.mod b/go.mod index d9a714f8bc..f299f176cb 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/google/go-cmp v0.5.9 github.com/jaypipes/ghw v0.8.1-0.20210827132705-c7224150a17e github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13 + github.com/k8stopologyawareschedwg/podfingerprint v0.1.1 github.com/klauspost/cpuid/v2 v2.2.3 github.com/onsi/ginkgo/v2 v2.4.0 github.com/onsi/gomega v1.23.0 @@ -50,6 +51,7 @@ require ( github.com/Microsoft/go-winio v0.4.17 // indirect github.com/Microsoft/hcsshim v0.8.22 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect + github.com/OneOfOne/xxhash v1.2.8 // indirect github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect github.com/antlr/antlr4/runtime/Go/antlr v1.4.10 // indirect github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e // indirect diff --git a/go.sum b/go.sum index 928bf981cc..afdbc58671 100644 --- a/go.sum +++ b/go.sum @@ -86,6 +86,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0 github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8= +github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d h1:G0m3OIz70MZUWq3EgK3CesDbo8upS2Vm9/P3FtgI+Jk= @@ -422,6 +424,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13 h1:Y1RjPskyGMkVtNL8lq75bEdjqgq8gi+JJ1oWaz/mIJE= github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.13/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM= +github.com/k8stopologyawareschedwg/podfingerprint v0.1.1 h1:uNEj+avp3yJkJMvkmk6iosibVauSo+owEKV2JyuKNsQ= +github.com/k8stopologyawareschedwg/podfingerprint v0.1.1/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI= github.com/karrick/godirwalk v1.17.0 h1:b4kY7nqDdioR/6qnbHQyDvmA17u5G1cZ6J+CZXwSWoI= github.com/karrick/godirwalk v1.17.0/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= diff --git a/pkg/nfd-topology-updater/nfd-topology-updater.go b/pkg/nfd-topology-updater/nfd-topology-updater.go index 024918a872..03384768cd 100644 --- a/pkg/nfd-topology-updater/nfd-topology-updater.go +++ b/pkg/nfd-topology-updater/nfd-topology-updater.go @@ -114,7 +114,7 @@ func (w *nfdTopologyUpdater) Run() error { var resScan resourcemonitor.ResourcesScanner - resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper) + resScan, err = resourcemonitor.NewPodResourcesScanner(w.resourcemonitorArgs.Namespace, podResClient, w.apihelper, w.resourcemonitorArgs.PodSetFingerprint) if err != nil { return fmt.Errorf("failed to initialize ResourceMonitor instance: %w", err) } @@ -147,7 +147,7 @@ func (w *nfdTopologyUpdater) Run() error { zones = resAggr.Aggregate(scanResponse.PodResources) utils.KlogDump(1, "After aggregating resources identified zones are", " ", zones) if !w.args.NoPublish { - if err = w.updateNodeResourceTopology(zones); err != nil { + if err = w.updateNodeResourceTopology(zones, scanResponse.Annotations); err != nil { return err } } @@ -172,7 +172,7 @@ func (w *nfdTopologyUpdater) Stop() { } } -func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha1.ZoneList) error { +func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha1.ZoneList, annotations map[string]string) error { cli, err := w.apihelper.GetTopologyClient() if err != nil { return err @@ -182,7 +182,8 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha1.ZoneLi if errors.IsNotFound(err) { nrtNew := v1alpha1.NodeResourceTopology{ ObjectMeta: metav1.ObjectMeta{ - Name: w.nodeInfo.nodeName, + Name: w.nodeInfo.nodeName, + Annotations: annotations, }, Zones: zoneInfo, TopologyPolicies: []string{w.nodeInfo.tmPolicy}, @@ -199,6 +200,7 @@ func (w *nfdTopologyUpdater) updateNodeResourceTopology(zoneInfo v1alpha1.ZoneLi nrtMutated := nrt.DeepCopy() nrtMutated.Zones = zoneInfo + nrtMutated.Annotations = mergeAnnotations(nrt.Annotations, annotations) nrtUpdated, err := cli.TopologyV1alpha1().NodeResourceTopologies().Update(context.TODO(), nrtMutated, metav1.UpdateOptions{}) if err != nil { @@ -231,3 +233,13 @@ func (w *nfdTopologyUpdater) configure() error { klog.Infof("configuration file %q parsed:\n %v", w.configFilePath, w.config) return nil } + +func mergeAnnotations(annotations ...map[string]string) map[string]string { + ret := make(map[string]string) + for _, annotation := range annotations { + for k, v := range annotation { + ret[k] = v + } + } + return ret +} diff --git a/pkg/resourcemonitor/podresourcesscanner.go b/pkg/resourcemonitor/podresourcesscanner.go index 9cfdb4648e..f6fc11b38a 100644 --- a/pkg/resourcemonitor/podresourcesscanner.go +++ b/pkg/resourcemonitor/podresourcesscanner.go @@ -27,20 +27,24 @@ import ( podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" "sigs.k8s.io/node-feature-discovery/pkg/apihelper" + + "github.com/k8stopologyawareschedwg/podfingerprint" ) type PodResourcesScanner struct { namespace string podResourceClient podresourcesapi.PodResourcesListerClient apihelper apihelper.APIHelpers + podFingerprint bool } // NewPodResourcesScanner creates a new ResourcesScanner instance -func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers) (ResourcesScanner, error) { +func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.PodResourcesListerClient, kubeApihelper apihelper.APIHelpers, podFingerprint bool) (ResourcesScanner, error) { resourcemonitorInstance := &PodResourcesScanner{ namespace: namespace, podResourceClient: podResourceClient, apihelper: kubeApihelper, + podFingerprint: podFingerprint, } if resourcemonitorInstance.namespace != "*" { klog.Infof("watching namespace %q", resourcemonitorInstance.namespace) @@ -198,7 +202,25 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) { } - return ScanResponse{PodResources: podResData}, nil + retVal := ScanResponse{ + PodResources: podResData, + } + + if resMon.podFingerprint && len(retVal.PodResources) > 0 { + var status podfingerprint.Status + podFingerprintSign, err := computePodFingerprint(retVal.PodResources, &status) + if err != nil { + klog.Errorf("podFingerprint: Unable to compute fingerprint %v", err) + } else { + klog.Infof("podFingerprint: " + status.Repr()) + + retVal.Annotations = map[string]string{ + podfingerprint.Annotation: podFingerprintSign, + } + } + } + + return retVal, nil } func hasDevice(podResource *podresourcesapi.PodResources) bool { @@ -225,3 +247,14 @@ func getNumaNodeIds(topologyInfo *podresourcesapi.TopologyInfo) []int { return topology } + +func computePodFingerprint(podResources []PodResources, status *podfingerprint.Status) (string, error) { + fingerprint := podfingerprint.NewTracingFingerprint(len(podResources), status) + for _, podResource := range podResources { + err := fingerprint.Add(podResource.Namespace, podResource.Name) + if err != nil { + return "", err + } + } + return fingerprint.Sign(), nil +} diff --git a/pkg/resourcemonitor/podresourcesscanner_test.go b/pkg/resourcemonitor/podresourcesscanner_test.go index 132372d635..e3eb41c495 100644 --- a/pkg/resourcemonitor/podresourcesscanner_test.go +++ b/pkg/resourcemonitor/podresourcesscanner_test.go @@ -22,6 +22,7 @@ import ( "sort" "testing" + "github.com/k8stopologyawareschedwg/podfingerprint" . "github.com/smartystreets/goconvey/convey" "github.com/stretchr/testify/mock" @@ -44,7 +45,8 @@ func TestPodScanner(t *testing.T) { mockPodResClient := new(podres.MockPodResourcesListerClient) mockAPIHelper := new(apihelper.MockAPIHelpers) mockClient := &k8sclient.Clientset{} - resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper) + computePodFingerprint := true + resScan, err = NewPodResourcesScanner("*", mockPodResClient, mockAPIHelper, computePodFingerprint) Convey("Creating a Resources Scanner using a mock client", func() { So(err, ShouldBeNil) @@ -60,6 +62,9 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should be nil", func() { So(res.PodResources, ShouldBeNil) }) + Convey("Return Annotations should be nil", func() { + So(res.Annotations, ShouldBeNil) + }) }) Convey("When I successfully get empty response", func() { @@ -72,6 +77,9 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should be zero", func() { So(len(res.PodResources), ShouldEqual, 0) }) + Convey("Return Annotations should be nil", func() { + So(res.Annotations, ShouldBeNil) + }) }) Convey("When I successfully get valid response", func() { @@ -203,6 +211,12 @@ func TestPodScanner(t *testing.T) { } So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Annotations should have pod fingerprint annotation with proper value", func() { + So(len(res.Annotations), ShouldEqual, 1) + + expectedFingerprint := "pfp0v001fe53c4dbd2c5f4a0" // precomputed manually + So(res.Annotations[podfingerprint.Annotation], ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without topology", func() { @@ -292,6 +306,12 @@ func TestPodScanner(t *testing.T) { So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Annotations should have pod fingerprint annotation with proper value", func() { + So(len(res.Annotations), ShouldEqual, 1) + + expectedFingerprint := "pfp0v001fe53c4dbd2c5f4a0" // precomputed manually + So(res.Annotations[podfingerprint.Annotation], ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without devices", func() { @@ -367,6 +387,12 @@ func TestPodScanner(t *testing.T) { So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) }) + Convey("Return Annotations should have pod fingerprint annotation with proper value", func() { + So(len(res.Annotations), ShouldEqual, 1) + + expectedFingerprint := "pfp0v001fe53c4dbd2c5f4a0" // precomputed manually + So(res.Annotations[podfingerprint.Annotation], ShouldEqual, expectedFingerprint) + }) }) Convey("When I successfully get valid response without cpus", func() { @@ -507,6 +533,12 @@ func TestPodScanner(t *testing.T) { Convey("Return PodResources should have values", func() { So(len(res.PodResources), ShouldBeGreaterThan, 0) }) + Convey("Return Annotations should have pod fingerprint annotation with proper value", func() { + So(len(res.Annotations), ShouldEqual, 1) + + expectedFingerprint := "pfp0v001fe53c4dbd2c5f4a0" // precomputed manually + So(res.Annotations[podfingerprint.Annotation], ShouldEqual, expectedFingerprint) + }) expected := []PodResources{ { @@ -610,15 +642,22 @@ func TestPodScanner(t *testing.T) { }, } So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue) - }) + Convey("Return Annotations should have pod fingerprint annotation with proper value", func() { + So(len(res.Annotations), ShouldEqual, 1) + + expectedFingerprint := "pfp0v001fe53c4dbd2c5f4a0" // precomputed manually + So(res.Annotations[podfingerprint.Annotation], ShouldEqual, expectedFingerprint) + }) + }) }) Convey("When I scan for pod resources using fake client and given namespace", t, func() { mockPodResClient := new(podres.MockPodResourcesListerClient) mockAPIHelper := new(apihelper.MockAPIHelpers) mockClient := &k8sclient.Clientset{} - resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper) + computePodFingerprint := false + resScan, err = NewPodResourcesScanner("pod-res-test", mockPodResClient, mockAPIHelper, computePodFingerprint) Convey("Creating a Resources Scanner using a mock client", func() { So(err, ShouldBeNil) diff --git a/pkg/resourcemonitor/types.go b/pkg/resourcemonitor/types.go index f54acbc14d..c9a829d5a0 100644 --- a/pkg/resourcemonitor/types.go +++ b/pkg/resourcemonitor/types.go @@ -31,6 +31,7 @@ type Args struct { Namespace string KubeletConfigURI string APIAuthTokenFile string + PodSetFingerprint bool } // ResourceInfo stores information of resources and their corresponding IDs obtained from PodResource API @@ -55,6 +56,7 @@ type PodResources struct { type ScanResponse struct { PodResources []PodResources + Annotations map[string]string } // ResourcesScanner gathers all the PodResources from the system, using the podresources API client