Skip to content

Commit

Permalink
Merge pull request #109 from k8stopologyawareschedwg/node-signature
Browse files Browse the repository at this point in the history
rte: add option to compute the pod set fingerprint
  • Loading branch information
ffromani authored Jun 15, 2022
2 parents a949c96 + 2004cc7 commit bffcbc8
Show file tree
Hide file tree
Showing 25 changed files with 1,977 additions and 42 deletions.
2 changes: 2 additions & 0 deletions cmd/resource-topology-exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ func parseArgs(args ...string) (ProgArgs, error) {

flags.StringVar(&pArgs.Resourcemonitor.Namespace, "watch-namespace", "", "Namespace to watch pods for. Use \"\" for all namespaces.")
flags.StringVar(&pArgs.Resourcemonitor.SysfsRoot, "sysfs", "/sys", "Top-level component path of sysfs.")
flags.BoolVar(&pArgs.Resourcemonitor.PodSetFingerprint, "pods-fingerprint", false, "If enable, compute and report the pod set fingerprint.")
flags.BoolVar(&pArgs.Resourcemonitor.ExposeTiming, "expose-timing", false, "If enable, expose expected and actual sleep interval as annotations.")

flags.StringVar(&configPath, "config", "/etc/resource-topology-exporter/config.yaml", "Configuration file path. Use this to set the exclude list.")

Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/google/go-cmp v0.5.5
github.com/jaypipes/ghw v0.8.1-0.20210609141030-acb1a36eaf89
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2
github.com/mdomke/git-semver v1.0.0
github.com/onsi/ginkgo v1.14.0
github.com/onsi/gomega v1.10.1
Expand Down Expand Up @@ -45,6 +46,7 @@ require (
github.com/Microsoft/go-winio v0.4.17 // indirect
github.com/Microsoft/hcsshim v0.8.22 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e h1:Md6Mtmn7W2mppR3YHYqWe4/R/swzJK0WR/x1U+s+n7I=
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
Expand Down Expand Up @@ -450,6 +452,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12 h1:NhXbOjO1st8hIcVpegr3zw/AGG12vs3z//tCDDcfPpE=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM=
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2 h1:VkwY0dWu9dID7aKVbYF/vzCA4Hj8c8oXEWN0ZG/0boE=
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI=
github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw=
github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
Expand Down
2 changes: 2 additions & 0 deletions manifests/resource-topology-exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ spec:
- --kubelet-config-file=/host-var/lib/kubelet/config.yaml
- --podresources-socket=unix:///host-var/lib/kubelet/pod-resources/kubelet.sock
- --notify-file=/host-run/rte/notify
- --pods-fingerprint
- --expose-timing
env:
- name: NODE_NAME
valueFrom:
Expand Down
23 changes: 23 additions & 0 deletions pkg/k8sannotations/annotations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package k8sannotations

const (
RTEUpdate = "k8stopoawareschedwg/rte-update"
SleepDuration = "k8stopoawareschedwg/sleep-duration"
UpdateInterval = "k8stopoawareschedwg/update-interval"
)
13 changes: 10 additions & 3 deletions pkg/notification/notification.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ const (
type FilterEvent func(event fsnotify.Event) bool

type Event struct {
Timer bool
Timestamp time.Time
Timestamp time.Time
TimerInterval time.Duration
}

func (ev Event) IsTimer() bool {
return ev.TimerInterval > 0
}

type EventSource interface {
Expand Down Expand Up @@ -83,7 +87,10 @@ func (es *UnlimitedEventSource) Run() {
// TODO: what about closed channels?
select {
case tickTs := <-ticker.C:
es.eventChan <- Event{Timer: true, Timestamp: tickTs}
es.eventChan <- Event{
Timestamp: tickTs,
TimerInterval: es.sleepInterval,
}
klog.V(4).Infof("timer update trigger")

case event := <-es.watcher.Events:
Expand Down
26 changes: 16 additions & 10 deletions pkg/nrtupdater/nrtupdater.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ import (
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"

"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/k8sannotations"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/k8shelpers"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/podreadiness"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/prometheus"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/utils"
)

const (
AnnotationRTEUpdate = "k8stopoawareschedwg/rte-update"
)

const (
RTEUpdatePeriodic = "periodic"
RTEUpdateReactive = "reactive"
Expand All @@ -42,8 +39,9 @@ type NRTUpdater struct {
}

type MonitorInfo struct {
Timer bool
Zones v1alpha1.ZoneList
Timer bool
Zones v1alpha1.ZoneList
Annotations map[string]string
}

func (mi MonitorInfo) UpdateReason() string {
Expand Down Expand Up @@ -103,9 +101,6 @@ func (te *NRTUpdater) UpdateWithClient(cli topologyclientset.Interface, info Mon
}

nrtMutated := nrt.DeepCopy()
if nrtMutated.Annotations == nil {
nrtMutated.Annotations = make(map[string]string)
}
te.updateNRTInfo(nrtMutated, info)

nrtUpdated, err := cli.TopologyV1alpha1().NodeResourceTopologies().Update(context.TODO(), nrtMutated, metav1.UpdateOptions{})
Expand All @@ -117,7 +112,8 @@ func (te *NRTUpdater) UpdateWithClient(cli topologyclientset.Interface, info Mon
}

func (te *NRTUpdater) updateNRTInfo(nrt *v1alpha1.NodeResourceTopology, info MonitorInfo) {
nrt.Annotations[AnnotationRTEUpdate] = info.UpdateReason()
nrt.Annotations = mergeAnnotations(nrt.Annotations, info.Annotations)
nrt.Annotations[k8sannotations.RTEUpdate] = info.UpdateReason()
nrt.TopologyPolicies = []string{te.tmPolicy}
nrt.Zones = info.Zones
}
Expand Down Expand Up @@ -150,3 +146,13 @@ func (te *NRTUpdater) Run(infoChannel <-chan MonitorInfo, condChan chan v1.PodCo
}
}
}

func mergeAnnotations(kvs ...map[string]string) map[string]string {
ret := make(map[string]string)
for _, kv := range kvs {
for key, value := range kv {
ret[key] = value
}
}
return ret
}
35 changes: 28 additions & 7 deletions pkg/resourcemonitor/resourcemonitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"

"github.com/jaypipes/ghw"
"github.com/k8stopologyawareschedwg/podfingerprint"

topologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/prometheus"
Expand All @@ -51,10 +52,12 @@ type Args struct {
SysfsRoot string
ExcludeList ResourceExcludeList
RefreshNodeResources bool
PodSetFingerprint bool
ExposeTiming bool
}

type ResourceMonitor interface {
Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, error)
Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, map[string]string, error)
}

// ToMapSet keeps the original keys, but replaces values with set.String types
Expand Down Expand Up @@ -129,13 +132,13 @@ func NewResourceMonitorWithTopology(nodeName string, topo *ghw.TopologyInfo, pod
return rm, nil
}

func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, error) {
func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, map[string]string, error) {
if rm.args.RefreshNodeResources {
if err := rm.updateNodeCapacity(); err != nil {
return nil, err
return nil, nil, err
}
if err := rm.updateNodeAllocatable(); err != nil {
return nil, err
return nil, nil, err
}
}

Expand All @@ -144,11 +147,13 @@ func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alph
resp, err := rm.podResCli.List(ctx, &podresourcesapi.ListPodResourcesRequest{})
if err != nil {
prometheus.UpdatePodResourceApiCallsFailureMetric("list")
return nil, err
return nil, nil, err
}

allDevs := GetAllContainerDevices(resp.GetPodResources(), rm.args.Namespace, rm.coreIDToNodeIDMap)
respPodRes := resp.GetPodResources()
allDevs := GetAllContainerDevices(respPodRes, rm.args.Namespace, rm.coreIDToNodeIDMap)
allocated := ContainerDevicesToPerNUMAResourceCounters(allDevs)
annotations := rm.annotationForResponse(respPodRes)

excludeSet := excludeList.ToMapSet()
zones := make(topologyv1alpha1.ZoneList, 0)
Expand Down Expand Up @@ -217,7 +222,15 @@ func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alph

zones = append(zones, zone)
}
return zones, nil
return zones, annotations, nil
}

func (rm *resourceMonitor) annotationForResponse(podRes []*podresourcesapi.PodResources) map[string]string {
annotations := make(map[string]string)
if rm.args.PodSetFingerprint {
annotations[podfingerprint.Annotation] = ComputePodFingerprint(podRes)
}
return annotations
}

func (rm *resourceMonitor) updateNodeCapacity() error {
Expand Down Expand Up @@ -273,6 +286,14 @@ func GetAllContainerDevices(podRes []*podresourcesapi.PodResources, namespace st
return allCntRes
}

func ComputePodFingerprint(podRes []*podresourcesapi.PodResources) string {
fp := podfingerprint.NewFingerprint(len(podRes))
for _, pr := range podRes {
fp.AddPod(pr)
}
return fp.Sign()
}

func NormalizeContainerDevices(devices []*podresourcesapi.ContainerDevices, memoryBlocks []*podresourcesapi.ContainerMemory, cpuIds []int64, coreIDToNodeIDMap map[int]int) []*podresourcesapi.ContainerDevices {
contDevs := append([]*podresourcesapi.ContainerDevices{}, devices...)

Expand Down
Loading

0 comments on commit bffcbc8

Please sign in to comment.