Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rte: add option to compute the pod set fingerprint #109

Merged
merged 6 commits into from
Jun 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/resource-topology-exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ func parseArgs(args ...string) (ProgArgs, error) {

flags.StringVar(&pArgs.Resourcemonitor.Namespace, "watch-namespace", "", "Namespace to watch pods for. Use \"\" for all namespaces.")
flags.StringVar(&pArgs.Resourcemonitor.SysfsRoot, "sysfs", "/sys", "Top-level component path of sysfs.")
flags.BoolVar(&pArgs.Resourcemonitor.PodSetFingerprint, "pods-fingerprint", false, "If enable, compute and report the pod set fingerprint.")
flags.BoolVar(&pArgs.Resourcemonitor.ExposeTiming, "expose-timing", false, "If enable, expose expected and actual sleep interval as annotations.")

flags.StringVar(&configPath, "config", "/etc/resource-topology-exporter/config.yaml", "Configuration file path. Use this to set the exclude list.")

Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/google/go-cmp v0.5.5
github.com/jaypipes/ghw v0.8.1-0.20210609141030-acb1a36eaf89
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2
github.com/mdomke/git-semver v1.0.0
github.com/onsi/ginkgo v1.14.0
github.com/onsi/gomega v1.10.1
Expand Down Expand Up @@ -45,6 +46,7 @@ require (
github.com/Microsoft/go-winio v0.4.17 // indirect
github.com/Microsoft/hcsshim v0.8.22 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb0
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I=
github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e h1:Md6Mtmn7W2mppR3YHYqWe4/R/swzJK0WR/x1U+s+n7I=
github.com/OneOfOne/xxhash v1.2.9-0.20201014161131-8506fca4db5e/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
Expand Down Expand Up @@ -450,6 +452,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12 h1:NhXbOjO1st8hIcVpegr3zw/AGG12vs3z//tCDDcfPpE=
github.com/k8stopologyawareschedwg/noderesourcetopology-api v0.0.12/go.mod h1:AkACMQGiTgCt0lQw3m7TTU8PLH9lYKNK5e9DqFf5VuM=
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2 h1:VkwY0dWu9dID7aKVbYF/vzCA4Hj8c8oXEWN0ZG/0boE=
github.com/k8stopologyawareschedwg/podfingerprint v0.0.2/go.mod h1:C23pM15t06dXg/OihGlqBvnYzLr+MXDXJ7zMfbNAyXI=
github.com/karrick/godirwalk v1.16.1 h1:DynhcF+bztK8gooS0+NDJFrdNZjJ3gzVzC545UNA9iw=
github.com/karrick/godirwalk v1.16.1/go.mod h1:j4mkqPuvaLI8mp1DroR3P6ad7cyYd4c1qeJ3RV7ULlk=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
Expand Down
2 changes: 2 additions & 0 deletions manifests/resource-topology-exporter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ spec:
- --kubelet-config-file=/host-var/lib/kubelet/config.yaml
- --podresources-socket=unix:///host-var/lib/kubelet/pod-resources/kubelet.sock
- --notify-file=/host-run/rte/notify
- --pods-fingerprint
swatisehgal marked this conversation as resolved.
Show resolved Hide resolved
- --expose-timing
env:
- name: NODE_NAME
valueFrom:
Expand Down
23 changes: 23 additions & 0 deletions pkg/k8sannotations/annotations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package k8sannotations

const (
RTEUpdate = "k8stopoawareschedwg/rte-update"
SleepDuration = "k8stopoawareschedwg/sleep-duration"
UpdateInterval = "k8stopoawareschedwg/update-interval"
)
13 changes: 10 additions & 3 deletions pkg/notification/notification.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ const (
type FilterEvent func(event fsnotify.Event) bool

type Event struct {
Timer bool
Timestamp time.Time
Timestamp time.Time
TimerInterval time.Duration
}

func (ev Event) IsTimer() bool {
return ev.TimerInterval > 0
}

type EventSource interface {
Expand Down Expand Up @@ -83,7 +87,10 @@ func (es *UnlimitedEventSource) Run() {
// TODO: what about closed channels?
select {
case tickTs := <-ticker.C:
es.eventChan <- Event{Timer: true, Timestamp: tickTs}
es.eventChan <- Event{
Timestamp: tickTs,
TimerInterval: es.sleepInterval,
}
klog.V(4).Infof("timer update trigger")

case event := <-es.watcher.Events:
Expand Down
26 changes: 16 additions & 10 deletions pkg/nrtupdater/nrtupdater.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,13 @@ import (
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
topologyclientset "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/generated/clientset/versioned"

"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/k8sannotations"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/k8shelpers"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/podreadiness"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/prometheus"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/utils"
)

const (
AnnotationRTEUpdate = "k8stopoawareschedwg/rte-update"
)

const (
RTEUpdatePeriodic = "periodic"
RTEUpdateReactive = "reactive"
Expand All @@ -42,8 +39,9 @@ type NRTUpdater struct {
}

type MonitorInfo struct {
Timer bool
Zones v1alpha1.ZoneList
Timer bool
Zones v1alpha1.ZoneList
Annotations map[string]string
}

func (mi MonitorInfo) UpdateReason() string {
Expand Down Expand Up @@ -103,9 +101,6 @@ func (te *NRTUpdater) UpdateWithClient(cli topologyclientset.Interface, info Mon
}

nrtMutated := nrt.DeepCopy()
if nrtMutated.Annotations == nil {
nrtMutated.Annotations = make(map[string]string)
}
te.updateNRTInfo(nrtMutated, info)

nrtUpdated, err := cli.TopologyV1alpha1().NodeResourceTopologies().Update(context.TODO(), nrtMutated, metav1.UpdateOptions{})
Expand All @@ -117,7 +112,8 @@ func (te *NRTUpdater) UpdateWithClient(cli topologyclientset.Interface, info Mon
}

func (te *NRTUpdater) updateNRTInfo(nrt *v1alpha1.NodeResourceTopology, info MonitorInfo) {
nrt.Annotations[AnnotationRTEUpdate] = info.UpdateReason()
nrt.Annotations = mergeAnnotations(nrt.Annotations, info.Annotations)
nrt.Annotations[k8sannotations.RTEUpdate] = info.UpdateReason()
nrt.TopologyPolicies = []string{te.tmPolicy}
nrt.Zones = info.Zones
}
Expand Down Expand Up @@ -150,3 +146,13 @@ func (te *NRTUpdater) Run(infoChannel <-chan MonitorInfo, condChan chan v1.PodCo
}
}
}

func mergeAnnotations(kvs ...map[string]string) map[string]string {
ret := make(map[string]string)
for _, kv := range kvs {
for key, value := range kv {
ret[key] = value
}
}
return ret
}
35 changes: 28 additions & 7 deletions pkg/resourcemonitor/resourcemonitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"

"github.com/jaypipes/ghw"
"github.com/k8stopologyawareschedwg/podfingerprint"
ffromani marked this conversation as resolved.
Show resolved Hide resolved

topologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
"github.com/k8stopologyawareschedwg/resource-topology-exporter/pkg/prometheus"
Expand All @@ -51,10 +52,12 @@ type Args struct {
SysfsRoot string
ExcludeList ResourceExcludeList
RefreshNodeResources bool
PodSetFingerprint bool
ExposeTiming bool
}

type ResourceMonitor interface {
Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, error)
Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, map[string]string, error)
}

// ToMapSet keeps the original keys, but replaces values with set.String types
Expand Down Expand Up @@ -129,13 +132,13 @@ func NewResourceMonitorWithTopology(nodeName string, topo *ghw.TopologyInfo, pod
return rm, nil
}

func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, error) {
func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alpha1.ZoneList, map[string]string, error) {
if rm.args.RefreshNodeResources {
if err := rm.updateNodeCapacity(); err != nil {
return nil, err
return nil, nil, err
}
if err := rm.updateNodeAllocatable(); err != nil {
return nil, err
return nil, nil, err
}
}

Expand All @@ -144,11 +147,13 @@ func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alph
resp, err := rm.podResCli.List(ctx, &podresourcesapi.ListPodResourcesRequest{})
if err != nil {
prometheus.UpdatePodResourceApiCallsFailureMetric("list")
return nil, err
return nil, nil, err
}

allDevs := GetAllContainerDevices(resp.GetPodResources(), rm.args.Namespace, rm.coreIDToNodeIDMap)
respPodRes := resp.GetPodResources()
allDevs := GetAllContainerDevices(respPodRes, rm.args.Namespace, rm.coreIDToNodeIDMap)
allocated := ContainerDevicesToPerNUMAResourceCounters(allDevs)
annotations := rm.annotationForResponse(respPodRes)

excludeSet := excludeList.ToMapSet()
zones := make(topologyv1alpha1.ZoneList, 0)
Expand Down Expand Up @@ -217,7 +222,15 @@ func (rm *resourceMonitor) Scan(excludeList ResourceExcludeList) (topologyv1alph

zones = append(zones, zone)
}
return zones, nil
return zones, annotations, nil
}

func (rm *resourceMonitor) annotationForResponse(podRes []*podresourcesapi.PodResources) map[string]string {
annotations := make(map[string]string)
if rm.args.PodSetFingerprint {
annotations[podfingerprint.Annotation] = ComputePodFingerprint(podRes)
}
return annotations
}

func (rm *resourceMonitor) updateNodeCapacity() error {
Expand Down Expand Up @@ -273,6 +286,14 @@ func GetAllContainerDevices(podRes []*podresourcesapi.PodResources, namespace st
return allCntRes
}

func ComputePodFingerprint(podRes []*podresourcesapi.PodResources) string {
fp := podfingerprint.NewFingerprint(len(podRes))
Tal-or marked this conversation as resolved.
Show resolved Hide resolved
for _, pr := range podRes {
fp.AddPod(pr)
}
return fp.Sign()
swatisehgal marked this conversation as resolved.
Show resolved Hide resolved
}

func NormalizeContainerDevices(devices []*podresourcesapi.ContainerDevices, memoryBlocks []*podresourcesapi.ContainerMemory, cpuIds []int64, coreIDToNodeIDMap map[int]int) []*podresourcesapi.ContainerDevices {
contDevs := append([]*podresourcesapi.ContainerDevices{}, devices...)

Expand Down
Loading