Merge pull request #211 from k8stopologyawareschedwg/drop-state-dirs
notification: statedirs: drop feature
ffromani authored Aug 22, 2023
2 parents c4c166e + 22aa904 commit c26ee82
Showing 8 changed files with 3 additions and 216 deletions.
70 changes: 0 additions & 70 deletions manifests/resource-topology-exporter-ds-kube-watch.yaml

This file was deleted.

1 change: 0 additions & 1 deletion manifests/resource-topology-exporter.yaml
@@ -76,7 +76,6 @@ spec:
- -v=${RTE_VERBOSE}
- --sleep-interval=${RTE_POLL_INTERVAL}
- --sysfs=/host-sys
- --kubelet-state-dir=/host-var/lib/kubelet
- --kubelet-config-file=/host-var/lib/kubelet/config.yaml
- --podresources-socket=unix:///host-var/lib/kubelet/pod-resources/kubelet.sock
- --notify-file=/host-run/rte/notify
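With the `--kubelet-state-dir` flag gone, the manifest relies only on the notify-file mechanism (`--notify-file=/host-run/rte/notify`). As a rough illustration — not part of this change — a node-side agent could trigger a refresh by writing to that file. This is a minimal sketch, assuming the exporter watches the configured notify file via fsnotify and treats create/write events as update triggers; the host-side path used here is hypothetical.

```go
// poke.go: hypothetical helper that nudges RTE by touching the notify file.
package main

import (
	"fmt"
	"os"
	"time"
)

func main() {
	// Hypothetical host path backing the /host-run/rte/notify mount in the manifest.
	const notifyPath = "/run/rte/notify"

	// Creating or writing the file generates a filesystem event the
	// exporter's file-based event source can react to.
	payload := []byte(fmt.Sprintf("poked at %s\n", time.Now().Format(time.RFC3339)))
	if err := os.WriteFile(notifyPath, payload, 0o644); err != nil {
		fmt.Fprintf(os.Stderr, "failed to touch notify file: %v\n", err)
		os.Exit(1)
	}
}
```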
15 changes: 0 additions & 15 deletions pkg/config/config.go
@@ -22,7 +21,6 @@ import (
"flag"
"fmt"
"os"
"strings"
"time"

"k8s.io/klog/v2"
@@ -113,7 +112,6 @@ func LoadArgs(args ...string) (ProgArgs, error) {
flags.StringVar(&pArgs.RTE.PodResourcesSocketPath, "podresources-socket", "unix:///podresources/kubelet.sock", "Pod Resource Socket path to use.")
flags.BoolVar(&pArgs.RTE.PodReadinessEnable, "podreadiness", true, "Custom condition injection using Podreadiness.")

kubeletStateDirs := flags.String("kubelet-state-dir", "", "Kubelet state directory (RO access needed), for smart polling. **DEPRECATED** please use notify-file")
refCnt := flags.String("reference-container", "", "Reference container, used to learn about the shared cpu pool\n See: https://github.com/kubernetes/kubernetes/issues/102190\n format of spec is namespace/podname/containername.\n Alternatively, you can use the env vars REFERENCE_NAMESPACE, REFERENCE_POD_NAME, REFERENCE_CONTAINER_NAME.")

flags.StringVar(&pArgs.RTE.NotifyFilePath, "notify-file", "", "Notification file path.")
@@ -141,11 +139,6 @@ Special targets:
return pArgs, err
}

pArgs.RTE.KubeletStateDirs, err = setKubeletStateDirs(*kubeletStateDirs)
if err != nil {
return pArgs, err
}

pArgs.RTE.ReferenceContainer, err = setContainerIdent(*refCnt)
if err != nil {
return pArgs, err
@@ -191,14 +184,6 @@ func setupArgsFromConfig(pArgs *ProgArgs, conf config) error {
return nil
}

func setKubeletStateDirs(value string) ([]string, error) {
ksd := make([]string, 0)
for _, s := range strings.Split(value, " ") {
ksd = append(ksd, s)
}
return ksd, nil
}

func setContainerIdent(value string) (*sharedcpuspool.ContainerIdent, error) {
ci, err := sharedcpuspool.ContainerIdentFromString(value)
if err != nil {
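The `--reference-container` help text retained above documents the `namespace/podname/containername` identifier format. For illustration only, here is a minimal sketch of splitting such an identifier; the project itself uses `sharedcpuspool.ContainerIdentFromString`, and the type and function names below are assumptions, not the real API.

```go
// parse_ident.go: hypothetical parser for a namespace/podname/containername triple.
package main

import (
	"fmt"
	"strings"
)

type containerIdent struct {
	Namespace, PodName, ContainerName string
}

func parseContainerIdent(value string) (containerIdent, error) {
	parts := strings.Split(value, "/")
	if len(parts) != 3 {
		return containerIdent{}, fmt.Errorf("expected namespace/podname/containername, got %q", value)
	}
	return containerIdent{Namespace: parts[0], PodName: parts[1], ContainerName: parts[2]}, nil
}

func main() {
	ident, err := parseContainerIdent("ns/pod/cont")
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", ident)
}
```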
7 changes: 1 addition & 6 deletions pkg/config/config_test.go
@@ -64,16 +64,11 @@ func TestReferenceContainer(t *testing.T) {
closer := setupTest(t)
t.Cleanup(closer)

pArgs, err := LoadArgs("--kubelet-state-dir=dir1 dir2 dir3", "--reference-container=ns/pod/cont")
pArgs, err := LoadArgs("--reference-container=ns/pod/cont")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

expectedDirs := []string{"dir1", "dir2", "dir3"}
if !reflect.DeepEqual(pArgs.RTE.KubeletStateDirs, expectedDirs) {
t.Errorf("invalid data, got %v expected %v", pArgs.RTE.KubeletStateDirs, expectedDirs)
}

expectedRefCnt := sharedcpuspool.ContainerIdent{Namespace: "ns", PodName: "pod", ContainerName: "cont"}
if pArgs.RTE.ReferenceContainer.String() != expectedRefCnt.String() {
t.Errorf("invalid data, got %v expected %v", pArgs.RTE.ReferenceContainer, expectedRefCnt)
43 changes: 0 additions & 43 deletions pkg/notification/notification.go
@@ -157,49 +157,6 @@ func (es *UnlimitedEventSource) AddFile(notifyFilePath string) error {
return nil
}

func (es *UnlimitedEventSource) AddDirs(kubeletStateDirs []string) error {
if len(kubeletStateDirs) == 0 {
return nil
}

klog.Infof("**DEPRECATED** watching state directories is insecure and has known issues")
klog.Infof("**DEPRECATED** watching state directories will be removed in a future version")
klog.Infof("**DEPRECATED** please use notification file instead")

dirCount := 0
for _, stateDir := range kubeletStateDirs {
klog.Infof("kubelet state dir: [%s]", stateDir)
if stateDir == "" {
continue
}

tryToWatch(es.watcher, stateDir)
dirCount++
}

if dirCount == 0 {
// well, still legal
klog.Infof("no valid directory to monitor given")
return nil
}

es.filters = append(es.filters, func(event fsnotify.Event) bool {
filename := filepath.Base(event.Name)
if filename != stateCPUManager &&
filename != stateMemoryManager &&
filename != stateDeviceManager {
return false
}
// turns out rename is reported as
// 1. RENAME (old) <- unpredictable
// 2. CREATE (new) <- we trigger here
// admittedly we can get some false positives, but that
// is expected to be not that big of a deal.
return (event.Op & fsnotify.Create) == fsnotify.Create
})
return nil
}

// AnyFilter is a cumulative filter which returns true (hence passes)
// only if at least one of the provided filters passes.
func AnyFilter(filters []FilterEvent, event fsnotify.Event) bool {
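The `AnyFilter` doc comment above describes the cumulative filtering the event source keeps after `AddDirs` is dropped: an event passes if at least one filter accepts it. A minimal sketch of that idea follows; the `FilterEvent` type name mirrors the signature shown in the diff, but the surrounding code is an assumption, not the project's implementation.

```go
// any_filter.go: illustrative cumulative "any" filter over fsnotify events.
package main

import (
	"fmt"

	"github.com/fsnotify/fsnotify"
)

// FilterEvent decides whether a single filesystem event is interesting.
type FilterEvent func(event fsnotify.Event) bool

// anyFilter passes an event if at least one of the filters accepts it.
func anyFilter(filters []FilterEvent, event fsnotify.Event) bool {
	for _, filter := range filters {
		if filter(event) {
			return true
		}
	}
	return false
}

func main() {
	onCreate := func(ev fsnotify.Event) bool { return ev.Op&fsnotify.Create == fsnotify.Create }
	onWrite := func(ev fsnotify.Event) bool { return ev.Op&fsnotify.Write == fsnotify.Write }

	ev := fsnotify.Event{Name: "/run/rte/notify", Op: fsnotify.Write}
	fmt.Println(anyFilter([]FilterEvent{onCreate, onWrite}, ev)) // true
}
```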
6 changes: 0 additions & 6 deletions pkg/resourcetopologyexporter/resourcetopologyexporter.go
@@ -22,7 +21,6 @@ type Args struct {
TopologyManagerPolicy string
TopologyManagerScope string
KubeletConfigFile string
KubeletStateDirs []string
PodResourcesSocketPath string
SleepInterval time.Duration
PodReadinessEnable bool
@@ -90,11 +89,6 @@ func createEventSource(rteArgs *Args) (notification.EventSource, error) {
return nil, err
}

err = eventSource.AddDirs(rteArgs.KubeletStateDirs)
if err != nil {
return nil, err
}

es = eventSource

// If rate limit parameters are configured set it up
2 changes: 1 addition & 1 deletion test/data/TestDefaults.expected.json
@@ -1 +1 @@
{"NRTupdater":{"NoPublish":false,"Oneshot":false,"Hostname":"TEST_NODE"},"Resourcemonitor":{"Namespace":"","SysfsRoot":"/sys","ResourceExclude":null,"RefreshNodeResources":false,"PodSetFingerprint":true,"PodSetFingerprintMethod":"with-exclusive-resources","ExposeTiming":false,"PodSetFingerprintStatusFile":"","PodExclude":null,"ExcludeTerminalPods":false},"RTE":{"Debug":false,"ReferenceContainer":{"Namespace":"TEST_NS","PodName":"TEST_POD","ContainerName":"TEST_CONT"},"TopologyManagerPolicy":"","TopologyManagerScope":"","KubeletConfigFile":"/podresources/config.yaml","KubeletStateDirs":[""],"PodResourcesSocketPath":"unix:///podresources/kubelet.sock","SleepInterval":60000000000,"PodReadinessEnable":true,"NotifyFilePath":"","MaxEventsPerTimeUnit":1,"TimeUnitToLimitEvents":1000000000},"Version":false,"DumpConfig":""}
{"NRTupdater":{"NoPublish":false,"Oneshot":false,"Hostname":"TEST_NODE"},"Resourcemonitor":{"Namespace":"","SysfsRoot":"/sys","ResourceExclude":null,"RefreshNodeResources":false,"PodSetFingerprint":true,"PodSetFingerprintMethod":"with-exclusive-resources","ExposeTiming":false,"PodSetFingerprintStatusFile":"","PodExclude":null,"ExcludeTerminalPods":false},"RTE":{"Debug":false,"ReferenceContainer":{"Namespace":"TEST_NS","PodName":"TEST_POD","ContainerName":"TEST_CONT"},"TopologyManagerPolicy":"","TopologyManagerScope":"","KubeletConfigFile":"/podresources/config.yaml","PodResourcesSocketPath":"unix:///podresources/kubelet.sock","SleepInterval":60000000000,"PodReadinessEnable":true,"NotifyFilePath":"","MaxEventsPerTimeUnit":1,"TimeUnitToLimitEvents":1000000000},"Version":false,"DumpConfig":""}
75 changes: 1 addition & 74 deletions test/e2e/rte/rte.go
@@ -94,80 +94,6 @@ var _ = ginkgo.Describe("[RTE][InfraConsuming] Resource topology exporter", func
})

ginkgo.Context("with cluster configured", func() {
ginkgo.It("[DEPRECATED][StateDirectories] it should react to pod changes using the smart poller", func() {
nodes, err := e2enodes.FilterNodesWithEnoughCores(workerNodes, "1000m")
gomega.Expect(err).ToNot(gomega.HaveOccurred())
if len(nodes) < 1 {
ginkgo.Skip("not enough allocatable cores for this test")
}

initialNodeTopo := e2enodetopology.GetNodeTopology(f.TopoCli, topologyUpdaterNode.Name)
ginkgo.By("creating a pod consuming the shared pool")
sleeperPod := e2epods.MakeGuaranteedSleeperPod("1000m")

updateInterval, method, err := estimateUpdateInterval(*initialNodeTopo)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
klog.Infof("%s update interval: %s", method, updateInterval)

// wait interval exactly multiple of the poll interval makes the test racier and less robust, so
// add a little skew. We pick 1 second randomly, but the idea is that small (2, 3, 5) multipliers
// should again not cause a total multiple of the poll interval.
pollingInterval := updateInterval + time.Second

stopChan := make(chan struct{})
doneChan := make(chan struct{})
started := false

go func(f *fixture.Fixture, refPod *corev1.Pod) {
defer ginkgo.GinkgoRecover()

<-stopChan

pod, err := e2epods.CreateSync(f, refPod)
gomega.Expect(err).ToNot(gomega.HaveOccurred())
ginkgo.By("waiting for at least poll interval seconds with the test pod running...")
time.Sleep(updateInterval * 3)
gomega.Expect(e2epods.DeletePodSyncByName(f, pod.Namespace, pod.Name)).ToNot(gomega.HaveOccurred())

doneChan <- struct{}{}
}(f, sleeperPod)

ginkgo.By("getting the updated topology")
var finalNodeTopo *v1alpha2.NodeResourceTopology
gomega.Eventually(func() bool {
if !started {
stopChan <- struct{}{}
started = true
}

finalNodeTopo, err = f.TopoCli.TopologyV1alpha2().NodeResourceTopologies().Get(context.TODO(), topologyUpdaterNode.Name, metav1.GetOptions{})
if err != nil {
klog.Infof("failed to get the node topology resource: %v", err)
return false
}
if finalNodeTopo.ObjectMeta.ResourceVersion == initialNodeTopo.ObjectMeta.ResourceVersion {
klog.Infof("resource %s not yet updated - resource version not bumped (old %v new %v)", topologyUpdaterNode.Name, initialNodeTopo.ObjectMeta.ResourceVersion, finalNodeTopo.ObjectMeta.ResourceVersion)
return false
}
klog.Infof("resource %s updated! - resource version bumped (old %v new %v)", topologyUpdaterNode.Name, initialNodeTopo.ObjectMeta.ResourceVersion, finalNodeTopo.ObjectMeta.ResourceVersion)

reason, ok := finalNodeTopo.Annotations[k8sannotations.RTEUpdate]
if !ok {
klog.Infof("resource %s missing annotation!", topologyUpdaterNode.Name)
return false
}
klog.Infof("resource %s reason %v expected %v", topologyUpdaterNode.Name, reason, nrtupdater.RTEUpdateReactive)
return reason == nrtupdater.RTEUpdateReactive
}).WithTimeout(updateInterval*9).WithPolling(pollingInterval).Should(gomega.BeTrue(), "didn't get updated node topology info") // 5x timeout is a random "long enough" period
ginkgo.By("checking the topology was updated for the right reason")

<-doneChan

gomega.Expect(finalNodeTopo.Annotations).ToNot(gomega.BeNil(), "missing annotations entirely")
reason := finalNodeTopo.Annotations[k8sannotations.RTEUpdate]
gomega.Expect(reason).To(gomega.Equal(nrtupdater.RTEUpdateReactive), "update reason error: expected %q got %q", nrtupdater.RTEUpdateReactive, reason)
})

ginkgo.It("[NotificationFile] it should react to pod changes using the smart poller with notification file", func() {
initialNodeTopo := e2enodetopology.GetNodeTopology(f.TopoCli, topologyUpdaterNode.Name)

@@ -233,6 +159,7 @@ var _ = ginkgo.Describe("[RTE][InfraConsuming] Resource topology exporter", func
gomega.Expect(reason).To(gomega.Equal(nrtupdater.RTEUpdateReactive), "update reason error: expected %q got %q", nrtupdater.RTEUpdateReactive, reason)
})
})

ginkgo.Context("with pod fingerprinting enabled", func() {
ginkgo.It("[PodFingerprint] it should report the computation method in the attributes", func() {
nrt := e2enodetopology.GetNodeTopology(f.TopoCli, topologyUpdaterNode.Name)
