From 143f8310134136760a5082c520e060780ac1f869 Mon Sep 17 00:00:00 2001
From: Sargun Narula
Date: Fri, 21 Jun 2024 15:24:35 +0530
Subject: [PATCH] E2E: Add test to verify runc uses valid CPUs

Add a test to verify that runc does not use CPUs assigned to
guaranteed pods.

Signed-off-by: Sargun Narula

Updated the CPU check to be per container: runc provides a config.json
for each type of pod, but runc's own infra container always runs on the
reserved CPUs.

Signed-off-by: Sargun Narula
---
 .../functests/1_performance/cpu_management.go | 177 ++++++++++++++++++
 1 file changed, 177 insertions(+)

diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
index 1ed65efb1..8aa555893 100644
--- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
+++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
@@ -2,6 +2,7 @@ package __performance
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"regexp"
@@ -50,6 +51,34 @@ var profile *performancev2.PerformanceProfile
 const restartCooldownTime = 1 * time.Minute
 const cgroupRoot string = "/sys/fs/cgroup"
 
+type CPUVals struct {
+	CPUs string `json:"cpus"`
+}
+
+type CPUResources struct {
+	CPU CPUVals `json:"cpu"`
+}
+
+type LinuxResources struct {
+	Resources CPUResources `json:"resources"`
+}
+
+type Process struct {
+	Args []string `json:"args"`
+}
+
+type Annotations struct {
+	ContainerName string `json:"io.kubernetes.container.name"`
+	PodName       string `json:"io.kubernetes.pod.name"`
+}
+
+type ContainerConfig struct {
+	Process     Process        `json:"process"`
+	Hostname    string         `json:"hostname"`
+	Annotations Annotations    `json:"annotations"`
+	Linux       LinuxResources `json:"linux"`
+}
+
 var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 	var (
 		balanceIsolated bool
@@ -892,8 +921,156 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
 		})
 	})
 
+	Context("Check container runtimes cpu usage", Label(string(label.OpenShift)), func() {
+		var guaranteedPod, bestEffortPod *corev1.Pod
+		var guaranteedPodCpus, guaranteedInitPodCpus cpuset.CPUSet
+		var bestEffortPodCpus, bestEffortInitPodCpus cpuset.CPUSet
+
+		It("[test_id: 74461] Verify that runc excludes the cpus used by guaranteed pod", func() {
+			By("Creating a guaranteed pod")
+			guaranteedPod = makePod(ctx, workerRTNode, true)
+			err := testclient.Client.Create(ctx, guaranteedPod)
+			Expect(err).ToNot(HaveOccurred(), "Failed to create guaranteed pod")
+			defer func() {
+				if guaranteedPod != nil {
+					testlog.Infof("deleting pod %q", guaranteedPod.Name)
+					deleteTestPod(ctx, guaranteedPod)
+				}
+			}()
+
+			By("Waiting for guaranteed pod to be ready")
+			_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(guaranteedPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
+			Expect(guaranteedPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "Guaranteed pod does not have the correct QOSClass")
+			testlog.Infof("Guaranteed pod %s/%s was successfully created", guaranteedPod.Namespace, guaranteedPod.Name)
+
+			By("Creating a best-effort pod")
+			bestEffortPod = makePod(ctx, workerRTNode, false)
+			err = testclient.Client.Create(ctx, bestEffortPod)
+			Expect(err).ToNot(HaveOccurred(), "Failed to create best-effort pod")
+			defer func() {
+				if bestEffortPod != nil {
+					testlog.Infof("deleting pod %q", bestEffortPod.Name)
+					deleteTestPod(ctx, bestEffortPod)
+				}
+			}()
+
+			By("Waiting for best-effort pod to be ready")
+			_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(bestEffortPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
+			Expect(err).ToNot(HaveOccurred(), "Best-effort pod did not become ready in time")
+			testlog.Infof("BestEffort pod %s/%s was successfully created", bestEffortPod.Namespace, bestEffortPod.Name)
+
+			By("Getting information for guaranteed pod containers")
+			GuPods := getConfigJsonInfo(guaranteedPod, "test", workerRTNode)
+			for _, pod := range GuPods {
+				if pod.Annotations.ContainerName == "test" {
+					guaranteedPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
+				} else if pod.Annotations.ContainerName == "POD" {
+					guaranteedInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
+				}
+				Expect(err).ToNot(HaveOccurred(), "Failed to parse guaranteed pod cpus")
+			}
+
+			By("Getting information for best-effort pod containers")
+			BEPods := getConfigJsonInfo(bestEffortPod, "test", workerRTNode)
+			for _, pod := range BEPods {
+				if pod.Annotations.ContainerName == "test" {
+					bestEffortPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
+				} else if pod.Annotations.ContainerName == "POD" {
+					bestEffortInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
+				}
+				Expect(err).ToNot(HaveOccurred(), "Failed to parse best-effort pod cpus")
+			}
+
+			By("Validating CPU allocation for guaranteed and best-effort pod containers")
+			isolatedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Isolated))
+			Expect(err).ToNot(HaveOccurred(), "Failed to parse isolated CPU set from performance profile")
+			reservedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
+			Expect(err).ToNot(HaveOccurred(), "Failed to parse reserved CPU set from performance profile")
+
+			Expect(guaranteedInitPodCpus.IsSubsetOf(reservedCpus)).
+				To(BeTrue(), "Guaranteed init pod CPUs (%s) are not strictly within the reserved set (%s)", guaranteedInitPodCpus, reservedCpus)
+			Expect(guaranteedInitPodCpus.IsSubsetOf(isolatedCpus)).
+				To(BeFalse(), "Guaranteed init pod CPUs (%s) are within the isolated cpu set (%s)", guaranteedInitPodCpus, isolatedCpus)
+			Expect(guaranteedPodCpus.IsSubsetOf(isolatedCpus)).
+				To(BeTrue(), "Guaranteed pod CPUs (%s) are not strictly within the isolated set (%s)", guaranteedPodCpus, isolatedCpus)
+
+			availableForBestEffort := isolatedCpus.Union(reservedCpus).Difference(guaranteedPodCpus)
+			Expect(bestEffortInitPodCpus.IsSubsetOf(reservedCpus)).
+				To(BeTrue(), "Best-effort init pod CPUs (%s) are not strictly within the reserved set (%s)", bestEffortInitPodCpus, reservedCpus)
+			Expect(bestEffortPodCpus.IsSubsetOf(availableForBestEffort)).
+				To(BeTrue(), "Best-effort pod CPUs (%s) include CPUs not allowed (%s)", bestEffortPodCpus, availableForBestEffort)
+		})
+	})
+
 })
 
+func extractConfigInfo(output string) (*ContainerConfig, error) {
+	var config ContainerConfig
+	output = strings.TrimSpace(output)
+	err := json.Unmarshal([]byte(output), &config)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unmarshal config.json: %v", err)
+	}
+	return &config, nil
+}
+
+func getConfigJsonInfo(pod *corev1.Pod, containerName string, workerRTNode *corev1.Node) []*ContainerConfig {
+	var pods []*ContainerConfig
+	path := "/rootfs/var/lib/containers/storage/overlay-containers/"
+	podName := pod.Name
+	cmd := []string{
+		"/bin/bash", "-c",
+		fmt.Sprintf(
+			`find %s -type f -exec grep -lP '\"io.kubernetes.pod.name\": \"%s\"' {} \; -exec grep -l '\"io.kubernetes.container.name\": \"%s\"' {} \; | sort -u`,
+			path, podName, containerName,
+		),
+	}
+	output, err := nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to search for config.json with podName %s and containerName %s", podName, containerName))
+	filePaths := strings.Split(string(output), "\n")
+	for _, filePath := range filePaths {
+		if filePath == "" {
+			continue
+		}
+		cmd = []string{"/bin/bash", "-c", fmt.Sprintf("cat %s", filePath)}
+		output, err = nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
+		Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to read config.json for container: %s", filePath))
+
+		configData := testutils.ToString(output)
+		config, err := extractConfigInfo(configData)
+		if err != nil {
+			testlog.Errorf("Error extracting config info: %v", err)
+			continue
+		}
+		pods = append(pods, config)
+		testlog.Infof("Pod Name: %s", config.Annotations.PodName)
+		testlog.Infof("Container Name: %s", config.Annotations.ContainerName)
+		testlog.Infof("Hostname: %s", config.Hostname)
+		testlog.Infof("Arguments: %s", config.Process.Args)
+		testlog.Infof("CPUs: %s", config.Linux.Resources.CPU.CPUs)
+	}
+	return pods
+}
+
+func makePod(ctx context.Context, workerRTNode *corev1.Node, guaranteed bool) *corev1.Pod {
+	testPod := pods.GetTestPod()
+	testPod.Namespace = testutils.NamespaceTesting
+	testPod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
+	if guaranteed {
+		testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				corev1.ResourceCPU:    resource.MustParse("2"),
+				corev1.ResourceMemory: resource.MustParse("200Mi"),
+			},
+		}
+	}
+	profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+	runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
+	testPod.Spec.RuntimeClassName = &runtimeClass
+	return testPod
+}
+
 func checkForWorkloadPartitioning(ctx context.Context) bool {
 	// Look for the correct Workload Partition annotation in
 	// a crio configuration file on the target node
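
For reviewers who want to sanity-check the cpuset arithmetic behind the new
assertions, below is a minimal, self-contained sketch using k8s.io/utils/cpuset
(the same package the test relies on for Parse/IsSubsetOf/Union/Difference).
The CPU ranges are hypothetical example values, not taken from any real
performance profile.

	package main

	import (
		"fmt"

		"k8s.io/utils/cpuset"
	)

	func main() {
		// Hypothetical profile split: CPUs 0-1 reserved for infra
		// (runc's own container, pause containers), CPUs 2-7 isolated.
		reserved, _ := cpuset.Parse("0-1")
		isolated, _ := cpuset.Parse("2-7")

		// Suppose the guaranteed pod was pinned to two isolated CPUs.
		guaranteed, _ := cpuset.Parse("2-3")

		// Best-effort containers may float over everything except the
		// CPUs exclusively assigned to the guaranteed pod.
		availableForBestEffort := isolated.Union(reserved).Difference(guaranteed)

		fmt.Println(guaranteed.IsSubsetOf(isolated))               // true: pinned inside the isolated set
		fmt.Println(guaranteed.IsSubsetOf(reserved))               // false: must not touch reserved CPUs
		fmt.Println(availableForBestEffort.String())               // "0-1,4-7"
		fmt.Println(guaranteed.IsSubsetOf(availableForBestEffort)) // false: excluded from best-effort
	}

The Union/Difference step mirrors how the test derives availableForBestEffort:
best-effort containers keep access to reserved and unclaimed isolated CPUs,
while the guaranteed pod's exclusive CPUs drop out of the allowed set.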