E2E: Add test to verify runc uses valid cpus
Adding a test to verify that runc does not use CPUs assigned to guaranteed pods.

Signed-off-by: Sargun Narula <[email protected]>

Updated CPU checking to be per container: runc provides a config.json for each container of every pod type, but the runtime's own infra container (annotated "POD") must always use reserved CPUs.
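For context, the per-container config.json carries the Kubernetes annotations and the cpuset assigned to that container. A trimmed, illustrative excerpt of the fields the test reads (values are invented; real files contain many more fields):

{
  "process": { "args": ["sleep", "inf"] },
  "hostname": "test-pod",
  "annotations": {
    "io.kubernetes.container.name": "test",
    "io.kubernetes.pod.name": "test-pod"
  },
  "linux": {
    "resources": {
      "cpu": { "cpus": "4,6" }
    }
  }
}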

Signed-off-by: Sargun Narula <[email protected]>
SargunNarula committed Dec 9, 2024
1 parent b5a338f commit 4e72640
test/e2e/performanceprofile/functests/1_performance/cpu_management.go (175 additions, 0 deletions)
@@ -2,6 +2,7 @@ package __performance

import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
@@ -50,6 +51,34 @@ var profile *performancev2.PerformanceProfile
const restartCooldownTime = 1 * time.Minute
const cgroupRoot string = "/sys/fs/cgroup"

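// The following types mirror the subset of the OCI runtime config.json that the test reads:
// the Kubernetes pod/container annotations and the cpuset assigned under linux.resources.cpu.cpus.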
type CPUVals struct {
CPUs string `json:"cpus"`
}

type CPUResources struct {
CPU CPUVals `json:"cpu"`
}

type LinuxResources struct {
Resources CPUResources `json:"resources"`
}

type Process struct {
Args []string `json:"args"`
}

type Annotations struct {
ContainerName string `json:"io.kubernetes.container.name"`
PodName string `json:"io.kubernetes.pod.name"`
}

type ContainerConfig struct {
Process Process `json:"process"`
Hostname string `json:"hostname"`
Annotations Annotations `json:"annotations"`
Linux LinuxResources `json:"linux"`
}

var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
var (
balanceIsolated bool
@@ -889,8 +918,154 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
})
})

Context("Check container runtimes cpu usage", func() {
var guaranteedPod, bestEffortPod *corev1.Pod
var guaranteedPodCpus, guaranteedInitPodCpus cpuset.CPUSet
var bestEffortPodCpus, bestEffortInitPodCpus cpuset.CPUSet

It("[test_id: 74461] Verify that runc excludes the cpus used by guaranteed pod", func() {
By("Creating a guaranteed pod")
guaranteedPod = makePod(ctx, workerRTNode, true)
err := testclient.Client.Create(ctx, guaranteedPod)
Expect(err).ToNot(HaveOccurred(), "Failed to create guaranteed pod")

By("Waiting for guaranteed pod to be ready")
_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(guaranteedPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
Expect(guaranteedPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "Guaranteed pod does not have the correct QOSClass")
testlog.Infof("Guaranteed pod %s/%s was successfully created", guaranteedPod.Namespace, guaranteedPod.Name)
defer func() {
if guaranteedPod != nil {
testlog.Infof("deleting pod %q", guaranteedPod.Name)
deleteTestPod(ctx, guaranteedPod)
}
}()

By("Creating a best-effort pod")
bestEffortPod = makePod(ctx, workerRTNode, false)
err = testclient.Client.Create(ctx, bestEffortPod)
Expect(err).ToNot(HaveOccurred(), "Failed to create best-effort pod")
defer func() {
if bestEffortPod != nil {
testlog.Infof("deleting pod %q", bestEffortPod.Name)
deleteTestPod(ctx, bestEffortPod)
}
}()

By("Waiting for best-effort pod to be ready")
_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(bestEffortPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Best-effort pod did not become ready in time")
testlog.Infof("BestEffort pod %s/%s was successfully created", bestEffortPod.Namespace, bestEffortPod.Name)

By("Getting information for guaranteed pod containers")
GuPods := getConfigJsonInfo(guaranteedPod, "test", workerRTNode)
for _, pod := range GuPods {
if pod.Annotations.ContainerName == "test" {
guaranteedPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
} else if pod.Annotations.ContainerName == "POD" {
guaranteedInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
}
Expect(err).ToNot(HaveOccurred(), "Failed to parse GU POD cpus")
}

By("Getting information for best-effort pod containers")
BEPods := getConfigJsonInfo(bestEffortPod, "test", workerRTNode)
for _, pod := range BEPods {
if pod.Annotations.ContainerName == "test" {
bestEffortPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
} else if pod.Annotations.ContainerName == "POD" {
bestEffortInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
}
Expect(err).ToNot(HaveOccurred(), "Failed to parse BE POD cpus")
}

By("Validating CPU allocation for Guaranteed and Best-Effort pod containers")
isolatedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Isolated))
Expect(err).ToNot(HaveOccurred(), "Failed to parse isolated CPU set from performance profile")
reservedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
Expect(err).ToNot(HaveOccurred(), "Failed to parse reserved CPU set from performance profile")

Expect(guaranteedInitPodCpus.IsSubsetOf(reservedCpus)).
To(BeTrue(), "Guaranteed Init pod CPUs (%s) are not a subset of the reserved set (%s)", guaranteedInitPodCpus, reservedCpus)
Expect(guaranteedPodCpus.IsSubsetOf(isolatedCpus)).
To(BeTrue(), "Guaranteed pod CPUs (%s) are not a subset of the isolated set (%s)", guaranteedPodCpus, isolatedCpus)

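// Best-effort containers may run on any CPU except those exclusively assigned to the guaranteed
// container: the union of isolated and reserved CPUs minus the guaranteed pod's CPUs. Infra (POD)
// containers must stay on reserved CPUs.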
availableForBestEffort := isolatedCpus.Union(reservedCpus).Difference(guaranteedPodCpus)
Expect(bestEffortInitPodCpus.IsSubsetOf(reservedCpus)).
To(BeTrue(), "Best-Effort Init pod CPUs (%s) are not a subset of the reserved set (%s)", bestEffortInitPodCpus, reservedCpus)
Expect(bestEffortPodCpus.IsSubsetOf(availableForBestEffort)).
To(BeTrue(), "Best-Effort pod CPUs (%s) include CPUs not allowed (%s)", bestEffortPodCpus, availableForBestEffort)
})
})

})

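// extractConfigInfo unmarshals the raw contents of a container's config.json into a ContainerConfig.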
func extractConfigInfo(output string) (*ContainerConfig, error) {
var config ContainerConfig
output = strings.TrimSpace(output)
err := json.Unmarshal([]byte(output), &config)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal config.json: %v", err)
}
return &config, nil
}

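// getConfigJsonInfo searches the node's container storage for config.json files annotated with the
// pod's name, reads each match, and returns the parsed ContainerConfig entries for the pod's
// containers, including the infra container annotated "POD".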
func getConfigJsonInfo(pod *corev1.Pod, containerName string, workerRTNode *corev1.Node) []*ContainerConfig {
var pods []*ContainerConfig
path := "/rootfs/var/lib/containers/storage/overlay-containers/"
podName := pod.Name
cmd := []string{
"/bin/bash", "-c",
fmt.Sprintf(
`find %s -type f -exec grep -lP '\"io.kubernetes.pod.name\": \"%s\"' {} \; -exec grep -l '\"io.kubernetes.container.name\": \"%s\"' {} \; | sort -u`,
path, podName, containerName,
),
}
output, err := nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to search for config.json with podName %s and containerName %s", podName, containerName))
filePaths := strings.Split(string(output), "\n")
for _, filePath := range filePaths {
if filePath == "" {
continue
}
cmd = []string{"/bin/bash", "-c", fmt.Sprintf("cat %s", filePath)}
output, err = nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to read config.json for container: %s", filePath))

configData := testutils.ToString(output)
config, err := extractConfigInfo(configData)
if err != nil {
testlog.Errorf("Error extracting config info: %v", err)
continue
}
pods = append(pods, config)
testlog.Infof("Pod Name: %s", config.Annotations.PodName)
testlog.Infof("Container Name: %s", config.Annotations.ContainerName)
testlog.Infof("Hostname: %s", config.Hostname)
testlog.Infof("Arguments: %s", config.Process.Args)
testlog.Infof("CPUs: %s", config.Linux.Resources.CPU.CPUs)
}
return pods
}

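// makePod builds a test pod pinned to workerRTNode with the performance-profile runtime class; when
// guaranteed is true, CPU and memory limits are set (requests default to limits) so the pod is
// admitted with the Guaranteed QoS class.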
func makePod(ctx context.Context, workerRTNode *corev1.Node, guaranteed bool) *corev1.Pod {
testPod := pods.GetTestPod()
testPod.Namespace = testutils.NamespaceTesting
testPod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
if guaranteed {
testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
},
}
}
profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
testPod.Spec.RuntimeClassName = &runtimeClass
return testPod
}

func checkForWorkloadPartitioning(ctx context.Context) bool {
// Look for the correct Workload Partition annotation in
// a crio configuration file on the target node
