OCPBUGS-35911: E2E: Add test to verify runc process excludes the cpus used by pod. #1088

Open
wants to merge 1 commit into base: master
177 changes: 177 additions & 0 deletions test/e2e/performanceprofile/functests/1_performance/cpu_management.go
@@ -2,6 +2,7 @@ package __performance

import (
"context"
"encoding/json"
"fmt"
"os"
"regexp"
@@ -50,6 +51,34 @@ var profile *performancev2.PerformanceProfile
const restartCooldownTime = 1 * time.Minute
const cgroupRoot string = "/sys/fs/cgroup"

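// The types below mirror the subset of the OCI runtime-spec config.json fields that this
// test inspects. A trimmed sketch of such a file (values hypothetical; real files carry
// many more fields):
//
//	{
//	  "process":     {"args": ["..."]},
//	  "hostname":    "...",
//	  "annotations": {"io.kubernetes.container.name": "test", "io.kubernetes.pod.name": "..."},
//	  "linux":       {"resources": {"cpu": {"cpus": "2-3"}}}
//	}
//
// "cpus" is the cpuset the runtime confines the container to.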
type CPUVals struct {
CPUs string `json:"cpus"`
}

type CPUResources struct {
CPU CPUVals `json:"cpu"`
}

type LinuxResources struct {
Resources CPUResources `json:"resources"`
}

type Process struct {
Args []string `json:"args"`
}

type Annotations struct {
ContainerName string `json:"io.kubernetes.container.name"`
PodName string `json:"io.kubernetes.pod.name"`
}

type ContainerConfig struct {
Process Process `json:"process"`
Hostname string `json:"hostname"`
Annotations Annotations `json:"annotations"`
Linux LinuxResources `json:"linux"`
}

var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
var (
balanceIsolated bool
@@ -892,8 +921,156 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
})
})

Context("Check container runtimes cpu usage", Label(string(label.OpenShift)), func() {
var guaranteedPod, bestEffortPod *corev1.Pod
var guaranteedPodCpus, guaranteedInitPodCpus cpuset.CPUSet
var bestEffortPodCpus, bestEffortInitPodCpus cpuset.CPUSet
Comment on lines +926 to +927

Contributor: do we ever use or need init containers?

Contributor Author: Init containers here refer to the runc container used to initialize and deploy the main pod container.


It("[test_id: 74461] Verify that runc excludes the cpus used by guaranteed pod", func() {
By("Creating a guaranteed pod")
guaranteedPod = makePod(ctx, workerRTNode, true)
err := testclient.Client.Create(ctx, guaranteedPod)
Expect(err).ToNot(HaveOccurred(), "Failed to create guaranteed pod")
defer func() {
if guaranteedPod != nil {
testlog.Infof("deleting pod %q", guaranteedPod.Name)
deleteTestPod(ctx, guaranteedPod)
}
}()

By("Waiting for guaranteed pod to be ready")
_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(guaranteedPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
Expect(guaranteedPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "Guaranteed pod does not have the correct QOSClass")
testlog.Infof("Guaranteed pod %s/%s was successfully created", guaranteedPod.Namespace, guaranteedPod.Name)

By("Creating a best-effort pod")
bestEffortPod = makePod(ctx, workerRTNode, false)
err = testclient.Client.Create(ctx, bestEffortPod)
Expect(err).ToNot(HaveOccurred(), "Failed to create best-effort pod")
defer func() {
if bestEffortPod != nil {
testlog.Infof("deleting pod %q", bestEffortPod.Name)
deleteTestPod(ctx, bestEffortPod)
}
}()

By("Waiting for best-effort pod to be ready")
_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(bestEffortPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Best-effort pod did not become ready in time")
testlog.Infof("BestEffort pod %s/%s was successfully created", bestEffortPod.Namespace, bestEffortPod.Name)

By("Getting Information for guaranteed POD containers")
GuPods := getConfigJsonInfo(guaranteedPod, "test", workerRTNode)
for _, pod := range GuPods {
if pod.Annotations.ContainerName == "test" {
guaranteedPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
} else if pod.Annotations.ContainerName == "POD" {
guaranteedInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
}
Expect(err).ToNot(HaveOccurred(), "Failed to parse GU POD cpus")
}

By("Getting Information for BestEffort POD containers")
BEPods := getConfigJsonInfo(bestEffortPod, "test", workerRTNode)
for _, pod := range BEPods {
if pod.Annotations.ContainerName == "test" {
bestEffortPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
} else if pod.Annotations.ContainerName == "POD" {
bestEffortInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
}
Expect(err).ToNot(HaveOccurred(), "Failed to parse BE POD cpus")
}

By("Validating CPU allocation for Guaranteed and Best-Effort pod containers")
isolatedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Isolated))
Expect(err).ToNot(HaveOccurred(), "Failed to parse isolated CPU set from performance profile")
reservedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
Expect(err).ToNot(HaveOccurred(), "Failed to parse reserved CPU set from performance profile")

Expect(guaranteedInitPodCpus.IsSubsetOf(reservedCpus)).
To(BeTrue(), "Guaranteed Init pod CPUs (%s) are not strictly within the reserved set (%s)", guaranteedInitPodCpus, reservedCpus)
Expect(guaranteedInitPodCpus.IsSubsetOf(isolatedCpus)).
To(BeFalse(), "Guaranteed Init pod CPUs (%s) are within the isolated cpu set (%s)", guaranteedInitPodCpus, isolatedCpus)
Expect(guaranteedPodCpus.IsSubsetOf(isolatedCpus)).
To(BeTrue(), "Guaranteed pod CPUs (%s) are not strictly within the isolated set (%s)", guaranteedPodCpus, isolatedCpus)

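// Hypothetical example: with reserved=0-1, isolated=2-7 and the guaranteed pod pinned to
// CPUs 2-3, best-effort containers may only use 0-1,4-7.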
availableForBestEffort := isolatedCpus.Union(reservedCpus).Difference(guaranteedPodCpus)
Expect(bestEffortInitPodCpus.IsSubsetOf(reservedCpus)).
To(BeTrue(), "Best-Effort Init pod CPUs (%s) include CPUs not allowed (%s)", bestEffortInitPodCpus, availableForBestEffort)
Expect(bestEffortPodCpus.IsSubsetOf(availableForBestEffort)).
To(BeTrue(), "Best-Effort pod CPUs (%s) include CPUs not allowed (%s)", bestEffortPodCpus, availableForBestEffort)
})
})

})

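// extractConfigInfo unmarshals the raw config.json contents captured from the node into
// the ContainerConfig subset declared above.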
func extractConfigInfo(output string) (*ContainerConfig, error) {
var config ContainerConfig
output = strings.TrimSpace(output)
err := json.Unmarshal([]byte(output), &config)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal config.json: %v", err)
}
return &config, nil
}

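// getConfigJsonInfo searches the node's container storage for config.json files carrying the
// given pod name and container name annotations, parses each match, and returns the parsed
// configs; the calling test then distinguishes the workload container from the infra "POD"
// container by its io.kubernetes.container.name annotation.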
func getConfigJsonInfo(pod *corev1.Pod, containerName string, workerRTNode *corev1.Node) []*ContainerConfig {
Contributor: I think this is the best we can do, but at the same time AFAIK this is not sufficient to ensure that runc (or crun) never actually runs on any isolated CPU. If, as IIRC, the goal is to test that crun (or runc) never runs on isolated CPUs, then this test helps but is not sufficient.

Contributor Author: One approach to testing is to verify whether runc/crun isolates itself from the isolated CPUs by following the method defined in the OCI specification, which involves generating a config.json and adhering to the specified CPUs based on the applied profile. Alternatively, a tracing tool like bpftrace could be used to monitor all runc calls and inspect their CPU affinity. However, that method is quite invasive and may be challenging to implement IMO.

Contributor: Well, does the process which writes config.json run only on reserved CPUs? And does crun (or runc) honor the settings in config.json when setting its own affinity? IOW, who decides and who enforces that runc (or crun) only runs on reserved CPUs? Your test seems fine for the workload, but there are open questions for the infra proper. On bpftrace: I agree, but the problem here is first and foremost testing the right thing.

Contributor Author: @ffromani Do you have any other way in mind?

Contributor Author: To ensure the check is robust, I have added another Expect to verify that it honors the CPUs provided through the profile; the latest commit addresses your concerns.

Contributor: thanks, I'll defer the decision to @yanirq and @Tal-or

var pods []*ContainerConfig
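// Assumption: nodes.ExecCommand runs from a node debug pod, so the host filesystem is
// mounted under /rootfs and the CRI-O container storage is reachable at this path.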
path := "/rootfs/var/lib/containers/storage/overlay-containers/"
podName := pod.Name
cmd := []string{
"/bin/bash", "-c",
fmt.Sprintf(
`find %s -type f -exec grep -lP '\"io.kubernetes.pod.name\": \"%s\"' {} \; -exec grep -l '\"io.kubernetes.container.name\": \"%s\"' {} \; | sort -u`,
path, podName, containerName,
),
}
output, err := nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to search for config.json with podName %s and containerName %s", podName, containerName))
filePaths := strings.Split(string(output), "\n")
for _, filePath := range filePaths {
if filePath == "" {
continue
}
cmd = []string{"/bin/bash", "-c", fmt.Sprintf("cat %s", filePath)}
output, err = nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to read config.json for container : %s", filePath))

configData := testutils.ToString(output)
config, err := extractConfigInfo(configData)
if err != nil {
testlog.Errorf("Error extracting config info: %v", err)
continue
}
pods = append(pods, config)
testlog.Infof("Pod Name: %s", config.Annotations.PodName)
testlog.Infof("Container Name: %s", config.Annotations.ContainerName)
testlog.Infof("Hostname: %s", config.Hostname)
testlog.Infof("Arguments: %s", config.Process.Args)
testlog.Infof("CPUs: %s", config.Linux.Resources.CPU.CPUs)
}
return pods
}

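// makePod builds a test pod pinned to workerRTNode and assigned the performance-profile
// runtime class; with guaranteed=true it sets CPU/memory limits (2 CPUs, 200Mi) so the pod
// is admitted with the Guaranteed QoS class.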
func makePod(ctx context.Context, workerRTNode *corev1.Node, guaranteed bool) *corev1.Pod {
testPod := pods.GetTestPod()
testPod.Namespace = testutils.NamespaceTesting
testPod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
if guaranteed {
testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
},
}
}
profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
testPod.Spec.RuntimeClassName = &runtimeClass
return testPod
}

func checkForWorkloadPartitioning(ctx context.Context) bool {
// Look for the correct Workload Partition annotation in
// a crio configuration file on the target node