OCPBUGS-35911: E2E: Add test to verify the runc process excludes the CPUs used by the pod #1088
@@ -2,6 +2,7 @@ package __performance

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"regexp"

@@ -50,6 +51,34 @@ var profile *performancev2.PerformanceProfile

const restartCooldownTime = 1 * time.Minute
const cgroupRoot string = "/sys/fs/cgroup"
type CPUVals struct {
	CPUs string `json:"cpus"`
}

type CPUResources struct {
	CPU CPUVals `json:"cpu"`
}

type LinuxResources struct {
	Resources CPUResources `json:"resources"`
}

type Process struct {
	Args []string `json:"args"`
}

type Annotations struct {
	ContainerName string `json:"io.kubernetes.container.name"`
	PodName       string `json:"io.kubernetes.pod.name"`
}

type ContainerConfig struct {
	Process     Process        `json:"process"`
	Hostname    string         `json:"hostname"`
	Annotations Annotations    `json:"annotations"`
	Linux       LinuxResources `json:"linux"`
}
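These structs deliberately pick out only the handful of fields the test reads from each container's OCI config.json (the runtime configuration CRI-O writes and runc consumes). A trimmed fragment of such a file — field values here are purely illustrative — showing how the JSON tags line up:

{
    "process": { "args": ["sleep", "10h"] },
    "hostname": "test-pod",
    "annotations": {
        "io.kubernetes.container.name": "test",
        "io.kubernetes.pod.name": "test-pod"
    },
    "linux": {
        "resources": {
            "cpu": { "cpus": "4-5" }
        }
    }
}

Any other keys present in the real file are simply ignored by json.Unmarshal.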
|
||
var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { | ||
var ( | ||
balanceIsolated bool | ||
|
@@ -892,8 +921,156 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() { | |
}) | ||
}) | ||
|
||
Context("Check container runtimes cpu usage", Label(string(label.OpenShift)), func() { | ||
var guaranteedPod, bestEffortPod *corev1.Pod | ||
var guaranteedPodCpus, guaranteedInitPodCpus cpuset.CPUSet | ||
var bestEffortPodCpus, bestEffortInitPodCpus cpuset.CPUSet | ||
|
||
It("[test_id: 74461] Verify that runc excludes the cpus used by guaranteed pod", func() { | ||
			By("Creating a guaranteed pod")
			guaranteedPod = makePod(ctx, workerRTNode, true)
			err := testclient.Client.Create(ctx, guaranteedPod)
			Expect(err).ToNot(HaveOccurred(), "Failed to create guaranteed pod")
			defer func() {
				if guaranteedPod != nil {
					testlog.Infof("deleting pod %q", guaranteedPod.Name)
					deleteTestPod(ctx, guaranteedPod)
				}
			}()
			By("Waiting for guaranteed pod to be ready")
			_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(guaranteedPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
			Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")
			Expect(guaranteedPod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed), "Guaranteed pod does not have the correct QOSClass")
			testlog.Infof("Guaranteed pod %s/%s was successfully created", guaranteedPod.Namespace, guaranteedPod.Name)

			By("Creating a best-effort pod")
			bestEffortPod = makePod(ctx, workerRTNode, false)
			err = testclient.Client.Create(ctx, bestEffortPod)
			Expect(err).ToNot(HaveOccurred(), "Failed to create best-effort pod")
			defer func() {
				if bestEffortPod != nil {
					testlog.Infof("deleting pod %q", bestEffortPod.Name)
					deleteTestPod(ctx, bestEffortPod)
				}
			}()

			By("Waiting for best-effort pod to be ready")
			_, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(bestEffortPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
			Expect(err).ToNot(HaveOccurred(), "Best-effort pod did not become ready in time")
			testlog.Infof("BestEffort pod %s/%s was successfully created", bestEffortPod.Namespace, bestEffortPod.Name)

			By("Getting Information for guaranteed POD containers")
			GuPods := getConfigJsonInfo(guaranteedPod, "test", workerRTNode)
			for _, pod := range GuPods {
				if pod.Annotations.ContainerName == "test" {
					guaranteedPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
				} else if pod.Annotations.ContainerName == "POD" {
					guaranteedInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
				}
				Expect(err).ToNot(HaveOccurred(), "Failed to parse GU POD cpus")
			}

			By("Getting Information for BestEffort POD containers")
			BEPods := getConfigJsonInfo(bestEffortPod, "test", workerRTNode)
			for _, pod := range BEPods {
				if pod.Annotations.ContainerName == "test" {
					bestEffortPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
				} else if pod.Annotations.ContainerName == "POD" {
					bestEffortInitPodCpus, err = cpuset.Parse(pod.Linux.Resources.CPU.CPUs)
				}
				Expect(err).ToNot(HaveOccurred(), "Failed to parse BE POD cpus")
			}

			By("Validating CPU allocation for Guaranteed and Best-Effort pod containers")
			isolatedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Isolated))
			Expect(err).ToNot(HaveOccurred(), "Failed to parse isolated CPU set from performance profile")
			reservedCpus, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved))
			Expect(err).ToNot(HaveOccurred(), "Failed to parse reserved CPU set from performance profile")

			Expect(guaranteedInitPodCpus.IsSubsetOf(reservedCpus)).
				To(BeTrue(), "Guaranteed Init pod CPUs (%s) are not strictly within the reserved set (%s)", guaranteedInitPodCpus, reservedCpus)
			Expect(guaranteedInitPodCpus.IsSubsetOf(isolatedCpus)).
				To(BeFalse(), "Guaranteed Init pod CPUs (%s) are within the isolated cpu set (%s)", guaranteedInitPodCpus, isolatedCpus)
			Expect(guaranteedPodCpus.IsSubsetOf(isolatedCpus)).
				To(BeTrue(), "Guaranteed pod CPUs (%s) are not strictly within the isolated set (%s)", guaranteedPodCpus, isolatedCpus)

			availableForBestEffort := isolatedCpus.Union(reservedCpus).Difference(guaranteedPodCpus)
			Expect(bestEffortInitPodCpus.IsSubsetOf(reservedCpus)).
				To(BeTrue(), "Best-Effort Init pod CPUs (%s) are not strictly within the reserved set (%s)", bestEffortInitPodCpus, reservedCpus)
			Expect(bestEffortPodCpus.IsSubsetOf(availableForBestEffort)).
				To(BeTrue(), "Best-Effort pod CPUs (%s) include CPUs not allowed (%s)", bestEffortPodCpus, availableForBestEffort)
		})
	})

})
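The subset/union/difference assertions above are plain set algebra from k8s.io/utils/cpuset. A minimal standalone sketch of the same reasoning, with made-up CPU ranges:

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	// Illustrative values: reserved and isolated sets as a performance
	// profile might define them, plus the CPUs pinned to a guaranteed pod.
	reserved, _ := cpuset.Parse("0-1")
	isolated, _ := cpuset.Parse("2-7")
	guaranteed, _ := cpuset.Parse("2-3")

	// Best-effort containers may run anywhere except the guaranteed pod's CPUs.
	available := isolated.Union(reserved).Difference(guaranteed)

	fmt.Println(guaranteed.IsSubsetOf(isolated)) // true: pinned CPUs sit inside the isolated set
	fmt.Println(available.String())              // "0-1,4-7"
}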
func extractConfigInfo(output string) (*ContainerConfig, error) {
	var config ContainerConfig
	output = strings.TrimSpace(output)
	err := json.Unmarshal([]byte(output), &config)
	if err != nil {
		return nil, fmt.Errorf("failed to unmarshal config.json: %v", err)
	}
	return &config, nil
}
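A quick usage sketch for extractConfigInfo — the payload below is an abbreviated, made-up config.json, not output captured from a node:

	raw := `{"annotations": {"io.kubernetes.container.name": "test", "io.kubernetes.pod.name": "gu-pod"},
		"linux": {"resources": {"cpu": {"cpus": "2-3"}}}}`
	config, err := extractConfigInfo(raw)
	if err != nil {
		// handle the unmarshal error
	}
	// Fields absent from the payload (process, hostname) stay zero-valued.
	fmt.Println(config.Linux.Resources.CPU.CPUs) // prints "2-3"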
Review discussion on getConfigJsonInfo:

- "I think this is the best we can do, but at the same time AFAIK this is not sufficient to ensure that …"
- "One approach to testing is to verify whether … Alternatively, a tracing tool like …"
- "Well, does the process which writes … Your test seems fine for the workload, but there are open questions for the infra proper."
- "I agree, but the problem here is first and foremost testing the right thing."
- "@ffromani Do you have any other way in mind?"
- "To ensure the check is robust, I have added another …"

func getConfigJsonInfo(pod *corev1.Pod, containerName string, workerRTNode *corev1.Node) []*ContainerConfig {
	var pods []*ContainerConfig
	path := "/rootfs/var/lib/containers/storage/overlay-containers/"
	podName := pod.Name
	cmd := []string{
		"/bin/bash", "-c",
		fmt.Sprintf(
			`find %s -type f -exec grep -lP '\"io.kubernetes.pod.name\": \"%s\"' {} \; -exec grep -l '\"io.kubernetes.container.name\": \"%s\"' {} \; | sort -u`,
			path, podName, containerName,
		),
	}
	output, err := nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to search for config.json with podName %s and containerName %s", podName, containerName))
	filePaths := strings.Split(string(output), "\n")
	for _, filePath := range filePaths {
		if filePath == "" {
			continue
		}
		cmd = []string{"/bin/bash", "-c", fmt.Sprintf("cat %s", filePath)}
		output, err = nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
		Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Failed to read config.json for container: %s", filePath))

		configData := testutils.ToString(output)
		config, err := extractConfigInfo(configData)
		if err != nil {
			testlog.Errorf("Error extracting config info: %v", err)
			continue
		}
		pods = append(pods, config)
		testlog.Infof("Pod Name: %s", config.Annotations.PodName)
		testlog.Infof("Container Name: %s", config.Annotations.ContainerName)
		testlog.Infof("Hostname: %s", config.Hostname)
		testlog.Infof("Arguments: %s", config.Process.Args)
		testlog.Infof("CPUs: %s", config.Linux.Resources.CPU.CPUs)
	}
	return pods
}
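The review thread above asks how to confirm that the runc process itself stays off the isolated CPUs, beyond what config.json declares. One possible cross-check — a hypothetical helper, not part of this PR — is to read the kernel-reported affinity from /proc/<pid>/status and assert it against the reserved set; in this suite it would have to run on the node (e.g. via nodes.ExecCommand) rather than in the test process:

// cpusAllowedList is a hypothetical helper: it returns a process's CPU
// affinity as the kernel reports it in the Cpus_allowed_list field of
// /proc/<pid>/status.
func cpusAllowedList(pid int) (cpuset.CPUSet, error) {
	data, err := os.ReadFile(fmt.Sprintf("/proc/%d/status", pid))
	if err != nil {
		return cpuset.CPUSet{}, err
	}
	for _, line := range strings.Split(string(data), "\n") {
		if v, ok := strings.CutPrefix(line, "Cpus_allowed_list:"); ok {
			return cpuset.Parse(strings.TrimSpace(v))
		}
	}
	return cpuset.CPUSet{}, fmt.Errorf("Cpus_allowed_list not found for pid %d", pid)
}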
func makePod(ctx context.Context, workerRTNode *corev1.Node, guaranteed bool) *corev1.Pod {
	testPod := pods.GetTestPod()
	testPod.Namespace = testutils.NamespaceTesting
	testPod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
	if guaranteed {
		testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
			Limits: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("2"),
				corev1.ResourceMemory: resource.MustParse("200Mi"),
			},
		}
	}
	profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
	runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
	testPod.Spec.RuntimeClassName = &runtimeClass
	return testPod
}
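Note that makePod sets only Limits in the guaranteed case. That is enough because API-server defaulting copies the Limits into Requests when Requests are omitted, and Requests == Limits for every container is exactly what earns the Guaranteed QoS class asserted earlier. The explicit equivalent would be:

	testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
		Limits: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse("2"),
			corev1.ResourceMemory: resource.MustParse("200Mi"),
		},
		// Requests == Limits is the actual Guaranteed-QoS condition;
		// omitting Requests works because defaulting fills it from Limits.
		Requests: corev1.ResourceList{
			corev1.ResourceCPU:    resource.MustParse("2"),
			corev1.ResourceMemory: resource.MustParse("200Mi"),
		},
	}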
func checkForWorkloadPartitioning(ctx context.Context) bool {
	// Look for the correct Workload Partition annotation in
	// a crio configuration file on the target node
Review discussion:

- "Do we ever use or need init containers?"
- "Init containers here refer to the runc container used to initialize and deploy the main pod container."