Skip to content

Commit

Permalink
Added node inspector for inspecting nodes configuration
Browse files Browse the repository at this point in the history
On hypershift there is no MCO, hence there are no machine-config-daemon pods.
A different solution is therefore needed to access the underlying node and inspect its configuration.
This commit introduces a node inspector implemented as a daemonset.
Upon execution of test suites, a pod with elevated privileges and host filesystem mounted will be deployed on every node.
In addition, a Z_deconfig suite has been added for cleanup; the 'Z' prefix guarantees that it is the last suite to run.
This API will be used for both hypershift and non-hypershift systems.

Signed-off-by: Ronny Baturov <[email protected]>
  • Loading branch information
rbaturov committed Apr 15, 2024
1 parent 87d8185 commit 7473d7c
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 5 deletions.
10 changes: 5 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ pao-functests: cluster-label-worker-cnf pao-functests-only
pao-functests-only:
@echo "Cluster Version"
hack/show-cluster-version.sh
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/1_performance test/e2e/performanceprofile/functests/6_mustgather_testing test/e2e/performanceprofile/functests/10_performance_ppc" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running Functional Tests"
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/1_performance test/e2e/performanceprofile/functests/6_mustgather_testing test/e2e/performanceprofile/functests/10_performance_ppc test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running Functional Tests"

.PHONY: pao-functests-updating-profile
pao-functests-updating-profile: cluster-label-worker-cnf pao-functests-update-only
Expand All @@ -231,7 +231,7 @@ pao-functests-updating-profile: cluster-label-worker-cnf pao-functests-update-on
pao-functests-update-only:
@echo "Cluster Version"
hack/show-cluster-version.sh
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/2_performance_update test/e2e/performanceprofile/functests/3_performance_status test/e2e/performanceprofile/functests/7_performance_kubelet_node test/e2e/performanceprofile/functests/9_reboot" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional Tests"
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/2_performance_update test/e2e/performanceprofile/functests/3_performance_status test/e2e/performanceprofile/functests/7_performance_kubelet_node test/e2e/performanceprofile/functests/9_reboot test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional Tests"

.PHONY: pao-functests-performance-workloadhints
pao-functests-performance-workloadhints: cluster-label-worker-cnf pao-functests-performance-workloadhints-only
Expand All @@ -240,19 +240,19 @@ pao-functests-performance-workloadhints: cluster-label-worker-cnf pao-functests-
pao-functests-performance-workloadhints-only:
@echo "Cluster Version"
hack/show-cluster-version.sh
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/8_performance_workloadhints" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional WorkloadHints Tests"
hack/run-test.sh -t "test/e2e/performanceprofile/functests/0_config test/e2e/performanceprofile/functests/8_performance_workloadhints test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functional WorkloadHints Tests"

.PHONY: pao-functests-latency-testing
pao-functests-latency-testing: dist-latency-tests
@echo "Cluster Version"
hack/show-cluster-version.sh
hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/5_latency_testing" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functionalconfiguration latency Tests"
hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/5_latency_testing test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go" -p "-v -r --fail-fast --flake-attempts=2 --timeout=5h --junit-report=report.xml" -m "Running Functionalconfiguration latency Tests"

.PHONY: pao-functests-mixedcpus
pao-functests-mixedcpus:
@echo "Cluster Version"
hack/show-cluster-version.sh
hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/11_mixedcpus" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running MixedCPUs Tests"
hack/run-test.sh -t "./test/e2e/performanceprofile/functests/0_config ./test/e2e/performanceprofile/functests/11_mixedcpus test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go" -p "-v -r --fail-fast --flake-attempts=2 --junit-report=report.xml" -m "Running MixedCPUs Tests"

.PHONY: pao-functests-hypershift
pao-functests-hypershift:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package __performance_config_test

import (
"context"
"flag"
"log"
"os"
Expand All @@ -14,6 +15,7 @@ import (
. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2/reporters"
. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/api/errors"

ctrllog "sigs.k8s.io/controller-runtime/pkg/log"

Expand All @@ -22,7 +24,10 @@ import (

testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/k8sreporter"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/namespaces"
nodeInspector "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/node_inspector"
)

var (
Expand Down Expand Up @@ -59,6 +64,13 @@ func TestPerformanceConfig(t *testing.T) {

var _ = BeforeSuite(func() {
	Expect(testclient.ClientsEnabled).To(BeTrue())

	// Create the node inspector namespace with the data-plane client: the
	// inspector itself is created below with that same client, so the
	// namespace has to exist on the cluster that receives the daemonset.
	// A namespace that already exists (e.g. left over from a previous run)
	// is tolerated.
	err := testclient.DataPlaneClient.Create(context.TODO(), namespaces.NodeInspectorNamespace)
	if err != nil && !errors.IsAlreadyExists(err) {
		Expect(err).ToNot(HaveOccurred())
	}

	// Deploy the node inspector into that namespace.
	err = nodeInspector.Create(testclient.DataPlaneClient, testutils.NodeInspectorNamespace, testutils.NodeInspectorName, images.Test())
	Expect(err).ToNot(HaveOccurred())
})

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package Z_deconfig_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestZDeconfig is the go test entry point of the deconfig suite: it wires
// Gomega failures into Ginkgo and then runs the registered specs.
func TestZDeconfig(t *testing.T) {
	const suiteDescription = "ZDeconfig Suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}
27 changes: 27 additions & 0 deletions test/e2e/performanceprofile/functests/Z_deconfig/deconfig.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package Z_deconfig

import (
"context"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
nodeInspector "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/node_inspector"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/namespaces"
)

// This test suite is designed to perform cleanup actions that should occur after all test suites have been executed.

var _ = Describe("Deconfig", func() {
	It("Should delete the node inspector and its namespace", func() {
		// Remove the inspector with the data-plane client, i.e. the same
		// client that is used for the namespace deletion right below, so
		// both deletions target the same cluster.
		err := nodeInspector.Delete(testclient.DataPlaneClient, testutils.NodeInspectorNamespace, testutils.NodeInspectorName)
		Expect(err).ToNot(HaveOccurred())

		// Delete the namespace and wait until it is gone from the cluster.
		err = testclient.DataPlaneClient.Delete(context.TODO(), namespaces.NodeInspectorNamespace)
		Expect(err).ToNot(HaveOccurred())
		err = namespaces.WaitForDeletion(testutils.NodeInspectorNamespace, 5*time.Minute)
		Expect(err).ToNot(HaveOccurred())
	})
})
6 changes: 6 additions & 0 deletions test/e2e/performanceprofile/functests/utils/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ const (
// NamespaceTesting contains the name of the testing namespace
NamespaceTesting = "performance-addon-operators-testing"
)
const (
// NodeInspectorName is the name given to the node inspector
NodeInspectorName = "node-inspector"
// NodeInspectorNamespace is the name of the namespace used for the node inspector
NodeInspectorNamespace = "node-inspector-ns"
)

const (
// FilePathKubeletConfig contains the kubelet.conf file path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ var TestingNamespace = &corev1.Namespace{
},
}

// NodeInspectorNamespace is the namespace object in which the daemonset used
// for executing commands on nodes is deployed. Its name is taken from
// testutils.NodeInspectorNamespace.
var NodeInspectorNamespace = &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: testutils.NodeInspectorNamespace,
},
}

// WaitForDeletion waits until the namespace will be removed from the cluster
func WaitForDeletion(name string, timeout time.Duration) error {
key := types.NamespacedName{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
package node_inspector

import (
"context"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/daemonset"
)

const serviceAccountSuffix = "sa"
const clusterRoleSuffix = "cr"
const clusterRoleBindingSuffix = "crb"

// Create deploys the node inspector: a service account, a cluster role, the
// binding between the two and finally the daemonset, in that order. Objects
// that already exist are left untouched. It then waits for the daemonset to
// be running and returns the first error encountered, if any.
func Create(cli client.Client, namespace, name, image string) error {
	ctx := context.Background()

	// createIfMissing submits obj and treats "already exists" as success.
	createIfMissing := func(obj client.Object) error {
		err := cli.Create(ctx, obj)
		if err == nil || errors.IsAlreadyExists(err) {
			return nil
		}
		return err
	}

	saName := name + serviceAccountSuffix
	roleName := name + clusterRoleSuffix
	bindingName := name + clusterRoleBindingSuffix

	if err := createIfMissing(createServiceAccount(saName, namespace)); err != nil {
		return err
	}
	if err := createIfMissing(createClusterRole(roleName)); err != nil {
		return err
	}
	if err := createIfMissing(createClusterRoleBinding(bindingName, namespace, saName, roleName)); err != nil {
		return err
	}
	if err := createIfMissing(createDaemonSet(name, namespace, saName, image)); err != nil {
		return err
	}

	return daemonset.WaitToBeRunning(cli, namespace, name)
}

// Delete removes the node inspector objects in a fixed order: the daemonset,
// the service account, the cluster role binding and the cluster role.
// Objects that are not found are skipped; the first other error aborts the
// cleanup and is returned.
func Delete(cli client.Client, namespace, name string) error {
	ctx := context.Background()

	// Only name (and namespace) are filled in; that is all the delete call
	// is given to identify each object.
	targets := []client.Object{
		&appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}},
		&corev1.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: name + serviceAccountSuffix, Namespace: namespace}},
		&rbacv1.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: name + clusterRoleBindingSuffix, Namespace: namespace}},
		&rbacv1.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: name + clusterRoleSuffix}},
	}

	for _, target := range targets {
		err := cli.Delete(ctx, target)
		if err == nil || errors.IsNotFound(err) {
			continue
		}
		return err
	}
	return nil
}

// IsRunning reports the result of daemonset.IsRunning for the node inspector
// daemonset identified by namespace and name.
func IsRunning(cli client.Client, namespace, name string) (bool, error) {
	running, err := daemonset.IsRunning(cli, namespace, name)
	return running, err
}

// createDaemonSet assembles the node inspector daemonset: a single
// privileged container that sleeps forever, shares the host PID and network
// namespaces and has the host root filesystem mounted under /rootfs. The
// pods tolerate every taint and are restricted to linux nodes.
func createDaemonSet(name, namespace, serviceAccountName, image string) *appsv1.DaemonSet {
	// A fresh map per use, so object metadata, selector and pod template
	// never share the same label map.
	nameLabel := func() map[string]string {
		return map[string]string{"name": name}
	}
	hostToContainer := corev1.MountPropagationHostToContainer

	rootfsVolume := corev1.Volume{
		Name: "rootfs",
		VolumeSource: corev1.VolumeSource{
			HostPath: &corev1.HostPathVolumeSource{Path: "/"},
		},
	}

	inspector := corev1.Container{
		Name:            "node-daemon",
		Image:           image,
		Command:         []string{"/bin/bash", "-c", "sleep INF"},
		ImagePullPolicy: corev1.PullAlways,
		Resources: corev1.ResourceRequirements{
			Requests: corev1.ResourceList{
				"cpu":    resource.MustParse("20m"),
				"memory": resource.MustParse("50Mi"),
			},
		},
		SecurityContext: &corev1.SecurityContext{
			Privileged:             pointer.Bool(true),
			ReadOnlyRootFilesystem: pointer.Bool(true),
		},
		VolumeMounts: []corev1.VolumeMount{
			{
				MountPath:        "/rootfs",
				Name:             "rootfs",
				MountPropagation: &hostToContainer,
			},
		},
	}

	podSpec := corev1.PodSpec{
		HostPID:                       true,
		HostNetwork:                   true,
		ServiceAccountName:            serviceAccountName,
		TerminationGracePeriodSeconds: pointer.Int64(0),
		NodeSelector:                  map[string]string{"kubernetes.io/os": "linux"},
		Containers:                    []corev1.Container{inspector},
		Volumes:                       []corev1.Volume{rootfsVolume},
		// A toleration with only the Exists operator matches every taint.
		Tolerations: []corev1.Toleration{
			{Operator: corev1.TolerationOpExists},
		},
	}

	podTemplate := corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Annotations: map[string]string{
				"target.workload.openshift.io/management": `{"effect": "PreferredDuringScheduling"}`,
			},
			Labels: nameLabel(),
		},
		Spec: podSpec,
	}

	return &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
			Labels:    nameLabel(),
		},
		Spec: appsv1.DaemonSetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: nameLabel()},
			Template: podTemplate,
		},
	}
}

// createServiceAccount returns a service account object carrying only the
// given name and namespace.
func createServiceAccount(name, namespace string) *corev1.ServiceAccount {
	meta := metav1.ObjectMeta{
		Name:      name,
		Namespace: namespace,
	}
	return &corev1.ServiceAccount{ObjectMeta: meta}
}

// createClusterRole returns the cluster role of the node inspector. It holds
// two rules: read access (get, list, watch) to nodes, and permission to use
// the "privileged" security context constraint.
func createClusterRole(name string) *rbacv1.ClusterRole {
	readNodes := rbacv1.PolicyRule{
		APIGroups: []string{""},
		Resources: []string{"nodes"},
		Verbs:     []string{"get", "list", "watch"},
	}
	usePrivilegedSCC := rbacv1.PolicyRule{
		APIGroups:     []string{"security.openshift.io"},
		ResourceNames: []string{"privileged"},
		Resources:     []string{"securitycontextconstraints"},
		Verbs:         []string{"use"},
	}

	role := &rbacv1.ClusterRole{
		ObjectMeta: metav1.ObjectMeta{Name: name},
	}
	role.Rules = []rbacv1.PolicyRule{readNodes, usePrivilegedSCC}
	return role
}

// createClusterRoleBinding returns the ClusterRoleBinding that grants the
// cluster role clusterRoleName to the service account serviceAccountName
// living in namespace.
//
// A cluster-scoped ClusterRoleBinding is built rather than a namespaced
// RoleBinding for two reasons: Delete removes a ClusterRoleBinding with this
// name, and the cluster role created by createClusterRole contains a rule for
// "nodes", a cluster-scoped resource that a RoleBinding cannot grant access
// to. Being cluster-scoped, the binding itself carries no namespace; the
// namespace parameter only qualifies the service account subject.
func createClusterRoleBinding(name, namespace, serviceAccountName, clusterRoleName string) *rbacv1.ClusterRoleBinding {
	return &rbacv1.ClusterRoleBinding{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
		Subjects: []rbacv1.Subject{
			{
				Kind:      "ServiceAccount",
				Name:      serviceAccountName,
				Namespace: namespace,
			},
		},
		RoleRef: rbacv1.RoleRef{
			Kind:     "ClusterRole",
			Name:     clusterRoleName,
			APIGroup: "rbac.authorization.k8s.io",
		},
	}
}

0 comments on commit 7473d7c

Please sign in to comment.