Skip to content

Commit

Permalink
add soak test suit for state file
Browse files Browse the repository at this point in the history
  • Loading branch information
M00nF1sh committed Oct 20, 2022
1 parent 5a959aa commit c36075b
Show file tree
Hide file tree
Showing 10 changed files with 459 additions and 60 deletions.
249 changes: 249 additions & 0 deletions test/e2e/soak/single_node_pod_launch_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
package soak_test

import (
"context"
"encoding/json"
"fmt"
"github.com/aws/amazon-vpc-cni-k8s/pkg/ipamd/datastore"
"github.com/aws/amazon-vpc-cni-k8s/test/framework"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
awssdk "github.com/aws/aws-sdk-go/aws"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/pkg/errors"
appsV1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metaV1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/client-go/util/retry"
"os"
"sigs.k8s.io/controller-runtime/pkg/client"
"strconv"
"time"
)

const (
defaultSoakTestNSName = "cni-automation-soak"
stateFileHostDir = "/var/run/aws-node"
stateFileMountPath = "/var/run/aws-node"
stateFilePathname = "/var/run/aws-node/ipam.json"
)

var _ = Describe("launch Pod on single node", Serial, func() {
var (
nominatedNode corev1.Node
sandBoxNS *corev1.Namespace
)

BeforeEach(func(ctx context.Context) {
By("nominate node for testing", func() {
nodeList, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal)
Expect(err).NotTo(HaveOccurred())
numOfNodes := len(nodeList.Items)
Expect(numOfNodes).Should(BeNumerically(">", 1))
nominatedNode = nodeList.Items[numOfNodes-1]
f.Logger.Info("node nominated", "nodeName", nominatedNode.Name)
})

By("setup sandbox namespace", func() {
ns := &corev1.Namespace{
ObjectMeta: metaV1.ObjectMeta{
Name: defaultSoakTestNSName,
},
}
err := f.K8sClient.Create(ctx, ns)
Expect(err).NotTo(HaveOccurred())
sandBoxNS = ns
})
})

AfterEach(func(ctx context.Context) {
if sandBoxNS != nil {
By("teardown sandbox namespace", func() {
err := f.K8sClient.Delete(ctx, sandBoxNS)
Expect(err).NotTo(HaveOccurred())
err = f.K8sResourceManagers.NamespaceManager().WaitUntilNamespaceDeleted(ctx, sandBoxNS)
Expect(err).NotTo(HaveOccurred())
})
}
})

Describe("state file based checkpoint", func() {
// we create a Pod with state file mount to inspect the content of state file
// this is future-proof as we will switch CNI to a minimal base image without shell tools.
var stateFileInspectorPod *corev1.Pod

BeforeEach(func(ctx context.Context) {
By("setup state file inspector pod", func() {
volume := corev1.Volume{
Name: "run-dir",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: stateFileHostDir,
},
},
}
volumeMount := corev1.VolumeMount{
Name: "run-dir",
MountPath: stateFileMountPath,
}
pod := manifest.NewDefaultPodBuilder().
Namespace(sandBoxNS.Name).
Name("inspector").
Container(manifest.NewBusyBoxContainerBuilder().Build()).
NodeName(nominatedNode.Name).
MountVolume([]corev1.Volume{volume}, []corev1.VolumeMount{volumeMount}).
Build()
err := f.K8sClient.Create(ctx, pod)
Expect(err).NotTo(HaveOccurred())
stateFileInspectorPod = pod
err = f.K8sResourceManagers.PodManager().WaitUntilPodRunning(ctx, stateFileInspectorPod)
Expect(err).NotTo(HaveOccurred())
})
})

AfterEach(func(ctx context.Context) {
if stateFileInspectorPod != nil {
By("teardown state file inspector pod", func() {
err := f.K8sClient.Delete(ctx, stateFileInspectorPod)
Expect(err).NotTo(HaveOccurred())
err = f.K8sResourceManagers.PodManager().WaitUntilPodDeleted(ctx, stateFileInspectorPod)
Expect(err).NotTo(HaveOccurred())
})
}
})

// This test will set up pod and teardown pods by scale a busybox deployment in calculated steps.
// It expects the state file is eventually consist with pod state from APIServer.
It("should remain consistent with pods on node when setup/teardown normal pods", func(ctx context.Context) {
// TODO: eliminate the need of MAX_POD_PER_NODE env by automatically detect maxPod from instance type and CNI configuration
envMaxPodPerNode := os.Getenv("MAX_POD_PER_NODE")
if envMaxPodPerNode == "" {
Skip("MAX_POD_PER_NODE env not set")
}
maxPodPerNode, err := strconv.ParseInt(envMaxPodPerNode, 10, 64)
Expect(err).NotTo(HaveOccurred())

podsOnNominatedNode, err := listPodsWithNodeName(ctx, f, nominatedNode.Name)
Expect(err).NotTo(HaveOccurred())
availablePodCount := int(maxPodPerNode) - len(podsOnNominatedNode)
var deploymentReplicaSteps []int
for count := availablePodCount; count > 0; {
deploymentReplicaSteps = append(deploymentReplicaSteps, count)
count = count / 2
}
deploymentReplicaSteps = append(deploymentReplicaSteps, 0)
f.Logger.Info("planned deployment steps", "deploymentReplicaSteps", deploymentReplicaSteps)

var busyBoxDP *appsV1.Deployment
By("create deployment with 0 replicas", func() {
busyBoxDP = manifest.NewBusyBoxDeploymentBuilder().
Namespace(sandBoxNS.Name).
Name("busybox").
NodeName(nominatedNode.Name).
Replicas(0).
Build()
err := f.K8sClient.Create(ctx, busyBoxDP)
Expect(err).NotTo(HaveOccurred())
_, err = f.K8sResourceManagers.DeploymentManager().WaitUntilDeploymentReady(ctx, busyBoxDP)
Expect(err).NotTo(HaveOccurred())
})

Eventually(validateStateFileConsistency).WithContext(ctx).WithArguments(f, nominatedNode, stateFileInspectorPod).WithTimeout(1 * time.Minute).ShouldNot(HaveOccurred())
for _, replica := range deploymentReplicaSteps {
By(fmt.Sprintf("scale deployment to %d replicas", replica), func() {
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
busyBoxDPKey := utils.NamespacedName(busyBoxDP)
if err := f.K8sClient.Get(ctx, busyBoxDPKey, busyBoxDP); err != nil {
return err
}
oldBusyBoxDP := busyBoxDP.DeepCopy()
busyBoxDP.Spec.Replicas = awssdk.Int32(int32(replica))
return f.K8sClient.Patch(ctx, busyBoxDP, client.MergeFromWithOptions(oldBusyBoxDP, client.MergeFromWithOptimisticLock{}))
})
Expect(err).NotTo(HaveOccurred())
_, err = f.K8sResourceManagers.DeploymentManager().WaitUntilDeploymentReady(ctx, busyBoxDP)
Expect(err).NotTo(HaveOccurred())
Eventually(validateStateFileConsistency).WithContext(ctx).WithArguments(f, nominatedNode, stateFileInspectorPod).WithTimeout(1 * time.Minute).ShouldNot(HaveOccurred())
})
}

By("delete deployment", func() {
err := f.K8sClient.Delete(ctx, busyBoxDP)
Expect(err).NotTo(HaveOccurred())
err = f.K8sResourceManagers.DeploymentManager().WaitUntilDeploymentDeleted(ctx, busyBoxDP)
Expect(err).NotTo(HaveOccurred())
})
})
})
})

// validateStateFileConsistency validates the state file from inspectorPod matches pod state on node.
func validateStateFileConsistency(ctx context.Context, f *framework.Framework, nominatedNode corev1.Node, inspectorPod *corev1.Pod) error {
podsOnNominatedNode, err := listPodsWithNodeName(ctx, f, nominatedNode.Name)
if err != nil {
return err
}
stateFileContent, err := readFileFromPod(ctx, f, inspectorPod, stateFilePathname)
if err != nil {
return err
}
var checkpointData datastore.CheckpointData
if err := json.Unmarshal([]byte(stateFileContent), &checkpointData); err != nil {
return err
}

podIPByPodKey := make(map[string]string)
for _, pod := range podsOnNominatedNode {
if !pod.Spec.HostNetwork {
podKey := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
podIPByPodKey[podKey] = pod.Status.PodIP
}
}
f.Logger.Info("check state file consistency", "podIPByPodKey", podIPByPodKey, "checkpointData", checkpointData)
if len(podIPByPodKey) != len(checkpointData.Allocations) {
return errors.Errorf("allocation count don't match: %v/%v", len(podIPByPodKey), len(checkpointData.Allocations))
}
for podKey, podIP := range podIPByPodKey {
foundPodIPAllocation := false
for _, allocation := range checkpointData.Allocations {
if allocation.IPv4 == podIP || allocation.IPv6 == podIP {
if allocation.Metadata.K8SPodName != "" {
podKeyFromMetadata := fmt.Sprintf("%s/%s", allocation.Metadata.K8SPodNamespace, allocation.Metadata.K8SPodName)
if podKey != podKeyFromMetadata {
return errors.Errorf("allocation metadata don't match for podIP %v: %v/%v", podIP, podKey, podKeyFromMetadata)
}
}

foundPodIPAllocation = true
break
}
}
if !foundPodIPAllocation {
return errors.Errorf("allocation not found for pod %v: %v", podKey, podIP)
}
}
return nil
}

func listPodsWithNodeName(ctx context.Context, f *framework.Framework, nodeName string) ([]corev1.Pod, error) {
podList := &corev1.PodList{}
nodeNameSelector := fields.Set{"spec.nodeName": nodeName}.AsSelector()
if err := f.K8sClient.List(ctx, podList, client.MatchingFieldsSelector{Selector: nodeNameSelector}); err != nil {
return nil, err
}
return podList.Items, nil
}

func readFileFromPod(_ context.Context, f *framework.Framework, pod *corev1.Pod, filepath string) (string, error) {
command := []string{"cat", filepath}
stdOut, stdErr, err := f.K8sResourceManagers.PodManager().PodExec(pod.Namespace, pod.Name, command)
if err != nil {
return "", err
}
if stdErr != "" {
return "", errors.New(stdErr)
}
return stdOut, nil
}
20 changes: 20 additions & 0 deletions test/e2e/soak/soak_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package soak_test

import (
"testing"

"github.com/aws/amazon-vpc-cni-k8s/test/framework"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var f *framework.Framework

func TestSoak(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Soak Suite")
}

var _ = BeforeSuite(func() {
f = framework.New(framework.GlobalOptions)
})
31 changes: 5 additions & 26 deletions test/framework/framework.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,18 @@
package framework

import (
"context"
"log"

eniConfig "github.com/aws/amazon-vpc-cni-k8s/pkg/apis/crd/v1alpha1"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/controller"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/helm"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/aws"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
sgp "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1"
"github.com/go-logr/logr"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/clientcmd"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -39,6 +35,7 @@ type Framework struct {
CloudServices aws.Cloud
K8sResourceManagers k8s.ResourceManagers
InstallationManager controller.InstallationManager
Logger logr.Logger
}

func New(options Options) *Framework {
Expand All @@ -56,28 +53,9 @@ func New(options Options) *Framework {
eniConfig.AddToScheme(k8sSchema)
sgp.AddToScheme(k8sSchema)

stopChan := ctrl.SetupSignalHandler()
cache, err := cache.New(config, cache.Options{Scheme: k8sSchema})
Expect(err).NotTo(HaveOccurred())
err = cache.IndexField(context.TODO(), &v1.Event{}, "reason", func(o client.Object) []string {
return []string{o.(*v1.Event).Reason}
}) // default indexing only on ns, need this for ipamd_event_test
Expect(err).NotTo(HaveOccurred())
go func() {
cache.Start(stopChan)
}()
cache.WaitForCacheSync(stopChan)
realClient, err := client.New(config, client.Options{Scheme: k8sSchema})
k8sClient, err := client.New(config, client.Options{Scheme: k8sSchema})
Expect(err).NotTo(HaveOccurred())

k8sClient, err := client.NewDelegatingClient(client.NewDelegatingClientInput{
CacheReader: cache,
Client: realClient,
})
if err != nil {
log.Fatalf("failed to create delegation client: %v", err)
}

cloudConfig := aws.CloudConfig{Region: options.AWSRegion, VpcID: options.AWSVPCID,
EKSEndpoint: options.EKSEndpoint}

Expand All @@ -88,5 +66,6 @@ func New(options Options) *Framework {
K8sResourceManagers: k8s.NewResourceManager(k8sClient, k8sSchema, config),
InstallationManager: controller.NewDefaultInstallationManager(
helm.NewDefaultReleaseManager(options.KubeConfig)),
Logger: utils.NewGinkgoLogger(),
}
}
15 changes: 14 additions & 1 deletion test/framework/resources/k8s/manifest/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ type PodBuilder struct {
nodeName string
restartPolicy v1.RestartPolicy
nodeSelector map[string]string
volume []v1.Volume
volumeMount []v1.VolumeMount
}

func NewDefaultPodBuilder() *PodBuilder {
Expand Down Expand Up @@ -89,8 +91,14 @@ func (p *PodBuilder) RestartPolicy(policy v1.RestartPolicy) *PodBuilder {
return p
}

func (p *PodBuilder) MountVolume(volume []v1.Volume, volumeMount []v1.VolumeMount) *PodBuilder {
p.volume = volume
p.volumeMount = volumeMount
return p
}

func (p *PodBuilder) Build() *v1.Pod {
return &v1.Pod{
podSpec := &v1.Pod{
ObjectMeta: metaV1.ObjectMeta{
Name: p.name,
Namespace: p.namespace,
Expand All @@ -105,4 +113,9 @@ func (p *PodBuilder) Build() *v1.Pod {
NodeSelector: p.nodeSelector,
},
}
if len(p.volume) > 0 && len(p.volumeMount) > 0 {
podSpec.Spec.Volumes = p.volume
podSpec.Spec.Containers[0].VolumeMounts = p.volumeMount
}
return podSpec
}
Loading

0 comments on commit c36075b

Please sign in to comment.