Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preparations for e2e tests on baremetal SNP #730

Merged
merged 15 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/actionlint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
self-hosted-runner:
labels:
- snp
1 change: 1 addition & 0 deletions .github/workflows/e2e_kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ jobs:
nix shell -L .#contrast.e2e --command ${{ matrix.test_name }}.test -test.v \
--image-replacements workspace/just.containerlookup \
--namespace-file workspace/e2e.namespace \
--platform aks-clh-snp \
--skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}"
- name: Cleanup
if: cancelled() && !inputs.skip-undeploy
Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/e2e_openssl_baremetal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
name: e2e test openssl baremetal

on:
workflow_dispatch:
inputs:
skip-undeploy:
description: "Skip undeploy"
required: false
type: boolean
default: false
pull_request:
paths-ignore:
- dev-docs/**
- docs/**
- rfc/**

env:
container_registry: ghcr.io/edgelesssys
DO_NOT_TRACK: 1

jobs:
test:
runs-on:
labels: snp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a GHA shortcoming, but it'd be very nice if we could make this test work on both SNP and TDX without yet another duplication. But afaict, you cannot have dynamic values (e.g. an input) in runs-on. I'm not saying this PR should or can do anything about that; I'm just leaving it here as a note.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand this section in the docs correctly, this might work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would enable us to create tests that run on all platforms unconditionally, but still not one test that runs on a single, selectable platform.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also run steps conditionally.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but this won't be a step. We could execute everything but the actual step on GH-hosted runners and then transfer files over, but I fear that this is going to have a higher total cost in the end.

permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- name: Log in to ghcr.io Container registry
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- uses: nicknovitski/nix-develop@a2060d116a50b36dfab02280af558e73ab52427d # v1.1.0
- name: Create justfile.env
run: |
cat <<EOF > justfile.env
container_registry=${{ env.container_registry }}
EOF
- name: Build and prepare deployments
run: |
just coordinator initializer openssl port-forwarder node-installer K3s-QEMU-SNP
- name: E2E Test
run: |
nix shell .#contrast.e2e --command openssl.test -test.v \
--image-replacements workspace/just.containerlookup \
--namespace-file workspace/e2e.namespace \
--platform K3s-QEMU-SNP \
--skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}"
- name: Cleanup
if: cancelled() && !inputs.skip-undeploy
run: |
kubectl delete ns "$(cat workspace/e2e.namespace)" --timeout 5m
1 change: 1 addition & 0 deletions .github/workflows/e2e_regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
nix shell -L .#contrast.e2e --command ${{ matrix.case }}.test -test.v \
--image-replacements workspace/just.containerlookup \
--namespace-file workspace/e2e.namespace \
--platform aks-clh-snp \
--skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}"
- name: Cleanup
if: cancelled() && !inputs.skip-undeploy
Expand Down
3 changes: 2 additions & 1 deletion cli/cmd/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,9 @@ func runGenerate(cmd *cobra.Command, args []string) error {
}

mnf.Policies = policyMap
// Only tell the user to fill in reference values if the manifest is not already valid.
if err := mnf.Validate(); err != nil {
return fmt.Errorf("validating manifest: %w", err)
fmt.Fprintf(cmd.OutOrStdout(), " Please fill in the reference values for %s\n", flags.referenceValuesPlatform.String())
msanft marked this conversation as resolved.
Show resolved Hide resolved
}

if flags.disableUpdates {
Expand Down
12 changes: 6 additions & 6 deletions e2e/genpolicy/genpolicy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,22 @@ import (
)

var (
imageReplacementsFile, namespaceFile string
skipUndeploy bool
imageReplacementsFile, namespaceFile, platformStr string
skipUndeploy bool
)

// TestGenpolicy runs regression tests for generated policies.
func TestGenpolicy(t *testing.T) {
// TODO(msanft): Make this configurable
platform := platforms.AKSCloudHypervisorSNP

testCases := kuberesource.GenpolicyRegressionTests()

platform, err := platforms.FromString(platformStr)
require.NoError(t, err)
runtimeHandler, err := manifest.RuntimeHandler(platform)
require.NoError(t, err)

for name, deploy := range testCases {
t.Run(name, func(t *testing.T) {
ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, skipUndeploy)
ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, platform, skipUndeploy)

ct.Init(t, kuberesource.PatchRuntimeHandlers([]any{deploy}, runtimeHandler))

Expand Down Expand Up @@ -75,6 +74,7 @@ func TestGenpolicy(t *testing.T) {
func TestMain(m *testing.M) {
flag.StringVar(&imageReplacementsFile, "image-replacements", "", "path to image replacements file")
flag.StringVar(&namespaceFile, "namespace-file", "", "file to store the namespace in")
flag.StringVar(&platformStr, "platform", "", "Deployment platform")
flag.BoolVar(&skipUndeploy, "skip-undeploy", false, "skip undeploy step in the test")
flag.Parse()

Expand Down
12 changes: 6 additions & 6 deletions e2e/getdents/getdents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,14 @@ const (
)

var (
imageReplacementsFile, namespaceFile string
skipUndeploy bool
imageReplacementsFile, namespaceFile, platformStr string
skipUndeploy bool
)

func TestGetDEnts(t *testing.T) {
ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, skipUndeploy)

// TODO(msanft): Make this configurable
platform := platforms.AKSCloudHypervisorSNP
platform, err := platforms.FromString(platformStr)
require.NoError(t, err)
ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, platform, skipUndeploy)

runtimeHandler, err := manifest.RuntimeHandler(platform)
require.NoError(t, err)
Expand Down Expand Up @@ -92,6 +91,7 @@ func TestGetDEnts(t *testing.T) {
func TestMain(m *testing.M) {
flag.StringVar(&imageReplacementsFile, "image-replacements", "", "path to image replacements file")
flag.StringVar(&namespaceFile, "namespace-file", "", "file to store the namespace in")
flag.StringVar(&platformStr, "platform", "", "Deployment platform")
flag.BoolVar(&skipUndeploy, "skip-undeploy", false, "skip undeploy step in the test")
flag.Parse()

Expand Down
10 changes: 7 additions & 3 deletions e2e/internal/contrasttest/contrasttest.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2024 Edgeless Systems GmbH
// SPDX-License-Identifier: AGPL-3.0-only

//go:build e2e

package contrasttest

import (
Expand Down Expand Up @@ -35,6 +37,7 @@ type ContrastTest struct {
WorkDir string
ImageReplacements map[string]string
ImageReplacementsFile string
Platform platforms.Platform
NamespaceFile string
SkipUndeploy bool
Kubeclient *kubeclient.Kubeclient
Expand All @@ -46,11 +49,12 @@ type ContrastTest struct {
}

// New creates a new contrasttest.T object bound to the given test.
func New(t *testing.T, imageReplacements, namespaceFile string, skipUndeploy bool) *ContrastTest {
func New(t *testing.T, imageReplacements, namespaceFile string, platform platforms.Platform, skipUndeploy bool) *ContrastTest {
return &ContrastTest{
Namespace: makeNamespace(t),
WorkDir: t.TempDir(),
ImageReplacementsFile: imageReplacements,
Platform: platform,
NamespaceFile: namespaceFile,
SkipUndeploy: skipUndeploy,
Kubeclient: kubeclient.NewForTest(t),
Expand Down Expand Up @@ -143,7 +147,7 @@ func (ct *ContrastTest) Generate(t *testing.T) {
args := append(
ct.commonArgs(),
"--image-replacements", ct.ImageReplacementsFile,
"--reference-values", "aks-clh-snp",
"--reference-values", ct.Platform.String(),
path.Join(ct.WorkDir, "resources.yaml"),
)

Expand Down Expand Up @@ -247,7 +251,7 @@ func (ct *ContrastTest) commonArgs() []string {
func (ct *ContrastTest) installRuntime(t *testing.T) {
require := require.New(t)

resources, err := kuberesource.Runtime(platforms.AKSCloudHypervisorSNP)
resources, err := kuberesource.Runtime(ct.Platform)
require.NoError(err)
resources = kuberesource.PatchImages(resources, ct.ImageReplacements)
resources = kuberesource.PatchNamespaces(resources, ct.Namespace)
Expand Down
113 changes: 63 additions & 50 deletions e2e/internal/kubeclient/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,63 +140,76 @@ func (c *Kubeclient) WaitForPod(ctx context.Context, namespace, name string) err
// WaitFor watches the given resource kind and blocks until the desired number of pods are
// ready or the context expires (is cancelled or times out).
func (c *Kubeclient) WaitFor(ctx context.Context, resource ResourceWaiter, namespace, name string) error {
watcher, err := resource.watcher(ctx, c.client, namespace, name)
if err != nil {
return err
}
// When the node-installer restarts K3s, the watcher fails. The watcher has
// a retry loop internally, but it only retries starting the request. Once
// it has established a request and that request dies spuriously, the
// watcher doesn't reconnect. To fix this, we add another retry loop.
retryCounter := 3

retryLoop:
msanft marked this conversation as resolved.
Show resolved Hide resolved
for {
evt, ok := <-watcher.ResultChan()
if !ok {
origErr := ctx.Err()
if origErr == nil {
return fmt.Errorf("watcher for %s %s/%s unexpectedly closed", resource.kind(), namespace, name)
}
logger := c.log.With("namespace", namespace)
logger.Error("resource did not become ready", "kind", resource, "name", name, "contextErr", ctx.Err())
if ctx.Err() != context.DeadlineExceeded {
return ctx.Err()
}
// Fetch and print debug information.
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
pods, err := resource.getPods(ctx, c, namespace, name) //nolint:contextcheck // The parent context expired.
if err != nil {
logger.Error("could not fetch pods for resource", "kind", resource.kind(), "name", name, "error", err)
watcher, err := resource.watcher(ctx, c.client, namespace, name)
if err != nil {
return err
}

for {
evt, ok := <-watcher.ResultChan()
if !ok {
origErr := ctx.Err()
if origErr == nil {
retryCounter--
if retryCounter != 0 {
continue retryLoop
}
return fmt.Errorf("watcher for %s %s/%s unexpectedly closed", resource.kind(), namespace, name)
}
logger := c.log.With("namespace", namespace)
logger.Error("resource did not become ready", "kind", resource, "name", name, "contextErr", ctx.Err())
if ctx.Err() != context.DeadlineExceeded {
return ctx.Err()
}
// Fetch and print debug information.
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
pods, err := resource.getPods(ctx, c, namespace, name) //nolint:contextcheck // The parent context expired.
if err != nil {
logger.Error("could not fetch pods for resource", "kind", resource.kind(), "name", name, "error", err)
return origErr
}
for _, pod := range pods {
if !isPodReady(&pod) {
logger.Debug("pod not ready", "name", pod.Name, "status", c.toJSON(pod.Status))
}
}
return origErr
}
for _, pod := range pods {
if !isPodReady(&pod) {
logger.Debug("pod not ready", "name", pod.Name, "status", c.toJSON(pod.Status))
switch evt.Type {
case watch.Added:
fallthrough
case watch.Modified:
pods, err := resource.getPods(ctx, c, namespace, name)
if err != nil {
return err
}
}
return origErr
}
switch evt.Type {
case watch.Added:
fallthrough
case watch.Modified:
pods, err := resource.getPods(ctx, c, namespace, name)
if err != nil {
return err
}
numPodsReady := 0
for _, pod := range pods {
if isPodReady(&pod) {
numPodsReady++
numPodsReady := 0
for _, pod := range pods {
if isPodReady(&pod) {
numPodsReady++
}
}
desiredPods, err := resource.numDesiredPods(evt.Object)
if err != nil {
return err
}
if desiredPods <= numPodsReady {
return nil
}
case watch.Deleted:
return fmt.Errorf("%s %s/%s was deleted while waiting for it", resource.kind(), namespace, name)
default:
return fmt.Errorf("unexpected watch event while waiting for %s %s/%s: type=%s, object=%#v", resource.kind(), namespace, name, evt.Type, evt.Object)
}
desiredPods, err := resource.numDesiredPods(evt.Object)
if err != nil {
return err
}
if desiredPods <= numPodsReady {
return nil
}
case watch.Deleted:
return fmt.Errorf("%s %s/%s was deleted while waiting for it", resource.kind(), namespace, name)
default:
return fmt.Errorf("unexpected watch event while waiting for %s %s/%s: type=%s, object=%#v", resource.kind(), namespace, name, evt.Type, evt.Object)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion e2e/internal/kubeclient/portforward.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (k *Kubeclient) WithForwardedPort(ctx context.Context, namespace, podName,
if funcErr == nil {
return nil
}
log.Error("port-forwarded func failed", "error", err)
log.Error("port-forwarded func failed", "error", funcErr)
select {
case err := <-errorCh:
log.Error("Encountered port forwarding error", "error", err)
Expand Down
Loading