diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000000..246b8296cb --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,3 @@ +self-hosted-runner: + labels: + - snp diff --git a/.github/workflows/e2e_kubernetes.yaml b/.github/workflows/e2e_kubernetes.yaml index 7337c8ef6c..4d1b0e95e6 100644 --- a/.github/workflows/e2e_kubernetes.yaml +++ b/.github/workflows/e2e_kubernetes.yaml @@ -71,6 +71,7 @@ jobs: nix shell -L .#contrast.e2e --command ${{ matrix.test_name }}.test -test.v \ --image-replacements workspace/just.containerlookup \ --namespace-file workspace/e2e.namespace \ + --platform aks-clh-snp \ --skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}" - name: Cleanup if: cancelled() && !inputs.skip-undeploy diff --git a/.github/workflows/e2e_openssl_baremetal.yml b/.github/workflows/e2e_openssl_baremetal.yml new file mode 100644 index 0000000000..23c0a57009 --- /dev/null +++ b/.github/workflows/e2e_openssl_baremetal.yml @@ -0,0 +1,55 @@ +name: e2e test openssl baremetal + +on: + workflow_dispatch: + inputs: + skip-undeploy: + description: "Skip undeploy" + required: false + type: boolean + default: false + pull_request: + paths-ignore: + - dev-docs/** + - docs/** + - rfc/** + +env: + container_registry: ghcr.io/edgelesssys + DO_NOT_TRACK: 1 + +jobs: + test: + runs-on: + labels: snp + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + - name: Log in to ghcr.io Container registry + uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - uses: nicknovitski/nix-develop@a2060d116a50b36dfab02280af558e73ab52427d # v1.1.0 + - name: Create justfile.env + run: | + cat <<EOF > justfile.env + container_registry=${{ env.container_registry }} + EOF + - name: Build and prepare deployments + run: | + just coordinator 
initializer openssl port-forwarder node-installer K3s-QEMU-SNP + - name: E2E Test + run: | + nix shell .#contrast.e2e --command openssl.test -test.v \ + --image-replacements workspace/just.containerlookup \ + --namespace-file workspace/e2e.namespace \ + --platform K3s-QEMU-SNP \ + --skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}" + - name: Cleanup + if: cancelled() && !inputs.skip-undeploy + run: | + kubectl delete ns "$(cat workspace/e2e.namespace)" --timeout 5m diff --git a/.github/workflows/e2e_regression.yml b/.github/workflows/e2e_regression.yml index 741537dd2b..ebb3d83a4d 100644 --- a/.github/workflows/e2e_regression.yml +++ b/.github/workflows/e2e_regression.yml @@ -66,6 +66,7 @@ jobs: nix shell -L .#contrast.e2e --command ${{ matrix.case }}.test -test.v \ --image-replacements workspace/just.containerlookup \ --namespace-file workspace/e2e.namespace \ + --platform aks-clh-snp \ --skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}" - name: Cleanup if: cancelled() && !inputs.skip-undeploy diff --git a/cli/cmd/generate.go b/cli/cmd/generate.go index cb89892709..f0a8fbcaa3 100644 --- a/cli/cmd/generate.go +++ b/cli/cmd/generate.go @@ -148,8 +148,9 @@ func runGenerate(cmd *cobra.Command, args []string) error { } mnf.Policies = policyMap + // Only tell the user to fill in reference values if the manifest is not already valid. 
if err := mnf.Validate(); err != nil { - return fmt.Errorf("validating manifest: %w", err) + fmt.Fprintf(cmd.OutOrStdout(), " Please fill in the reference values for %s\n", flags.referenceValuesPlatform.String()) } if flags.disableUpdates { diff --git a/e2e/genpolicy/genpolicy_test.go b/e2e/genpolicy/genpolicy_test.go index 477c41ef14..8939460e28 100644 --- a/e2e/genpolicy/genpolicy_test.go +++ b/e2e/genpolicy/genpolicy_test.go @@ -25,23 +25,22 @@ import ( ) var ( - imageReplacementsFile, namespaceFile string - skipUndeploy bool + imageReplacementsFile, namespaceFile, platformStr string + skipUndeploy bool ) // TestGenpolicy runs regression tests for generated policies. func TestGenpolicy(t *testing.T) { - // TODO(msanft): Make this configurable - platform := platforms.AKSCloudHypervisorSNP - testCases := kuberesource.GenpolicyRegressionTests() + platform, err := platforms.FromString(platformStr) + require.NoError(t, err) runtimeHandler, err := manifest.RuntimeHandler(platform) require.NoError(t, err) for name, deploy := range testCases { t.Run(name, func(t *testing.T) { - ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, skipUndeploy) + ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, platform, skipUndeploy) ct.Init(t, kuberesource.PatchRuntimeHandlers([]any{deploy}, runtimeHandler)) @@ -75,6 +74,7 @@ func TestGenpolicy(t *testing.T) { func TestMain(m *testing.M) { flag.StringVar(&imageReplacementsFile, "image-replacements", "", "path to image replacements file") flag.StringVar(&namespaceFile, "namespace-file", "", "file to store the namespace in") + flag.StringVar(&platformStr, "platform", "", "Deployment platform") flag.BoolVar(&skipUndeploy, "skip-undeploy", false, "skip undeploy step in the test") flag.Parse() diff --git a/e2e/getdents/getdents_test.go b/e2e/getdents/getdents_test.go index aa8a2783b7..75b5488566 100644 --- a/e2e/getdents/getdents_test.go +++ b/e2e/getdents/getdents_test.go @@ -29,15 +29,14 @@ const ( ) var ( - 
imageReplacementsFile, namespaceFile string - skipUndeploy bool + imageReplacementsFile, namespaceFile, platformStr string + skipUndeploy bool ) func TestGetDEnts(t *testing.T) { - ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, skipUndeploy) - - // TODO(msanft): Make this configurable - platform := platforms.AKSCloudHypervisorSNP + platform, err := platforms.FromString(platformStr) + require.NoError(t, err) + ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, platform, skipUndeploy) runtimeHandler, err := manifest.RuntimeHandler(platform) require.NoError(t, err) @@ -92,6 +91,7 @@ func TestGetDEnts(t *testing.T) { func TestMain(m *testing.M) { flag.StringVar(&imageReplacementsFile, "image-replacements", "", "path to image replacements file") flag.StringVar(&namespaceFile, "namespace-file", "", "file to store the namespace in") + flag.StringVar(&platformStr, "platform", "", "Deployment platform") flag.BoolVar(&skipUndeploy, "skip-undeploy", false, "skip undeploy step in the test") flag.Parse() diff --git a/e2e/internal/contrasttest/contrasttest.go b/e2e/internal/contrasttest/contrasttest.go index eaa6428590..e157fce17e 100644 --- a/e2e/internal/contrasttest/contrasttest.go +++ b/e2e/internal/contrasttest/contrasttest.go @@ -1,6 +1,8 @@ // Copyright 2024 Edgeless Systems GmbH // SPDX-License-Identifier: AGPL-3.0-only +//go:build e2e + package contrasttest import ( @@ -35,6 +37,7 @@ type ContrastTest struct { WorkDir string ImageReplacements map[string]string ImageReplacementsFile string + Platform platforms.Platform NamespaceFile string SkipUndeploy bool Kubeclient *kubeclient.Kubeclient @@ -46,11 +49,12 @@ type ContrastTest struct { } // New creates a new contrasttest.T object bound to the given test. 
-func New(t *testing.T, imageReplacements, namespaceFile string, skipUndeploy bool) *ContrastTest { +func New(t *testing.T, imageReplacements, namespaceFile string, platform platforms.Platform, skipUndeploy bool) *ContrastTest { return &ContrastTest{ Namespace: makeNamespace(t), WorkDir: t.TempDir(), ImageReplacementsFile: imageReplacements, + Platform: platform, NamespaceFile: namespaceFile, SkipUndeploy: skipUndeploy, Kubeclient: kubeclient.NewForTest(t), @@ -143,7 +147,7 @@ func (ct *ContrastTest) Generate(t *testing.T) { args := append( ct.commonArgs(), "--image-replacements", ct.ImageReplacementsFile, - "--reference-values", "aks-clh-snp", + "--reference-values", ct.Platform.String(), path.Join(ct.WorkDir, "resources.yaml"), ) @@ -247,7 +251,7 @@ func (ct *ContrastTest) commonArgs() []string { func (ct *ContrastTest) installRuntime(t *testing.T) { require := require.New(t) - resources, err := kuberesource.Runtime(platforms.AKSCloudHypervisorSNP) + resources, err := kuberesource.Runtime(ct.Platform) require.NoError(err) resources = kuberesource.PatchImages(resources, ct.ImageReplacements) resources = kuberesource.PatchNamespaces(resources, ct.Namespace) diff --git a/e2e/internal/kubeclient/deploy.go b/e2e/internal/kubeclient/deploy.go index 356152d7b8..908f1b15dd 100644 --- a/e2e/internal/kubeclient/deploy.go +++ b/e2e/internal/kubeclient/deploy.go @@ -140,63 +140,76 @@ func (c *Kubeclient) WaitForPod(ctx context.Context, namespace, name string) err // WaitFor watches the given resource kind and blocks until the desired number of pods are // ready or the context expires (is cancelled or times out). func (c *Kubeclient) WaitFor(ctx context.Context, resource ResourceWaiter, namespace, name string) error { - watcher, err := resource.watcher(ctx, c.client, namespace, name) - if err != nil { - return err - } + // When the node-installer restarts K3s, the watcher fails. 
The watcher has + // a retry loop internally, but it only retries starting the request, once + // it has established a request and that request dies spuriously, the + // watcher doesn't reconnect. To fix this we add another retry loop. + retryCounter := 3 +retryLoop: for { - evt, ok := <-watcher.ResultChan() - if !ok { - origErr := ctx.Err() - if origErr == nil { - return fmt.Errorf("watcher for %s %s/%s unexpectedly closed", resource.kind(), namespace, name) - } - logger := c.log.With("namespace", namespace) - logger.Error("resource did not become ready", "kind", resource, "name", name, "contextErr", ctx.Err()) - if ctx.Err() != context.DeadlineExceeded { - return ctx.Err() - } - // Fetch and print debug information. - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() - pods, err := resource.getPods(ctx, c, namespace, name) //nolint:contextcheck // The parent context expired. - if err != nil { - logger.Error("could not fetch pods for resource", "kind", resource.kind(), "name", name, "error", err) + watcher, err := resource.watcher(ctx, c.client, namespace, name) + if err != nil { + return err + } + + for { + evt, ok := <-watcher.ResultChan() + if !ok { + origErr := ctx.Err() + if origErr == nil { + retryCounter-- + if retryCounter != 0 { + continue retryLoop + } + return fmt.Errorf("watcher for %s %s/%s unexpectedly closed", resource.kind(), namespace, name) + } + logger := c.log.With("namespace", namespace) + logger.Error("resource did not become ready", "kind", resource, "name", name, "contextErr", ctx.Err()) + if ctx.Err() != context.DeadlineExceeded { + return ctx.Err() + } + // Fetch and print debug information. + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + pods, err := resource.getPods(ctx, c, namespace, name) //nolint:contextcheck // The parent context expired. 
+ if err != nil { + logger.Error("could not fetch pods for resource", "kind", resource.kind(), "name", name, "error", err) + return origErr + } + for _, pod := range pods { + if !isPodReady(&pod) { + logger.Debug("pod not ready", "name", pod.Name, "status", c.toJSON(pod.Status)) + } + } return origErr } - for _, pod := range pods { - if !isPodReady(&pod) { - logger.Debug("pod not ready", "name", pod.Name, "status", c.toJSON(pod.Status)) + switch evt.Type { + case watch.Added: + fallthrough + case watch.Modified: + pods, err := resource.getPods(ctx, c, namespace, name) + if err != nil { + return err } - } - return origErr - } - switch evt.Type { - case watch.Added: - fallthrough - case watch.Modified: - pods, err := resource.getPods(ctx, c, namespace, name) - if err != nil { - return err - } - numPodsReady := 0 - for _, pod := range pods { - if isPodReady(&pod) { - numPodsReady++ + numPodsReady := 0 + for _, pod := range pods { + if isPodReady(&pod) { + numPodsReady++ + } } + desiredPods, err := resource.numDesiredPods(evt.Object) + if err != nil { + return err + } + if desiredPods <= numPodsReady { + return nil + } + case watch.Deleted: + return fmt.Errorf("%s %s/%s was deleted while waiting for it", resource.kind(), namespace, name) + default: + return fmt.Errorf("unexpected watch event while waiting for %s %s/%s: type=%s, object=%#v", resource.kind(), namespace, name, evt.Type, evt.Object) } - desiredPods, err := resource.numDesiredPods(evt.Object) - if err != nil { - return err - } - if desiredPods <= numPodsReady { - return nil - } - case watch.Deleted: - return fmt.Errorf("%s %s/%s was deleted while waiting for it", resource.kind(), namespace, name) - default: - return fmt.Errorf("unexpected watch event while waiting for %s %s/%s: type=%s, object=%#v", resource.kind(), namespace, name, evt.Type, evt.Object) } } } diff --git a/e2e/internal/kubeclient/portforward.go b/e2e/internal/kubeclient/portforward.go index 09d500db42..d215f67a0c 100644 --- 
a/e2e/internal/kubeclient/portforward.go +++ b/e2e/internal/kubeclient/portforward.go @@ -34,7 +34,7 @@ func (k *Kubeclient) WithForwardedPort(ctx context.Context, namespace, podName, if funcErr == nil { return nil } - log.Error("port-forwarded func failed", "error", err) + log.Error("port-forwarded func failed", "error", funcErr) select { case err := <-errorCh: log.Error("Encountered port forwarding error", "error", err) diff --git a/e2e/openssl/openssl_test.go b/e2e/openssl/openssl_test.go index 406691016b..e73ac977f9 100644 --- a/e2e/openssl/openssl_test.go +++ b/e2e/openssl/openssl_test.go @@ -36,16 +36,15 @@ const ( ) var ( - imageReplacementsFile, namespaceFile string - skipUndeploy bool + imageReplacementsFile, namespaceFile, platformStr string + skipUndeploy bool ) // TestOpenSSL runs e2e tests on the example OpenSSL deployment. func TestOpenSSL(t *testing.T) { - ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, skipUndeploy) - - // TODO(msanft): Make this configurable - platform := platforms.AKSCloudHypervisorSNP + platform, err := platforms.FromString(platformStr) + require.NoError(t, err) + ct := contrasttest.New(t, imageReplacementsFile, namespaceFile, platform, skipUndeploy) runtimeHandler, err := manifest.RuntimeHandler(platform) require.NoError(t, err) @@ -62,6 +61,8 @@ func TestOpenSSL(t *testing.T) { ct.Init(t, resources) require.True(t, t.Run("generate", ct.Generate), "contrast generate needs to succeed for subsequent tests") + patchReferenceValues(t, platform, ct) + require.True(t, t.Run("apply", ct.Apply), "Kubernetes resources need to be applied for subsequent tests") require.True(t, t.Run("set", ct.Set), "contrast set needs to succeed for subsequent tests") @@ -202,6 +203,12 @@ func TestOpenSSL(t *testing.T) { c := kubeclient.NewForTest(t) require.NoError(c.Restart(ctx, kubeclient.StatefulSet{}, ct.Namespace, "coordinator")) + require.NoError(c.WaitFor(ctx, kubeclient.StatefulSet{}, ct.Namespace, "coordinator")) + + // 
TODO(freax13): The following verify sometimes fails spuriously due to + // connection issues. Waiting a little bit longer makes + // the whole test less flaky. + time.Sleep(5 * time.Second) require.ErrorContains(ct.RunVerify(), "recovery") @@ -241,6 +248,7 @@ func TestOpenSSL(t *testing.T) { func TestMain(m *testing.M) { flag.StringVar(&imageReplacementsFile, "image-replacements", "", "path to image replacements file") flag.StringVar(&namespaceFile, "namespace-file", "", "file to store the namespace in") + flag.StringVar(&platformStr, "platform", "", "Deployment platform") flag.BoolVar(&skipUndeploy, "skip-undeploy", false, "skip undeploy step in the test") flag.Parse() @@ -252,3 +260,31 @@ func opensslConnectCmd(addr, caCert string) string { `openssl s_client -connect %s -verify_return_error -x509_strict -CAfile /tls-config/%s -cert /tls-config/certChain.pem -key /tls-config/key.pem $out + ''; + + passthru = { + inherit dmVerityArgs; + }; +} diff --git a/packages/by-name/qemu-static/package.nix b/packages/by-name/qemu-static/package.nix index 1e08075908..b3486bca82 100644 --- a/packages/by-name/qemu-static/package.nix +++ b/packages/by-name/qemu-static/package.nix @@ -55,11 +55,11 @@ in hostCpuTargets = [ "x86_64-softmmu" ]; })).overrideAttrs (previousAttrs: rec { - version = "9.1.0-rc0"; + version = "9.1.0-rc1"; src = fetchurl { url = "https://download.qemu.org/qemu-${version}.tar.xz"; - hash = "sha256-3Y3dl1EF18l2axZcHI3JvYY7CNJqzR8G9wxcsDKnnn0="; + hash = "sha256-JDcnzpkwfzwa5ofMjS1HYy7BDJ79EunIdMqW5kdfauk="; }; propagatedBuildInputs = builtins.filter (