diff --git a/coordinator/internal/authority/authority.go b/coordinator/internal/authority/authority.go index 3f11c71494..f86e6df7a4 100644 --- a/coordinator/internal/authority/authority.go +++ b/coordinator/internal/authority/authority.go @@ -183,6 +183,11 @@ func (m *Authority) syncState() error { // No history yet -> nothing to sync. return nil } + se := m.se.Load() + if se == nil { + // There are transitions in history, but we don't have a signing key -> recovery mode. + return ErrNeedsRecovery + } oldState := m.state.Load() latest, err := m.hist.GetLatest() @@ -209,11 +214,11 @@ func (m *Authority) syncState() error { return fmt.Errorf("parsing manifest: %w", err) } - meshKey, err := m.se.Load().DeriveMeshCAKey(latest.TransitionHash) + meshKey, err := se.DeriveMeshCAKey(latest.TransitionHash) if err != nil { return fmt.Errorf("deriving mesh CA key: %w", err) } - ca, err := ca.New(m.se.Load().RootCAKey(), meshKey) + ca, err := ca.New(se.RootCAKey(), meshKey) if err != nil { return fmt.Errorf("creating CA: %w", err) } diff --git a/coordinator/internal/authority/recoveryapi.go b/coordinator/internal/authority/recoveryapi.go index 758c460053..a0c13b3ef5 100644 --- a/coordinator/internal/authority/recoveryapi.go +++ b/coordinator/internal/authority/recoveryapi.go @@ -12,22 +12,28 @@ import ( "google.golang.org/grpc/status" ) -// ErrAlreadyRecovered is returned if recovery was requested but a seed is already set. -var ErrAlreadyRecovered = errors.New("coordinator is already recovered") +var ( + // ErrAlreadyRecovered is returned if seedEngine initialization was requested but a seed is already set. + ErrAlreadyRecovered = errors.New("coordinator is already recovered") + // ErrNeedsRecovery is returned if state exists, but no secrets are available, e.g. after restart. + ErrNeedsRecovery = errors.New("coordinator is in recovery mode") +) // Recover recovers the Coordinator from a seed and salt. func (a *Authority) Recover(_ context.Context, req *recoveryapi.RecoverRequest) (*recoveryapi.RecoverResponse, error) { a.logger.Info("Recover called") - err := a.initSeedEngine(req.Seed, req.Salt) - switch { - case errors.Is(err, ErrAlreadyRecovered): + if err := a.initSeedEngine(req.Seed, req.Salt); errors.Is(err, ErrAlreadyRecovered) { return nil, status.Error(codes.FailedPrecondition, err.Error()) - case err == nil: - return &recoveryapi.RecoverResponse{}, nil - default: + } else if err != nil { // Pretty sure this failed because the seed was bad. - return nil, status.Errorf(codes.InvalidArgument, err.Error()) + return nil, status.Errorf(codes.InvalidArgument, "initializing seed engine: %v", err) + } + if err := a.syncState(); err != nil { + // This recovery attempt did not lead to a good state, let's roll it back. + a.se.Store(nil) + return nil, status.Errorf(codes.InvalidArgument, "recovery failed and was rolled back: %v", err) } + return &recoveryapi.RecoverResponse{}, nil } diff --git a/coordinator/internal/authority/recoveryapi_test.go b/coordinator/internal/authority/recoveryapi_test.go new file mode 100644 index 0000000000..1899e4af22 --- /dev/null +++ b/coordinator/internal/authority/recoveryapi_test.go @@ -0,0 +1,143 @@ +// Copyright 2024 Edgeless Systems GmbH +// SPDX-License-Identifier: AGPL-3.0-only + +package authority + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "encoding/json" + "log/slog" + "testing" + + "github.com/edgelesssys/contrast/internal/manifest" + "github.com/edgelesssys/contrast/internal/recoveryapi" + "github.com/edgelesssys/contrast/internal/userapi" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +func TestRecovery(t *testing.T) { + var seed [32]byte + var salt [32]byte + testCases := []struct { + name string + seed []byte + salt []byte + wantCode codes.Code + }{ + { + name: "empty seed", + salt: salt[:], + wantCode: codes.InvalidArgument, + }, + { + name: "empty salt", + seed: seed[:], + wantCode: codes.InvalidArgument, + }, + { + name: "short seed", + seed: seed[:16], + salt: salt[:], + wantCode: codes.InvalidArgument, + }, + { + name: "short salt", + seed: seed[:], + salt: salt[:16], + wantCode: codes.InvalidArgument, + }, + { + name: "normal values", + seed: seed[:], + salt: salt[:], + wantCode: codes.OK, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require := require.New(t) + + a := newCoordinator() + _, err := a.Recover(context.Background(), &recoveryapi.RecoverRequest{ + Seed: tc.seed, + Salt: tc.salt, + }) + + require.Equal(tc.wantCode, status.Code(err)) + }) + } +} + +// TestRecoveryFlow exercises the recovery flow's expected path. +func TestRecoveryFlow(t *testing.T) { + require := require.New(t) + + // 1. A Coordinator is created from empty state. + + a := newCoordinator() + + // 2. A manifest is set and the returned seed is recorded. + seedShareOwnerKey, err := rsa.GenerateKey(rand.Reader, 2048) + require.NoError(err) + seedShareOwnerKeyBytes := manifest.MarshalSeedShareOwnerKey(&seedShareOwnerKey.PublicKey) + + mnfst, _, policies := newManifest(t) + mnfst.SeedshareOwnerPubKeys = []manifest.HexString{seedShareOwnerKeyBytes} + manifestBytes, err := json.Marshal(mnfst) + require.NoError(err) + + req := &userapi.SetManifestRequest{ + Manifest: manifestBytes, + Policies: policies, + } + resp1, err := a.SetManifest(context.Background(), req) + require.NoError(err) + require.NotNil(resp1) + seedSharesDoc := resp1.GetSeedSharesDoc() + require.NotNil(seedSharesDoc) + seedShares := seedSharesDoc.GetSeedShares() + require.Len(seedShares, 1) + + seed, err := manifest.DecryptSeedShare(seedShareOwnerKey, seedShares[0]) + require.NoError(err) + + recoverReq := &recoveryapi.RecoverRequest{ + Seed: seed, + Salt: seedSharesDoc.GetSalt(), + } + + // Recovery on this Coordinator should fail now that a manifest is set. + _, err = a.Recover(context.Background(), recoverReq) + require.ErrorContains(err, ErrAlreadyRecovered.Error()) + + // 3. A new Coordinator is created with the existing history. + // GetManifests and SetManifest are expected to fail. + + a = New(a.hist, prometheus.NewRegistry(), slog.Default()) + _, err = a.SetManifest(context.Background(), req) + require.ErrorContains(err, ErrNeedsRecovery.Error()) + + _, err = a.GetManifests(context.Background(), &userapi.GetManifestsRequest{}) + require.ErrorContains(err, ErrNeedsRecovery.Error()) + + // 4. Recovery is called. + _, err = a.Recover(context.Background(), recoverReq) + require.NoError(err) + + // 5. Coordinator should be operational and know about the latest manifest. + resp, err := a.GetManifests(context.Background(), &userapi.GetManifestsRequest{}) + require.NoError(err) + require.NotNil(resp) + require.Len(resp.Manifests, 1) + require.Equal([][]byte{manifestBytes}, resp.Manifests) + + // Recover on a recovered authority should fail. + _, err = a.Recover(context.Background(), recoverReq) + require.Error(err) +} diff --git a/coordinator/internal/authority/userapi.go b/coordinator/internal/authority/userapi.go index 6ddb227147..054340e221 100644 --- a/coordinator/internal/authority/userapi.go +++ b/coordinator/internal/authority/userapi.go @@ -28,7 +28,9 @@ import ( func (a *Authority) SetManifest(ctx context.Context, req *userapi.SetManifestRequest) (*userapi.SetManifestResponse, error) { a.logger.Info("SetManifest called") - if err := a.syncState(); err != nil { + if err := a.syncState(); errors.Is(err, ErrNeedsRecovery) { + return nil, status.Error(codes.FailedPrecondition, ErrNeedsRecovery.Error()) + } else if err != nil { return nil, status.Errorf(codes.Internal, "syncing internal state: %v", err) } @@ -40,7 +42,13 @@ func (a *Authority) SetManifest(ctx context.Context, req *userapi.SetManifestReq var resp userapi.SetManifestResponse oldState := a.state.Load() - if oldState == nil { + if oldState != nil { + // Subsequent SetManifest call, check permissions of caller. + if err := a.validatePeer(ctx, oldState.manifest); err != nil { + a.logger.Warn("SetManifest peer validation failed", "err", err) + return nil, status.Errorf(codes.PermissionDenied, "validating peer: %v", err) + } + } else if a.se.Load() == nil { // First SetManifest call, initialize seed engine. seedSalt, err := crypto.GenerateRandomBytes(64) if err != nil { @@ -48,24 +56,19 @@ func (a *Authority) SetManifest(ctx context.Context, req *userapi.SetManifestReq } seed, salt := seedSalt[:32], seedSalt[32:] - // TODO(burgerdev): requires https://github.com/edgelesssys/contrast/commit/bae8e7f - seedShares, err := manifest.EncryptSeedShares(seed /*m.SeedshareOwnerPubKeys*/, nil) + seedShares, err := manifest.EncryptSeedShares(seed, m.SeedshareOwnerPubKeys) if err != nil { return nil, status.Errorf(codes.InvalidArgument, "initializing seed engine: %v", err) } - if err := a.initSeedEngine(seed, salt); err != nil { + if err := a.initSeedEngine(seed, salt); errors.Is(err, ErrAlreadyRecovered) { + return nil, status.Error(codes.Unavailable, "concurrent initialization through SetManifest detected") + } else if err != nil { return nil, status.Errorf(codes.Internal, "setting seed: %v", err) } resp.SeedSharesDoc = &userapi.SeedShareDocument{ Salt: salt, SeedShares: seedShares, } - } else { - // Subsequent SetManifest call, check permissions of caller. - if err := a.validatePeer(ctx, oldState.manifest); err != nil { - a.logger.Warn("SetManifest peer validation failed", "err", err) - return nil, status.Errorf(codes.PermissionDenied, "validating peer: %v", err) - } } // Store resources in History. diff --git a/internal/manifest/manifest.go b/internal/manifest/manifest.go index fd28681c96..de163d187f 100644 --- a/internal/manifest/manifest.go +++ b/internal/manifest/manifest.go @@ -22,6 +22,7 @@ type Manifest struct { Policies map[HexString][]string ReferenceValues ReferenceValues WorkloadOwnerKeyDigests []HexString + SeedshareOwnerPubKeys []HexString } // ReferenceValues contains the workload independent reference values.