Skip to content

Commit

Permalink
Eliminate some raciness from gcpkms plugin tests (#5544)
Browse files Browse the repository at this point in the history
* Eliminate some raciness from gcpkms plugin tests

`TestDisposeActiveCryptoKeys` has a race condition where a goroutine
that was processing keys for disposal was executing in parallel to some
assertions on state of the same keys. Rework this test to address
this race in the following ways:

- Enhance the fake KMS client used in the test to consider the filter condition
  for the `spire-last-update` label passed in the `ListCryptoKeys()`
  request. The test was not considering this field before, even though
  it is one of the primary factors in the plugin determining how to
  handle the key.
- Enrich the crypto keys set up in the fake KMS client by setting the
  `spire-last-update` label such that the plugin recognizes these keys
  as active based on last update time.
- Assert that the keys are not scheduled for destruction. The
  destruction of the keys is handled asynchronously in the plugin.
  The current test could have passed if the keys were queued for
  destruction, but the other goroutine processing the destruction
  operations hadn't completed yet. This is a potential source of raciness
  in the test.
- Make the mock clock used in the tests start at `time.Now()` instead of
  the Unix epoch because starting at the epoch causes some weird times
  to generated in the tests that are before the Unix epoch.

Making these changes broke a couple other tests. Summary of changes to
fix those tests:

`TestDisposeStaleCryptoKeys`:

- Start setting the `spire-last-update` label in the crypto keys set up
  in the fake KMS client so that the new filtering logic in
  `ListCryptoKeys()` in the fake KMS client gets exercised.
- Set up an unbuffered `chan error` for the `keepActiveCryptoKeySignal`
  so that we can block the goroutine that is renewing the active key
  last update time. Without blocking this goroutine, it will keep
  renewing the last update time of the active keys, potentially before
  another parallel goroutine can process it as stale.
- Advance the mock clock by the maximum of:
    - The duration that is the frequency of how often the stale key disposal
    goroutine runs
    - The maximum stale duration of a key
  The way the test is currently written assumes that the stale key disposal
  goroutine will run more frequently than the maximum stale duration, but it
  doesn't necessarily need to be the case.

`TestKeepActiveCryptoKeys`:

- Base the times in the test off of the current time rather than the
  Unix epoch as a start to align with other tests.

Signed-off-by: Ryan Turner <[email protected]>
  • Loading branch information
rturner3 authored Oct 5, 2024
1 parent ebd5a1f commit aa7ef27
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 22 deletions.
37 changes: 32 additions & 5 deletions pkg/server/plugin/keymanager/gcpkms/client_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@ import (
"fmt"
"path"
"reflect"
"regexp"
"strconv"
"strings"
"sync"
"testing"
"time"

"cloud.google.com/go/iam"
"cloud.google.com/go/iam/apiv1/iampb"
Expand All @@ -32,6 +35,8 @@ import (
"google.golang.org/protobuf/types/known/wrapperspb"
)

var lastUpdateFilterRegexp = regexp.MustCompile(fmt.Sprintf(`labels.%s < ([[:digit:]]+)`, labelNameLastUpdate))

type fakeCryptoKeyIterator struct {
mu sync.RWMutex

Expand Down Expand Up @@ -614,16 +619,38 @@ func (k *fakeKMSClient) ListCryptoKeys(_ context.Context, req *kmspb.ListCryptoK
}

// We Have a simplified filtering logic in this fake implementation,
// where we only care about the spire-active label.
// where we only care about the spire-active and spire-last-update labels.
if req.Filter != "" {
if !strings.Contains(req.Filter, "labels.spire-active = true") {
{
k.t.Fatal("Unsupported filter in ListCryptoKeys request")
k.t.Fatal("Unsupported filter in ListCryptoKeys request")
}

lastUpdateRegexpResults := lastUpdateFilterRegexp.FindStringSubmatch(req.Filter)
var lastUpdateTimeFilter time.Time
var keyLastUpdateTime time.Time
if len(lastUpdateRegexpResults) == 2 {
lastUpdate := lastUpdateRegexpResults[1]
lastUpdateUnix, err := strconv.ParseInt(lastUpdate, 10, 64)
if err != nil {
k.t.Fatalf("Failed to parse last update time in request filter: %s", err)
}
if fck.Labels[labelNameActive] != "true" {
continue

lastUpdateTimeFilter = time.Unix(lastUpdateUnix, 0)

if keyLastUpdate, ok := fck.Labels[labelNameLastUpdate]; ok {
keyLastUpdateUnix, err := strconv.ParseInt(keyLastUpdate, 10, 64)
if err != nil {
k.t.Fatalf("Failed to parse last update time in crypto key: %s", err)
}

keyLastUpdateTime = time.Unix(keyLastUpdateUnix, 0)
}
}

if fck.Labels[labelNameActive] != "true" ||
(!lastUpdateTimeFilter.IsZero() && !keyLastUpdateTime.IsZero() && !keyLastUpdateTime.Before(lastUpdateTimeFilter)) {
continue
}
}

cryptoKeys = append(cryptoKeys, fck.CryptoKey)
Expand Down
2 changes: 1 addition & 1 deletion pkg/server/plugin/keymanager/gcpkms/gcpkms.go
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ func (p *Plugin) disposeCryptoKeys(ctx context.Context) error {
return nil
}

// disposeCryptoKeysTask will be run every 24hs.
// disposeCryptoKeysTask will be run every 24hr.
// It will schedule the destruction of CryptoKeyVersions that have a
// spire-last-update label value older than two weeks.
// It will only schedule the destruction of CryptoKeyVersions belonging to the
Expand Down
69 changes: 53 additions & 16 deletions pkg/server/plugin/keymanager/gcpkms/gcpkms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ var (
cryptoKeyName1 = path.Join(validKeyRing, "cryptoKeys", fmt.Sprintf("test-crypto-key/spire-key-%s-spireKeyID-1", validServerID))
cryptoKeyName2 = path.Join(validKeyRing, "cryptoKeys", fmt.Sprintf("test-crypto-key/spire-key-%s-spireKeyID-2", validServerID))
fakeTime = timestamppb.Now()
unixEpoch = time.Unix(0, 0)

pubKey = &kmspb.PublicKey{
Pem: pemCert,
Expand All @@ -92,7 +91,6 @@ func setupTest(t *testing.T) *pluginTest {
log.Level = logrus.DebugLevel

c := clock.NewMock(t)
c.Set(unixEpoch)
fakeKMSClient := newKMSClientFake(t, c)
p := newPlugin(
func(ctx context.Context, opts ...option.ClientOption) (cloudKeyManagementService, error) {
Expand Down Expand Up @@ -433,11 +431,16 @@ func TestConfigure(t *testing.T) {

func TestDisposeStaleCryptoKeys(t *testing.T) {
configureRequest := configureRequestWithDefaults(t)
ts := setupTest(t)
now := ts.clockHook.Now()
fakeCryptoKeys := []*fakeCryptoKey{
{
CryptoKey: &kmspb.CryptoKey{
Name: cryptoKeyName1,
Labels: map[string]string{labelNameActive: "true"},
Name: cryptoKeyName1,
Labels: map[string]string{
labelNameActive: "true",
labelNameLastUpdate: fmt.Sprintf("%d", now.Unix()),
},
VersionTemplate: &kmspb.CryptoKeyVersionTemplate{Algorithm: kmspb.CryptoKeyVersion_EC_SIGN_P256_SHA256},
},
fakeCryptoKeyVersions: map[string]*fakeCryptoKeyVersion{
Expand All @@ -453,8 +456,11 @@ func TestDisposeStaleCryptoKeys(t *testing.T) {
},
{
CryptoKey: &kmspb.CryptoKey{
Name: cryptoKeyName2,
Labels: map[string]string{labelNameActive: "true"},
Name: cryptoKeyName2,
Labels: map[string]string{
labelNameActive: "true",
labelNameLastUpdate: fmt.Sprintf("%d", now.Unix()),
},
VersionTemplate: &kmspb.CryptoKeyVersionTemplate{Algorithm: kmspb.CryptoKeyVersion_EC_SIGN_P256_SHA256},
},
fakeCryptoKeyVersions: map[string]*fakeCryptoKeyVersion{
Expand All @@ -470,18 +476,20 @@ func TestDisposeStaleCryptoKeys(t *testing.T) {
},
}

ts := setupTest(t)
ts.fakeKMSClient.putFakeCryptoKeys(fakeCryptoKeys)

ts.plugin.hooks.disposeCryptoKeysSignal = make(chan error)
ts.plugin.hooks.scheduleDestroySignal = make(chan error)
ts.plugin.hooks.setInactiveSignal = make(chan error)
// Set up an unbuffered channel for the keepActiveCryptoKeys task so that it gets blocked and we can simulate a key getting stale.
ts.plugin.hooks.keepActiveCryptoKeysSignal = make(chan error)

_, err := ts.plugin.Configure(ctx, configureRequest)
require.NoError(t, err)

// Move the clock to start disposeCryptoKeysTask.
ts.clockHook.Add(disposeCryptoKeysFrequency)
clkAdv := maxDuration(disposeCryptoKeysFrequency, maxStaleDuration)
ts.clockHook.Add(clkAdv)

// Wait for dispose disposeCryptoKeysTask to be initialized.
_ = waitForSignal(t, ts.plugin.hooks.disposeCryptoKeysSignal)
Expand Down Expand Up @@ -535,11 +543,16 @@ func TestDisposeStaleCryptoKeys(t *testing.T) {

func TestDisposeActiveCryptoKeys(t *testing.T) {
configureRequest := configureRequestWithDefaults(t)
ts := setupTest(t)
now := ts.clockHook.Now()
fakeCryptoKeys := []*fakeCryptoKey{
{
CryptoKey: &kmspb.CryptoKey{
Name: cryptoKeyName1,
Labels: map[string]string{labelNameActive: "true"},
Name: cryptoKeyName1,
Labels: map[string]string{
labelNameActive: "true",
labelNameLastUpdate: fmt.Sprintf("%d", now.Unix()),
},
VersionTemplate: &kmspb.CryptoKeyVersionTemplate{Algorithm: kmspb.CryptoKeyVersion_EC_SIGN_P256_SHA256},
},
fakeCryptoKeyVersions: map[string]*fakeCryptoKeyVersion{
Expand All @@ -555,8 +568,11 @@ func TestDisposeActiveCryptoKeys(t *testing.T) {
},
{
CryptoKey: &kmspb.CryptoKey{
Name: cryptoKeyName2,
Labels: map[string]string{labelNameActive: "true"},
Name: cryptoKeyName2,
Labels: map[string]string{
labelNameActive: "true",
labelNameLastUpdate: fmt.Sprintf("%d", now.Unix()),
},
VersionTemplate: &kmspb.CryptoKeyVersionTemplate{Algorithm: kmspb.CryptoKeyVersion_EC_SIGN_P256_SHA256},
},
fakeCryptoKeyVersions: map[string]*fakeCryptoKeyVersion{
Expand All @@ -572,21 +588,34 @@ func TestDisposeActiveCryptoKeys(t *testing.T) {
},
}

ts := setupTest(t)
ts.fakeKMSClient.putFakeCryptoKeys(fakeCryptoKeys)

ts.plugin.hooks.disposeCryptoKeysSignal = make(chan error)
scheduleDestroySignal := make(chan error)
ts.plugin.hooks.scheduleDestroySignal = scheduleDestroySignal
enqueueDestructionSignal := make(chan error, 1)
ts.plugin.hooks.enqueueDestructionSignal = enqueueDestructionSignal

_, err := ts.plugin.Configure(ctx, configureRequest)
require.NoError(t, err)

// Wait for disposeCryptoKeysTask to be initialized.
err = waitForSignal(t, ts.plugin.hooks.disposeCryptoKeysSignal)
require.NoError(t, err)

// Move the clock to start disposeCryptoKeysTask.
ts.clockHook.Add(disposeCryptoKeysFrequency)

// Wait for dispose disposeCryptoKeysTask to be initialized.
_ = waitForSignal(t, ts.plugin.hooks.disposeCryptoKeysSignal)
// Wait for disposeCryptoKeysTask to complete.
err = waitForSignal(t, ts.plugin.hooks.disposeCryptoKeysSignal)
require.NoError(t, err)

// Verify that no active keys have been queued for destruction.
select {
case <-enqueueDestructionSignal:
require.Fail(t, "Active key should not be queued for destruction")
default:
}

// The CryptoKeys are not stale yet. Assert that they are active and the
// CryptoKeyVersions enabled.
Expand Down Expand Up @@ -1094,7 +1123,7 @@ func TestKeepActiveCryptoKeys(t *testing.T) {
_ = waitForSignal(t, ts.plugin.hooks.keepActiveCryptoKeysSignal)

// Move the clock forward so the task is run.
currentTime := unixEpoch.Add(6 * time.Hour)
currentTime := ts.clockHook.Now().Add(6 * time.Hour)
ts.clockHook.Set(currentTime)

// Wait for keepActiveCryptoKeys to be run.
Expand Down Expand Up @@ -1749,3 +1778,11 @@ func waitForSignal(t *testing.T, ch chan error) error {
}
return nil
}

func maxDuration(d1, d2 time.Duration) time.Duration {
if d1 > d2 {
return d1
}

return d2
}

0 comments on commit aa7ef27

Please sign in to comment.