From 109639b052abcdf58885ac9ce394178366c92288 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 1 Dec 2017 13:49:17 -0500 Subject: [PATCH 1/3] Move location of quit channel closing in exp manager If it happens after stopping timers any timers firing before all timers are stopped will still run the revocation function. With plugin auto-crash-recovery this could end up instantiating a plugin that could then try to unwrap a token from a nil token store. This also plumbs in core so that we can grab a read lock during the operation and check standby/sealed status before running it (after grabbing the lock). --- vault/expiration.go | 60 +++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/vault/expiration.go b/vault/expiration.go index 710fcb8f0b16..47e7ba41ff76 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -51,11 +51,13 @@ const ( // If a secret is not renewed in timely manner, it may be expired, and // the ExpirationManager will handle doing automatic revocation. type ExpirationManager struct { - router *Router - idView *BarrierView - tokenView *BarrierView - tokenStore *TokenStore - logger log.Logger + core *Core + router *Router + idView *BarrierView + tokenView *BarrierView + tokenStore *TokenStore + logger log.Logger + coreStateLock *sync.RWMutex pending map[string]*time.Timer pendingLock sync.RWMutex @@ -72,18 +74,16 @@ type ExpirationManager struct { // NewExpirationManager creates a new ExpirationManager that is backed // using a given view, and uses the provided router for revocation. -func NewExpirationManager(router *Router, view *BarrierView, ts *TokenStore, logger log.Logger) *ExpirationManager { - if logger == nil { - logger = log.New("expiration_manager") - } - +func NewExpirationManager(c *Core, view *BarrierView) *ExpirationManager { exp := &ExpirationManager{ - router: router, - idView: view.SubView(leaseViewPrefix), - tokenView: view.SubView(tokenViewPrefix), - tokenStore: ts, - logger: logger, - pending: make(map[string]*time.Timer), + core: c, + router: c.router, + idView: view.SubView(leaseViewPrefix), + tokenView: view.SubView(tokenViewPrefix), + tokenStore: c.tokenStore, + logger: c.logger, + pending: make(map[string]*time.Timer), + coreStateLock: &c.stateLock, // new instances of the expiration manager will go immediately into // restore mode @@ -91,6 +91,11 @@ func NewExpirationManager(router *Router, view *BarrierView, ts *TokenStore, log restoreLocks: locksutil.CreateLocks(), quitCh: make(chan struct{}), } + + if exp.logger == nil { + exp.logger = log.New("expiration_manager") + } + return exp } @@ -103,7 +108,7 @@ func (c *Core) setupExpiration() error { view := c.systemBarrierView.SubView(expirationSubPath) // Create the manager - mgr := NewExpirationManager(c.router, view, c.tokenStore, c.logger) + mgr := NewExpirationManager(c, view) c.expiration = mgr // Link the token store to this @@ -430,6 +435,10 @@ func (m *ExpirationManager) Stop() error { m.logger.Debug("expiration: stop triggered") defer m.logger.Debug("expiration: finished stopping") + // Do this before stopping pending timers to avoid potential races with + // expiring timers + close(m.quitCh) + m.pendingLock.Lock() for _, timer := range m.pending { timer.Stop() @@ -437,7 +446,6 @@ func (m *ExpirationManager) Stop() error { m.pending = make(map[string]*time.Timer) m.pendingLock.Unlock() - close(m.quitCh) if m.inRestoreMode() { for { if !m.inRestoreMode() { @@ -969,13 +977,29 @@ func (m *ExpirationManager) expireID(leaseID string) { return default: } + + m.coreStateLock.RLock() + if m.core.sealed { + m.logger.Error("expiration: sealed, not attempting further revocation of lease", "lease_id", leaseID) + m.coreStateLock.RUnlock() + return + } + if m.core.standby { + m.logger.Error("expiration: standby, not attempting further revocation of lease", "lease_id", leaseID) + m.coreStateLock.RUnlock() + return + } + err := m.Revoke(leaseID) if err == nil { if m.logger.IsInfo() { m.logger.Info("expiration: revoked lease", "lease_id", leaseID) } + m.coreStateLock.RUnlock() return } + + m.coreStateLock.RUnlock() m.logger.Error("expiration: failed to revoke lease", "lease_id", leaseID, "error", err) time.Sleep((1 << attempt) * revokeRetryBase) } From 3c6309b9451267061d6f296422f542d751b10465 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 1 Dec 2017 14:23:14 -0500 Subject: [PATCH 2/3] Use context instead of checking core values directly --- vault/core.go | 7 +++---- vault/expiration.go | 42 ++++++++++++++++++++---------------------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/vault/core.go b/vault/core.go index 5ec32443066c..9a9df431be47 100644 --- a/vault/core.go +++ b/vault/core.go @@ -1498,8 +1498,6 @@ func (c *Core) sealInternal() error { // Signal the standby goroutine to shutdown, wait for completion close(c.standbyStopCh) - c.requestContext = nil - // Release the lock while we wait to avoid deadlocking c.stateLock.Unlock() <-c.standbyDoneCh @@ -1536,9 +1534,8 @@ func (c *Core) postUnseal() (retErr error) { defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now()) defer func() { if retErr != nil { + c.requestContextCancelFunc() c.preSeal() - } else { - c.requestContext, c.requestContextCancelFunc = context.WithCancel(context.Background()) } }() c.logger.Info("core: post-unseal setup starting") @@ -1559,6 +1556,8 @@ func (c *Core) postUnseal() (retErr error) { c.seal.SetRecoveryConfig(nil) } + c.requestContext, c.requestContextCancelFunc = context.WithCancel(context.Background()) + if err := enterprisePostUnseal(c); err != nil { return err } diff --git a/vault/expiration.go b/vault/expiration.go index 47e7ba41ff76..8c88ba63f4c8 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -12,6 +12,7 @@ import ( "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" + context "golang.org/x/net/context" "github.com/hashicorp/errwrap" multierror "github.com/hashicorp/go-multierror" @@ -51,13 +52,11 @@ const ( // If a secret is not renewed in timely manner, it may be expired, and // the ExpirationManager will handle doing automatic revocation. type ExpirationManager struct { - core *Core - router *Router - idView *BarrierView - tokenView *BarrierView - tokenStore *TokenStore - logger log.Logger - coreStateLock *sync.RWMutex + router *Router + idView *BarrierView + tokenView *BarrierView + tokenStore *TokenStore + logger log.Logger pending map[string]*time.Timer pendingLock sync.RWMutex @@ -70,26 +69,30 @@ type ExpirationManager struct { restoreLocks []*locksutil.LockEntry restoreLoaded sync.Map quitCh chan struct{} + + coreStateLock *sync.RWMutex + quitContext context.Context } // NewExpirationManager creates a new ExpirationManager that is backed // using a given view, and uses the provided router for revocation. func NewExpirationManager(c *Core, view *BarrierView) *ExpirationManager { exp := &ExpirationManager{ - core: c, - router: c.router, - idView: view.SubView(leaseViewPrefix), - tokenView: view.SubView(tokenViewPrefix), - tokenStore: c.tokenStore, - logger: c.logger, - pending: make(map[string]*time.Timer), - coreStateLock: &c.stateLock, + router: c.router, + idView: view.SubView(leaseViewPrefix), + tokenView: view.SubView(tokenViewPrefix), + tokenStore: c.tokenStore, + logger: c.logger, + pending: make(map[string]*time.Timer), // new instances of the expiration manager will go immediately into // restore mode restoreMode: 1, restoreLocks: locksutil.CreateLocks(), quitCh: make(chan struct{}), + + coreStateLock: &c.stateLock, + quitContext: c.requestContext, } if exp.logger == nil { @@ -979,13 +982,8 @@ func (m *ExpirationManager) expireID(leaseID string) { } m.coreStateLock.RLock() - if m.core.sealed { - m.logger.Error("expiration: sealed, not attempting further revocation of lease", "lease_id", leaseID) - m.coreStateLock.RUnlock() - return - } - if m.core.standby { - m.logger.Error("expiration: standby, not attempting further revocation of lease", "lease_id", leaseID) + if m.quitContext.Err() == context.Canceled { + m.logger.Error("expiration: core context canceled, not attempting further revocation of lease", "lease_id", leaseID) m.coreStateLock.RUnlock() return } From 91b26493199752f54e769378f1975c9d4c678437 Mon Sep 17 00:00:00 2001 From: Jeff Mitchell Date: Fri, 1 Dec 2017 15:29:23 -0500 Subject: [PATCH 3/3] Use official Go context in a few key places --- vault/core.go | 2 +- vault/expiration.go | 2 +- vault/request_forwarding.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vault/core.go b/vault/core.go index 9a9df431be47..4df0cbdc76f9 100644 --- a/vault/core.go +++ b/vault/core.go @@ -1,6 +1,7 @@ package vault import ( + "context" "crypto" "crypto/ecdsa" "crypto/subtle" @@ -18,7 +19,6 @@ import ( "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" - "golang.org/x/net/context" "google.golang.org/grpc" "github.com/hashicorp/errwrap" diff --git a/vault/expiration.go b/vault/expiration.go index 8c88ba63f4c8..23176ae4453f 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -1,6 +1,7 @@ package vault import ( + "context" "encoding/json" "errors" "fmt" @@ -12,7 +13,6 @@ import ( "github.com/armon/go-metrics" log "github.com/mgutz/logxi/v1" - context "golang.org/x/net/context" "github.com/hashicorp/errwrap" multierror "github.com/hashicorp/go-multierror" diff --git a/vault/request_forwarding.go b/vault/request_forwarding.go index 10e6a9a5781e..d031566d3927 100644 --- a/vault/request_forwarding.go +++ b/vault/request_forwarding.go @@ -1,6 +1,7 @@ package vault import ( + "context" "crypto/tls" "crypto/x509" "fmt" @@ -13,7 +14,6 @@ import ( "time" "github.com/hashicorp/vault/helper/forwarding" - "golang.org/x/net/context" "golang.org/x/net/http2" "google.golang.org/grpc" "google.golang.org/grpc/keepalive"