diff --git a/client/fingerprint/vault.go b/client/fingerprint/vault.go
index 5639e861c48..c887ddcef1e 100644
--- a/client/fingerprint/vault.go
+++ b/client/fingerprint/vault.go
@@ -7,6 +7,7 @@ import (
 	"time"
 
 	log "github.com/hashicorp/go-hclog"
+	"github.com/hashicorp/nomad/helper"
 	vapi "github.com/hashicorp/vault/api"
 )
 
@@ -78,5 +79,14 @@ func (f *VaultFingerprint) Fingerprint(req *FingerprintRequest, resp *Fingerprin
 }
 
+// Periodic reports that this fingerprinter should be re-run and how long to
+// wait before the next run. The interval is 15s unless f.lastState is
+// vaultAvailable, in which case it is 30s plus a random stagger.
 func (f *VaultFingerprint) Periodic() (bool, time.Duration) {
+	if f.lastState == vaultAvailable {
+		// Fingerprint infrequently once Vault is initially discovered with wide
+		// jitter to avoid thundering herds of fingerprints against central Vault
+		// servers.
+		return true, (30 * time.Second) + helper.RandomStagger(90*time.Second)
+	}
 	return true, 15 * time.Second
 }
diff --git a/client/fingerprint/vault_test.go b/client/fingerprint/vault_test.go
index c56ff91381a..32cd8f3b412 100644
--- a/client/fingerprint/vault_test.go
+++ b/client/fingerprint/vault_test.go
@@ -2,6 +2,7 @@ package fingerprint
 
 import (
 	"testing"
+	"time"
 
 	"github.com/hashicorp/nomad/ci"
 	"github.com/hashicorp/nomad/client/config"
@@ -21,6 +22,14 @@ func TestVaultFingerprint(t *testing.T) {
 		Attributes: make(map[string]string),
 	}
 
+	p, period := fp.Periodic()
+	if !p {
+		t.Fatalf("expected fingerprint to be periodic")
+	}
+	if period != (15 * time.Second) {
+		t.Fatalf("expected period to be 15s but found: %s", period)
+	}
+
 	conf := config.DefaultConfig()
 	conf.VaultConfig = tv.Config
 
@@ -40,6 +49,16 @@ func TestVaultFingerprint(t *testing.T) {
 	assertNodeAttributeContains(t, response.Attributes, "vault.cluster_id")
 	assertNodeAttributeContains(t, response.Attributes, "vault.cluster_name")
 
+	// Period should be longer after initial discovery
+	p, period = fp.Periodic()
+	if !p {
+		t.Fatalf("expected fingerprint to be periodic")
+	}
+	if period < (30*time.Second) || period > (2*time.Minute) {
+		t.Fatalf("expected period to be between 30s and 2m but found: %s", period)
+	}
+
+	// Stop Vault to simulate it being unavailable
 	tv.Stop()
 
 	err = fp.Fingerprint(request, &response)
@@ -56,4 +75,12 @@ func TestVaultFingerprint(t *testing.T) {
 	assertNodeAttributeContains(t, response.Attributes, "vault.cluster_id")
 	assertNodeAttributeContains(t, response.Attributes, "vault.cluster_name")
 
+	// Period should revert to the original 15s after Vault has been stopped
+	p, period = fp.Periodic()
+	if !p {
+		t.Fatalf("expected fingerprint to be periodic")
+	}
+	if period != (15 * time.Second) {
+		t.Fatalf("expected period to be 15s but found: %s", period)
+	}
 }
diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go
index 8a08a077a2c..1886c8df551 100644
--- a/client/fingerprint_manager.go
+++ b/client/fingerprint_manager.go
@@ -142,9 +142,9 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
 			appliedFingerprints = append(appliedFingerprints, name)
 		}
 
-		p, period := f.Periodic()
+		p, _ := f.Periodic()
 		if p {
-			go fm.runFingerprint(f, period, name)
+			go fm.runFingerprint(f, name)
 		}
 
 		if rfp, ok := f.(fingerprint.ReloadableFingerprint); ok {
@@ -157,8 +157,9 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error {
 }
 
 // runFingerprint runs each fingerprinter individually on an ongoing basis
-func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period time.Duration, name string) {
-	fm.logger.Debug("fingerprinting periodically", "fingerprinter", name, "period", period)
+func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, name string) {
+	_, period := f.Periodic()
+	fm.logger.Debug("fingerprinting periodically", "fingerprinter", name, "initial_period", period)
 
 	timer := time.NewTimer(period)
 	defer timer.Stop()
@@ -166,14 +167,16 @@ func (fm *FingerprintManager) runFingerprint(f fingerprint.Fingerprint, period t
 	for {
 		select {
 		case <-timer.C:
-			timer.Reset(period)
-
 			_, err := fm.fingerprint(name, f)
 			if err != nil {
 				fm.logger.Debug("error periodic fingerprinting", "error", err, "fingerprinter", name)
-				continue
 			}
 
+			// Re-arm the timer after every run, including failed ones. Skipping
+			// the reset on error would leave the timer expired and this loop
+			// would never fingerprint again until shutdown.
+			_, period = f.Periodic()
+			timer.Reset(period)
 		case <-fm.shutdownCh:
 			return
 		}
diff --git a/website/content/docs/upgrade/upgrade-specific.mdx b/website/content/docs/upgrade/upgrade-specific.mdx
index b9eb22281c3..9a98d2d20fb 100644
--- a/website/content/docs/upgrade/upgrade-specific.mdx
+++ b/website/content/docs/upgrade/upgrade-specific.mdx
@@ -62,7 +62,7 @@ the `-json` flag is provided.
 
 Nomad clients no longer have their Consul and Vault fingerprints cleared when
 connectivity is lost with Consul and Vault. To intentionally remove Consul and
-Vault from a client node, you will need to restart the client.
+Vault from a client node, you will need to restart the Nomad client agent.
 
 ## Nomad 1.3.3
 