diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index 31cecd30b56a..8563e1c0cc75 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -52,6 +52,7 @@ server.auth_log.sql_connections.enabled boolean false if set, log SQL client con
server.auth_log.sql_sessions.enabled boolean false if set, log SQL session login/disconnection events (note: may hinder performance on loaded nodes) tenant-rw
server.authentication_cache.enabled boolean true enables a cache used during authentication to avoid lookups to system tables when retrieving per-user authentication-related information tenant-rw
server.child_metrics.enabled boolean false enables the exporting of child metrics, additional prometheus time series with extra labels tenant-rw
+server.client_cert_expiration_cache.capacity integer 1000 the maximum number of client cert expirations stored tenant-rw
server.clock.forward_jump_check_enabled boolean false if enabled, forward clock jumps > max_offset/2 will cause a panic tenant-rw
server.clock.persist_upper_bound_interval duration 0s the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature. tenant-rw
server.eventlog.enabled boolean true if set, logged notable events are also stored in the table system.eventlog tenant-rw
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index efc37fc73279..1d8a39ed285e 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -79,6 +79,7 @@
server.auth_log.sql_sessions.enabled
| boolean | false | if set, log SQL session login/disconnection events (note: may hinder performance on loaded nodes) | Serverless/Dedicated/Self-Hosted |
server.authentication_cache.enabled
| boolean | true | enables a cache used during authentication to avoid lookups to system tables when retrieving per-user authentication-related information | Serverless/Dedicated/Self-Hosted |
server.child_metrics.enabled
| boolean | false | enables the exporting of child metrics, additional prometheus time series with extra labels | Serverless/Dedicated/Self-Hosted |
+server.client_cert_expiration_cache.capacity
| integer | 1000 | the maximum number of client cert expirations stored | Serverless/Dedicated/Self-Hosted |
server.clock.forward_jump_check_enabled
| boolean | false | if enabled, forward clock jumps > max_offset/2 will cause a panic | Serverless/Dedicated/Self-Hosted |
server.clock.persist_upper_bound_interval
| duration | 0s | the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature. | Serverless/Dedicated/Self-Hosted |
server.consistency_check.max_rate
| byte size | 8.0 MiB | the rate limit (bytes/sec) to use for consistency checks; used in conjunction with server.consistency_check.interval to control the frequency of consistency checks. Note that setting this too high can negatively impact performance. | Dedicated/Self-Hosted |
diff --git a/pkg/security/BUILD.bazel b/pkg/security/BUILD.bazel
index ae463214c161..bdfaedaa3b2a 100644
--- a/pkg/security/BUILD.bazel
+++ b/pkg/security/BUILD.bazel
@@ -6,8 +6,10 @@ go_library(
srcs = [
"auth.go",
"auto_tls_init.go",
+ "cert_expiry_cache.go",
"certificate_loader.go",
"certificate_manager.go",
+ "certificate_metrics.go",
"certs.go",
"join_token.go",
"ocsp.go",
@@ -31,11 +33,14 @@ go_library(
"//pkg/server/telemetry",
"//pkg/settings",
"//pkg/settings/cluster",
+ "//pkg/util/cache",
"//pkg/util/encoding",
"//pkg/util/envutil",
"//pkg/util/log",
"//pkg/util/log/eventpb",
"//pkg/util/metric",
+ "//pkg/util/metric/aggmetric",
+ "//pkg/util/mon",
"//pkg/util/quotapool",
"//pkg/util/randutil",
"//pkg/util/stop",
@@ -58,6 +63,7 @@ go_test(
srcs = [
"auth_test.go",
"auto_tls_init_test.go",
+ "cert_expiry_cache_test.go",
"certificate_loader_test.go",
"certificate_manager_test.go",
"certs_rotation_test.go",
@@ -81,12 +87,17 @@ go_test(
"//pkg/security/securitytest",
"//pkg/security/username",
"//pkg/server",
+ "//pkg/settings/cluster",
"//pkg/testutils",
"//pkg/testutils/serverutils",
"//pkg/util/envutil",
"//pkg/util/leaktest",
"//pkg/util/log",
+ "//pkg/util/metric",
+ "//pkg/util/metric/aggmetric",
+ "//pkg/util/mon",
"//pkg/util/randutil",
+ "//pkg/util/stop",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
diff --git a/pkg/security/auth.go b/pkg/security/auth.go
index c586e41defd6..0f7ee0992e59 100644
--- a/pkg/security/auth.go
+++ b/pkg/security/auth.go
@@ -143,7 +143,11 @@ func Contains(sl []string, s string) bool {
// UserAuthCertHook builds an authentication hook based on the security
// mode and client certificate.
func UserAuthCertHook(
- insecureMode bool, tlsState *tls.ConnectionState, tenantID roachpb.TenantID,
+ insecureMode bool,
+ tlsState *tls.ConnectionState,
+ tenantID roachpb.TenantID,
+ certManager *CertificateManager,
+ cache *ClientCertExpirationCache,
) (UserAuthHook, error) {
var certUserScope []CertificateUserScope
if !insecureMode {
@@ -177,14 +181,24 @@ func UserAuthCertHook(
return nil
}
+ peerCert := tlsState.PeerCertificates[0]
+
// The client certificate should not be a tenant client type. For now just
// check that it doesn't have OU=Tenants. It would make sense to add
// explicit OU=Users to all client certificates and to check for match.
- if IsTenantCertificate(tlsState.PeerCertificates[0]) {
+ if IsTenantCertificate(peerCert) {
return errors.Errorf("using tenant client certificate as user certificate is not allowed")
}
if ValidateUserScope(certUserScope, systemIdentity.Normalized(), tenantID) {
+ if certManager != nil {
+ cache.MaybeUpsert(
+ ctx,
+ systemIdentity.Normalized(),
+ peerCert.NotAfter.Unix(),
+ certManager.certMetrics.ClientExpiration,
+ )
+ }
return nil
}
return errors.WithDetailf(errors.Errorf("certificate authentication failed for user %q", systemIdentity),
diff --git a/pkg/security/auth_test.go b/pkg/security/auth_test.go
index b4de758a3410..62fd88323d0a 100644
--- a/pkg/security/auth_test.go
+++ b/pkg/security/auth_test.go
@@ -321,7 +321,13 @@ func TestAuthenticationHook(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- hook, err := security.UserAuthCertHook(tc.insecure, makeFakeTLSState(t, tc.tlsSpec), tc.tenantID)
+ hook, err := security.UserAuthCertHook(
+ tc.insecure,
+ makeFakeTLSState(t, tc.tlsSpec),
+ tc.tenantID,
+ nil, /* certManager */
+ nil, /* cache */
+ )
if (err == nil) != tc.buildHookSuccess {
t.Fatalf("expected success=%t, got err=%v", tc.buildHookSuccess, err)
}
diff --git a/pkg/security/cert_expiry_cache.go b/pkg/security/cert_expiry_cache.go
new file mode 100644
index 000000000000..07a5f8a2c618
--- /dev/null
+++ b/pkg/security/cert_expiry_cache.go
@@ -0,0 +1,227 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package security
+
+import (
+ "context"
+ math_rand "math/rand"
+ "time"
+ "unsafe"
+
+ "github.com/cockroachdb/cockroach/pkg/settings"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/util/cache"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
+ "github.com/cockroachdb/cockroach/pkg/util/mon"
+ "github.com/cockroachdb/cockroach/pkg/util/stop"
+ "github.com/cockroachdb/cockroach/pkg/util/syncutil"
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
+)
+
+// CacheCapacityMax is set arbitrarily high; configurable later if needed.
+const CacheCapacityMax = 65000
+
+// ClientCertExpirationCacheCapacity is the cluster setting that controls the
+// maximum number of client cert expirations in the cache.
+var ClientCertExpirationCacheCapacity = settings.RegisterIntSetting(
+ settings.TenantWritable,
+ "server.client_cert_expiration_cache.capacity",
+ "the maximum number of client cert expirations stored",
+ 1000,
+).WithPublic()
+
+// ClientCertExpirationCache contains a cache of gauge objects keyed by
+// SQL username strings. It is a FIFO cache that stores gauges valued by
+// minimum expiration of the client certs seen (per user).
+type ClientCertExpirationCache struct {
+ mu struct {
+ // NB: Cannot be a RWMutex for Get because UnorderedCache.Get manipulates
+ // an internal hashmap.
+ syncutil.Mutex
+ cache *cache.UnorderedCache
+ acc mon.BoundAccount
+ }
+ settings *cluster.Settings
+ stopper *stop.Stopper
+ mon *mon.BytesMonitor
+ timeSrc interface{}
+}
+
+func NewClientCertExpirationCache(
+ ctx context.Context,
+ st *cluster.Settings,
+ stopper *stop.Stopper,
+ timeSrc timeutil.TimeSource,
+ parentMon *mon.BytesMonitor,
+) *ClientCertExpirationCache {
+ c := &ClientCertExpirationCache{settings: st}
+ c.stopper = stopper
+
+ switch timeSrc := timeSrc.(type) {
+ case *timeutil.DefaultTimeSource, *timeutil.ManualTime:
+ c.timeSrc = timeSrc
+ default:
+ c.timeSrc = &timeutil.DefaultTimeSource{}
+ }
+
+ c.mu.cache = cache.NewUnorderedCache(cache.Config{
+ Policy: cache.CacheFIFO,
+ ShouldEvict: func(size int, _, value interface{}) bool {
+ var capacity int64
+ settingCapacity := ClientCertExpirationCacheCapacity.Get(&st.SV)
+ if settingCapacity < CacheCapacityMax {
+ capacity = settingCapacity
+ } else {
+ capacity = CacheCapacityMax
+ }
+ return int64(size) > capacity
+ },
+ OnEvictedEntry: func(entry *cache.Entry) {
+ gauge := entry.Value.(*aggmetric.Gauge)
+ gauge.Unlink()
+ c.mu.acc.Shrink(ctx, int64(unsafe.Sizeof(*gauge)))
+ },
+ })
+ c.mon = mon.NewMonitorInheritWithLimit(
+ "client-expiration-cache", 0 /* limit */, parentMon,
+ )
+ c.mu.acc = c.mon.MakeBoundAccount()
+ c.mon.StartNoReserved(ctx, parentMon)
+
+ // Begin an async task to periodically evict entries associated with
+ // expiration values that are in the past.
+ if err := c.startPurgePastExpirations(ctx); err != nil {
+ log.Ops.Warningf(
+ ctx, "failed to initiate periodic purge of expiration cache entries: %v", err,
+ )
+ }
+
+ return c
+}
+
+// Get retrieves the cert expiration for the given username, if it exists.
+// An expiration of 0 indicates an entry was not found.
+func (c *ClientCertExpirationCache) Get(key string) (int64, bool) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ value, ok := c.mu.cache.Get(key)
+ if !ok {
+ return 0, ok
+ }
+ // If the expiration has already been reached, remove the entry and indicate
+ // that the entry was not found.
+ gauge := value.(*aggmetric.Gauge)
+ if gauge.Value() < c.timeNow() {
+ c.mu.cache.Del(key)
+ return 0, false
+ }
+ return gauge.Value(), ok
+}
+
+// MaybeUpsert may update or insert a client cert expiration gauge for a
+// particular user into the cache. An update is contingent on whether the
+// old expiration is after the new expiration. This ensures that the cache
+// maintains the minimum expiration for each user.
+func (c *ClientCertExpirationCache) MaybeUpsert(
+ ctx context.Context, key string, newExpiry int64, parentGauge *aggmetric.AggGauge,
+) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+
+ value, ok := c.mu.cache.Get(key)
+ if !ok {
+ err := c.mu.acc.Grow(ctx, int64(unsafe.Sizeof(aggmetric.Gauge{})))
+ if err == nil {
+ // Only create new gauges for expirations in the future.
+ if newExpiry > c.timeNow() {
+ gauge := parentGauge.AddChild(key)
+ gauge.Update(newExpiry)
+ c.mu.cache.Add(key, gauge)
+ }
+ } else {
+ log.Ops.Warningf(ctx, "no memory available to cache cert expiry: %v", err)
+ }
+ } else if gauge := value.(*aggmetric.Gauge); newExpiry < gauge.Value() || gauge.Value() == 0 {
+ gauge.Update(newExpiry)
+ }
+}
+
+func (c *ClientCertExpirationCache) Clear() {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.mu.cache.Clear()
+}
+
+// Len returns the number of cert expirations in the cache.
+func (c *ClientCertExpirationCache) Len() int {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ return c.mu.cache.Len()
+}
+
+// timeNow returns the current time depending on the time source of the cache.
+func (c *ClientCertExpirationCache) timeNow() int64 {
+ if timeSrc, ok := c.timeSrc.(timeutil.TimeSource); ok {
+ return timeSrc.Now().Unix()
+ }
+ return timeutil.Now().Unix()
+}
+
+// startPurgePastExpirations runs an infinite loop in a goroutine which
+// regularly evicts entries associated with expiration values that have already
+// passed.
+func (c *ClientCertExpirationCache) startPurgePastExpirations(ctx context.Context) error {
+ return c.stopper.RunAsyncTask(ctx, "purge-cert-expiry-cache", func(context.Context) {
+ const period = time.Hour
+
+ timer := timeutil.NewTimer()
+ defer timer.Stop()
+
+ timer.Reset(jitteredInterval(period))
+ for ; ; timer.Reset(period) {
+ select {
+ case <-timer.C:
+ timer.Read = true
+ c.PurgePastExpirations()
+ case <-c.stopper.ShouldQuiesce():
+ return
+ case <-ctx.Done():
+ return
+ }
+ }
+ },
+ )
+}
+
+// PurgePastExpirations removes entries associated with expiration values that
+// have already passed. This helps ensure that the cache contains gauges
+// with expiration values in the future only.
+func (c *ClientCertExpirationCache) PurgePastExpirations() {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ var deleteEntryKeys []interface{}
+ c.mu.cache.Do(func(entry *cache.Entry) {
+ gauge := entry.Value.(*aggmetric.Gauge)
+ if gauge.Value() <= c.timeNow() {
+ deleteEntryKeys = append(deleteEntryKeys, entry.Key)
+ }
+ })
+ for _, key := range deleteEntryKeys {
+ c.mu.cache.Del(key)
+ }
+}
+
+// jitteredInterval returns a randomly jittered (+/-25%) duration
+// from the interval.
+func jitteredInterval(interval time.Duration) time.Duration {
+ return time.Duration(float64(interval) * (0.75 + 0.5*math_rand.Float64()))
+}
diff --git a/pkg/security/cert_expiry_cache_test.go b/pkg/security/cert_expiry_cache_test.go
new file mode 100644
index 000000000000..c899a6a328ab
--- /dev/null
+++ b/pkg/security/cert_expiry_cache_test.go
@@ -0,0 +1,232 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package security_test
+
+import (
+ "context"
+ "math"
+ "sync"
+ "testing"
+
+ "github.com/cockroachdb/cockroach/pkg/security"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/metric"
+ "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
+ "github.com/cockroachdb/cockroach/pkg/util/mon"
+ "github.com/cockroachdb/cockroach/pkg/util/stop"
+ "github.com/cockroachdb/cockroach/pkg/util/timeutil"
+ "github.com/stretchr/testify/require"
+)
+
+func TestEntryCache(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+
+ const (
+ fooUser = "foo"
+ barUser = "bar"
+ blahUser = "blah"
+ fakeUser = "fake"
+
+ laterExpiration = int64(1684359292)
+ closerExpiration = int64(1584359292)
+ )
+
+ ctx := context.Background()
+
+ // Create a cache with a capacity of 3.
+ cache, metric := newCache(
+ ctx,
+ &cluster.Settings{},
+ 3, /* capacity */
+ timeutil.NewManualTime(timeutil.Unix(0, 123)),
+ )
+ require.Equal(t, 0, cache.Len())
+
+ // Verify insert.
+ cache.MaybeUpsert(ctx, fooUser, laterExpiration, metric)
+ require.Equal(t, 1, cache.Len())
+
+ // Verify update.
+ cache.MaybeUpsert(ctx, fooUser, closerExpiration, metric)
+ require.Equal(t, 1, cache.Len())
+
+ // Verify retrieval.
+ expiration, found := cache.Get(fooUser)
+ require.Equal(t, true, found)
+ require.Equal(t, closerExpiration, expiration)
+
+ // Verify the cache retains the minimum expiration for a user, assuming no
+ // eviction.
+ cache.MaybeUpsert(ctx, barUser, closerExpiration, metric)
+ require.Equal(t, 2, cache.Len())
+ cache.MaybeUpsert(ctx, barUser, laterExpiration, metric)
+ require.Equal(t, 2, cache.Len())
+ expiration, found = cache.Get(barUser)
+ require.Equal(t, true, found)
+ require.Equal(t, closerExpiration, expiration)
+
+ // Verify indication of absence for non-existent values.
+ expiration, found = cache.Get(fakeUser)
+ require.Equal(t, false, found)
+ require.Equal(t, int64(0), expiration)
+
+ // Verify eviction when the capacity is exceeded.
+ cache.MaybeUpsert(ctx, blahUser, laterExpiration, metric)
+ require.Equal(t, 3, cache.Len())
+ cache.MaybeUpsert(ctx, fakeUser, closerExpiration, metric)
+ require.Equal(t, 3, cache.Len())
+ _, found = cache.Get(fooUser)
+ require.Equal(t, false, found)
+ _, found = cache.Get(barUser)
+ require.Equal(t, true, found)
+
+ // Verify previous entries can be inserted after the cache is cleared.
+ cache.Clear()
+ require.Equal(t, 0, cache.Len())
+ _, found = cache.Get(fooUser)
+ require.Equal(t, false, found)
+ _, found = cache.Get(barUser)
+ require.Equal(t, false, found)
+ cache.MaybeUpsert(ctx, fooUser, laterExpiration, metric)
+ require.Equal(t, 1, cache.Len())
+ cache.MaybeUpsert(ctx, barUser, laterExpiration, metric)
+ require.Equal(t, 2, cache.Len())
+ expiration, found = cache.Get(fooUser)
+ require.Equal(t, true, found)
+ require.Equal(t, laterExpiration, expiration)
+ expiration, found = cache.Get(barUser)
+ require.Equal(t, true, found)
+ require.Equal(t, laterExpiration, expiration)
+
+ // Verify expirations in the past cannot be inserted into the cache.
+ cache.Clear()
+ cache.MaybeUpsert(ctx, fooUser, int64(0), metric)
+ require.Equal(t, 0, cache.Len())
+}
+
+func TestPurgePastEntries(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+
+ const (
+ fooUser = "foo"
+ barUser = "bar"
+ blahUser = "blah"
+ bazUser = "baz"
+
+ pastExpiration1 = int64(1000000000)
+ pastExpiration2 = int64(2000000000)
+ futureExpiration = int64(3000000000)
+ )
+
+ ctx := context.Background()
+
+ // Create a cache with a capacity of 3.
+ clock := timeutil.NewManualTime(timeutil.Unix(0, 123))
+ cache, metric := newCache(ctx, &cluster.Settings{}, 4 /* capacity */, clock)
+
+ // Insert entries that we expect to be cleaned up after advancing in time.
+ cache.MaybeUpsert(ctx, fooUser, pastExpiration1, metric)
+ cache.MaybeUpsert(ctx, barUser, pastExpiration2, metric)
+ cache.MaybeUpsert(ctx, blahUser, pastExpiration2, metric)
+ // Insert an entry that should NOT be removed after advancing in time
+ // because it is still in the future.
+ cache.MaybeUpsert(ctx, bazUser, futureExpiration, metric)
+ require.Equal(t, 4, cache.Len())
+
+ // Advance time so that expirations have been reached already.
+ clock.AdvanceTo(timeutil.Unix(2000000000, 123))
+
+ // Verify an expiration from the past cannot be retrieved. Confirm it has
+ // been removed after the attempt as well.
+ _, found := cache.Get(fooUser)
+ require.Equal(t, false, found)
+ require.Equal(t, 3, cache.Len())
+
+ // Verify that when the cache gets cleaned of the past expirations.
+ // Confirm that expirations in the future do not get removed.
+ cache.PurgePastExpirations()
+ require.Equal(t, 1, cache.Len())
+ _, found = cache.Get(bazUser)
+ require.Equal(t, true, found)
+}
+
+// TestConcurrentUpdates ensures that concurrent updates do not race with each
+// other.
+func TestConcurrentUpdates(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+
+ ctx := context.Background()
+ st := &cluster.Settings{}
+
+ // Create a cache with a large capacity.
+ cache, metric := newCache(
+ ctx,
+ st,
+ 10000, /* capacity */
+ timeutil.NewManualTime(timeutil.Unix(0, 123)),
+ )
+
+ var (
+ user = "testUser"
+ expiration = int64(1684359292)
+ )
+
+ // NB: N is chosen based on the race detector's limit of 8128 goroutines.
+ const N = 8000
+ var wg sync.WaitGroup
+ wg.Add(N)
+ for i := 0; i < N; i++ {
+ go func(i int) {
+ if i%2 == 1 {
+ cache.MaybeUpsert(ctx, user, expiration, metric)
+ } else {
+ cache.Clear()
+ }
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ cache.Clear()
+}
+
+func BenchmarkCertExpirationCacheInsert(b *testing.B) {
+ ctx := context.Background()
+ st := &cluster.Settings{}
+ clock := timeutil.NewManualTime(timeutil.Unix(0, 123))
+ cache, metric := newCache(ctx, st, 1000 /* capacity */, clock)
+
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ cache.MaybeUpsert(ctx, "foo", clock.Now().Unix(), metric)
+ cache.MaybeUpsert(ctx, "bar", clock.Now().Unix(), metric)
+ cache.MaybeUpsert(ctx, "blah", clock.Now().Unix(), metric)
+ }
+}
+
+func newCache(
+ ctx context.Context, st *cluster.Settings, capacity int, clock *timeutil.ManualTime,
+) (*security.ClientCertExpirationCache, *aggmetric.AggGauge) {
+ stopper := stop.NewStopper()
+ defer stopper.Stop(ctx)
+ security.ClientCertExpirationCacheCapacity.Override(ctx, &st.SV, int64(capacity))
+ parentMon := mon.NewUnlimitedMonitor(
+ ctx,
+ "test", /* name */
+ mon.MemoryResource,
+ nil, /* currCount */
+ nil, /* maxHist */
+ math.MaxInt64,
+ st,
+ )
+ cache := security.NewClientCertExpirationCache(ctx, st, stopper, clock, parentMon)
+ return cache, aggmetric.MakeBuilder(security.SQLUserLabel).Gauge(metric.Metadata{})
+}
diff --git a/pkg/security/certificate_manager.go b/pkg/security/certificate_manager.go
index d63cbb9a06de..f6ef0aa3d970 100644
--- a/pkg/security/certificate_manager.go
+++ b/pkg/security/certificate_manager.go
@@ -26,58 +26,6 @@ import (
"github.com/cockroachdb/errors"
)
-var (
- metaCAExpiration = metric.Metadata{
- Name: "security.certificate.expiration.ca",
- Help: "Expiration for the CA certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaClientCAExpiration = metric.Metadata{
- Name: "security.certificate.expiration.client-ca",
- Help: "Expiration for the client CA certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaUICAExpiration = metric.Metadata{
- Name: "security.certificate.expiration.ui-ca",
- Help: "Expiration for the UI CA certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaNodeExpiration = metric.Metadata{
- Name: "security.certificate.expiration.node",
- Help: "Expiration for the node certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaNodeClientExpiration = metric.Metadata{
- Name: "security.certificate.expiration.node-client",
- Help: "Expiration for the node's client certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaUIExpiration = metric.Metadata{
- Name: "security.certificate.expiration.ui",
- Help: "Expiration for the UI certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
-
- metaTenantCAExpiration = metric.Metadata{
- Name: "security.certificate.expiration.ca-client-tenant",
- Help: "Expiration for the Tenant Client CA certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
- metaTenantExpiration = metric.Metadata{
- Name: "security.certificate.expiration.client-tenant",
- Help: "Expiration for the Tenant Client certificate. 0 means no certificate or error.",
- Measurement: "Certificate Expiration",
- Unit: metric.Unit_TIMESTAMP_SEC,
- }
-)
-
// CertificateManager lives for the duration of the process and manages certificates and keys.
// It reloads all certificates when triggered and construct tls.Config objects for
// servers or clients.
@@ -106,7 +54,7 @@ type CertificateManager struct {
// The metrics struct is initialized at init time and metrics do their
// own locking.
- certMetrics CertificateMetrics
+ certMetrics Metrics
// mu protects all remaining fields.
mu syncutil.RWMutex
@@ -138,20 +86,6 @@ type CertificateManager struct {
tenantConfig *tls.Config
}
-// CertificateMetrics holds metrics about the various certificates.
-// These are initialized when the certificate manager is created and updated
-// on reload.
-type CertificateMetrics struct {
- CAExpiration *metric.Gauge
- ClientCAExpiration *metric.Gauge
- UICAExpiration *metric.Gauge
- NodeExpiration *metric.Gauge
- NodeClientExpiration *metric.Gauge
- UIExpiration *metric.Gauge
- TenantCAExpiration *metric.Gauge
- TenantExpiration *metric.Gauge
-}
-
func makeCertificateManager(
certsDir string, tlsSettings TLSSettings, opts ...Option,
) *CertificateManager {
@@ -164,16 +98,7 @@ func makeCertificateManager(
Locator: certnames.MakeLocator(certsDir),
tenantIdentifier: o.tenantIdentifier,
tlsSettings: tlsSettings,
- certMetrics: CertificateMetrics{
- CAExpiration: metric.NewGauge(metaCAExpiration),
- ClientCAExpiration: metric.NewGauge(metaClientCAExpiration),
- UICAExpiration: metric.NewGauge(metaUICAExpiration),
- NodeExpiration: metric.NewGauge(metaNodeExpiration),
- NodeClientExpiration: metric.NewGauge(metaNodeClientExpiration),
- UIExpiration: metric.NewGauge(metaUIExpiration),
- TenantCAExpiration: metric.NewGauge(metaTenantCAExpiration),
- TenantExpiration: metric.NewGauge(metaTenantExpiration),
- },
+ certMetrics: makeMetrics(),
}
}
@@ -225,7 +150,7 @@ func (cm *CertificateManager) IsForTenant() bool {
}
// Metrics returns the metrics struct.
-func (cm *CertificateManager) Metrics() CertificateMetrics {
+func (cm *CertificateManager) Metrics() Metrics {
return cm.certMetrics
}
diff --git a/pkg/security/certificate_metrics.go b/pkg/security/certificate_metrics.go
new file mode 100644
index 000000000000..7388b810c21c
--- /dev/null
+++ b/pkg/security/certificate_metrics.go
@@ -0,0 +1,119 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package security
+
+import (
+ "github.com/cockroachdb/cockroach/pkg/util/metric"
+ "github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
+)
+
+const SQLUserLabel = "sql_user"
+
+// Metrics is a metric.Struct for certificates.
+type Metrics struct {
+ // CA certificate expirations.
+ CAExpiration *metric.Gauge
+ ClientCAExpiration *metric.Gauge
+ UICAExpiration *metric.Gauge
+ TenantCAExpiration *metric.Gauge
+
+ // Certificate expirations.
+ NodeExpiration *metric.Gauge
+ NodeClientExpiration *metric.Gauge
+ UIExpiration *metric.Gauge
+ TenantExpiration *metric.Gauge
+ // ClientExpiration is an aggregate metric, containing child metrics for all
+ // users that have done cert auth with this node.
+ // The children are labeled by SQL user.
+ // The top-level aggregated value for this metric is not meaningful
+ // (it sums up all the minimum expirations of all users).
+ ClientExpiration *aggmetric.AggGauge
+}
+
+var _ metric.Struct = (*Metrics)(nil)
+
+// MetricStruct indicates that Metrics is a metric.Struct.
+func (m *Metrics) MetricStruct() {}
+
+var (
+ metaCAExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.ca",
+ Help: "Expiration for the CA certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaClientCAExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.client-ca",
+ Help: "Expiration for the client CA certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaClientExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.client",
+ Help: "Minimum expiration for client certificates, labeled by SQL user. 0 means no " +
+ "certificate or error. ",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaUICAExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.ui-ca",
+ Help: "Expiration for the UI CA certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaNodeExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.node",
+ Help: "Expiration for the node certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaNodeClientExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.node-client",
+ Help: "Expiration for the node's client certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaUIExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.ui",
+ Help: "Expiration for the UI certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+
+ metaTenantCAExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.ca-client-tenant",
+ Help: "Expiration for the Tenant Client CA certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+ metaTenantExpiration = metric.Metadata{
+ Name: "security.certificate.expiration.client-tenant",
+ Help: "Expiration for the Tenant Client certificate. 0 means no certificate or error.",
+ Measurement: "Certificate Expiration",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }
+)
+
+func makeMetrics() Metrics {
+ b := aggmetric.MakeBuilder(SQLUserLabel)
+ m := Metrics{
+ CAExpiration: metric.NewGauge(metaCAExpiration),
+ ClientCAExpiration: metric.NewGauge(metaClientCAExpiration),
+ TenantCAExpiration: metric.NewGauge(metaTenantCAExpiration),
+ UICAExpiration: metric.NewGauge(metaUICAExpiration),
+ ClientExpiration: b.Gauge(metaClientExpiration),
+ TenantExpiration: metric.NewGauge(metaTenantExpiration),
+ NodeExpiration: metric.NewGauge(metaNodeExpiration),
+ NodeClientExpiration: metric.NewGauge(metaNodeClientExpiration),
+ UIExpiration: metric.NewGauge(metaUIExpiration),
+ }
+ return m
+}
diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go
index 54aa2f81576f..de5599c26577 100644
--- a/pkg/server/server_sql.go
+++ b/pkg/server/server_sql.go
@@ -48,6 +48,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/rpc"
"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
"github.com/cockroachdb/cockroach/pkg/scheduledjobs"
+ "github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/clientsecopts"
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/server/autoconfig"
@@ -945,35 +946,42 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) {
storageEngineClient := kvserver.NewStorageEngineClient(cfg.nodeDialer)
*execCfg = sql.ExecutorConfig{
- Settings: cfg.Settings,
- NodeInfo: nodeInfo,
- Codec: codec,
- DefaultZoneConfig: &cfg.DefaultZoneConfig,
- Locality: cfg.Locality,
- AmbientCtx: cfg.AmbientCtx,
- DB: cfg.db,
- Gossip: cfg.gossip,
- NodeLiveness: cfg.nodeLiveness,
- SystemConfig: cfg.systemConfigWatcher,
- MetricsRecorder: cfg.recorder,
- DistSender: cfg.distSender,
- RPCContext: cfg.rpcContext,
- LeaseManager: leaseMgr,
- TenantStatusServer: cfg.tenantStatusServer,
- Clock: cfg.clock,
- DistSQLSrv: distSQLServer,
- NodesStatusServer: cfg.nodesStatusServer,
- SQLStatusServer: cfg.sqlStatusServer,
- SessionRegistry: cfg.sessionRegistry,
- ClosedSessionCache: cfg.closedSessionCache,
- ContentionRegistry: contentionRegistry,
- SQLLiveness: cfg.sqlLivenessProvider,
- JobRegistry: jobRegistry,
- VirtualSchemas: virtualSchemas,
- HistogramWindowInterval: cfg.HistogramWindowInterval(),
- RangeDescriptorCache: cfg.distSender.RangeDescriptorCache(),
- RoleMemberCache: sql.NewMembershipCache(serverCacheMemoryMonitor.MakeBoundAccount(), cfg.stopper),
- SessionInitCache: sessioninit.NewCache(serverCacheMemoryMonitor.MakeBoundAccount(), cfg.stopper),
+ Settings: cfg.Settings,
+ NodeInfo: nodeInfo,
+ Codec: codec,
+ DefaultZoneConfig: &cfg.DefaultZoneConfig,
+ Locality: cfg.Locality,
+ AmbientCtx: cfg.AmbientCtx,
+ DB: cfg.db,
+ Gossip: cfg.gossip,
+ NodeLiveness: cfg.nodeLiveness,
+ SystemConfig: cfg.systemConfigWatcher,
+ MetricsRecorder: cfg.recorder,
+ DistSender: cfg.distSender,
+ RPCContext: cfg.rpcContext,
+ LeaseManager: leaseMgr,
+ TenantStatusServer: cfg.tenantStatusServer,
+ Clock: cfg.clock,
+ DistSQLSrv: distSQLServer,
+ NodesStatusServer: cfg.nodesStatusServer,
+ SQLStatusServer: cfg.sqlStatusServer,
+ SessionRegistry: cfg.sessionRegistry,
+ ClosedSessionCache: cfg.closedSessionCache,
+ ContentionRegistry: contentionRegistry,
+ SQLLiveness: cfg.sqlLivenessProvider,
+ JobRegistry: jobRegistry,
+ VirtualSchemas: virtualSchemas,
+ HistogramWindowInterval: cfg.HistogramWindowInterval(),
+ RangeDescriptorCache: cfg.distSender.RangeDescriptorCache(),
+ RoleMemberCache: sql.NewMembershipCache(
+ serverCacheMemoryMonitor.MakeBoundAccount(), cfg.stopper,
+ ),
+ SessionInitCache: sessioninit.NewCache(
+ serverCacheMemoryMonitor.MakeBoundAccount(), cfg.stopper,
+ ),
+ ClientCertExpirationCache: security.NewClientCertExpirationCache(
+ ctx, cfg.Settings, cfg.stopper, &timeutil.DefaultTimeSource{}, rootSQLMemoryMonitor,
+ ),
RootMemoryMonitor: rootSQLMemoryMonitor,
TestingKnobs: sqlExecutorTestingKnobs,
CompactEngineSpanFunc: storageEngineClient.CompactEngineSpan,
diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go
index c1dcbbd3e084..6933edf10380 100644
--- a/pkg/sql/exec_util.go
+++ b/pkg/sql/exec_util.go
@@ -49,6 +49,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/obs"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/rpc"
+ "github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/username"
"github.com/cockroachdb/cockroach/pkg/server/autoconfig/acprovider"
"github.com/cockroachdb/cockroach/pkg/server/pgurl"
@@ -1309,6 +1310,9 @@ type ExecutorConfig struct {
// Role membership cache.
RoleMemberCache *MembershipCache
+ // Client cert expiration cache.
+ ClientCertExpirationCache *security.ClientCertExpirationCache
+
// SessionInitCache cache; contains information used during authentication
// and per-role default settings.
SessionInitCache *sessioninit.Cache
diff --git a/pkg/sql/pgwire/auth_methods.go b/pkg/sql/pgwire/auth_methods.go
index 4065737bd05d..b18f2e25aeb1 100644
--- a/pkg/sql/pgwire/auth_methods.go
+++ b/pkg/sql/pgwire/auth_methods.go
@@ -427,7 +427,19 @@ func authCert(
tlsState.PeerCertificates[0].Subject.CommonName = tree.Name(
tlsState.PeerCertificates[0].Subject.CommonName,
).Normalize()
- hook, err := security.UserAuthCertHook(false /*insecure*/, &tlsState, execCfg.RPCContext.TenantID)
+
+ cm, err := execCfg.RPCContext.SecurityContext.GetCertificateManager()
+ if err != nil {
+ log.Ops.Warningf(ctx, "failed to get cert manager info: %v", err)
+ }
+
+ hook, err := security.UserAuthCertHook(
+ false, /*insecure*/
+ &tlsState,
+ execCfg.RPCContext.TenantID,
+ cm,
+ execCfg.ClientCertExpirationCache,
+ )
if err != nil {
return err
}