Merge #118886
118886: kv: setting to disable expiration leases for high replica counts r=erikgrinaker a=andrewbaptist

This change adds a new setting to disable expiration leases if there are too many replicas on a node. This prevents existing systems from taking a performance hit once expiration leases are enabled, while still providing the benefit to the majority of existing systems.
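For illustration, both knobs can be exercised from Go test code the same way the new unit test below does — a minimal sketch (the cap of 3000 echoes the code comment below; the shipped default of 0 leaves the cap disabled):

ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
// Prefer expiration-based leases cluster-wide...
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, true)
// ...but revert to epoch leases on any node holding more than 3000 replicas.
kvserver.ExpirationLeasesMaxReplicasPerNode.Override(ctx, &st.SV, 3000)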

Epic: CRDB-34218

Release note: None

Co-authored-by: Andrew Baptist <[email protected]>
craig[bot] and andrewbaptist committed Feb 22, 2024
2 parents e29641b + b3208e8 commit 04f0416
Showing 5 changed files with 114 additions and 5 deletions.
35 changes: 31 additions & 4 deletions pkg/kv/kvserver/replica_range_lease.go
@@ -84,14 +84,34 @@ var TransferExpirationLeasesFirstEnabled = settings.RegisterBoolSetting(
var ExpirationLeasesOnly = settings.RegisterBoolSetting(
    settings.SystemOnly,
    "kv.expiration_leases_only.enabled",
    "only use expiration-based leases, never epoch-based ones (experimental, affects performance)",
    "only use expiration-based leases, never epoch-based ones, "+
        "when there are fewer than kv.lease.expiration_max_replicas_per_node replicas "+
        "on the node (experimental, affects performance)",
    // false by default. Metamorphically enabled in tests, but not in deadlock
    // builds because TestClusters are usually so slow that they're unable
    // to maintain leases/leadership/liveness.
    !syncutil.DeadlockEnabled &&
        util.ConstantWithMetamorphicTestBool("kv.expiration_leases_only.enabled", false),
)

// ExpirationLeasesMaxReplicasPerNode converts from expiration back to epoch
// leases if there are too many replicas on a node. Expiration leases are more
// expensive to maintain than epoch leases, so they are only used on clusters
// with a small number of replicas per node. We chose a conservative maximum of
// 3000 replicas per node, but this maximum will increase as we decrease the
// cost of expiration-based leases. Note that the maximum applies to all stores
// on the node in a multi-store configuration. The decision is node-local, so
// in some clusters there can be a mix of some nodes using expiration leases
// and others using epoch leases. A mixed state is valid, but it doesn't bring
// the benefits of expiration-based leases while still incurring the
// additional costs.
var ExpirationLeasesMaxReplicasPerNode = settings.RegisterIntSetting(
    settings.SystemOnly,
    "kv.lease.expiration_max_replicas_per_node",
    "maximum number of replicas a node can have before expiration leases are disabled (0 disables this setting)",
    0,
)

// DisableExpirationLeasesOnly is an escape hatch for ExpirationLeasesOnly,
// which can be used to hard-disable expiration-based leases e.g. if clusters
// are unable to start back up due to the lease extension load.
@@ -847,10 +867,17 @@ func (r *Replica) requiresExpirationLeaseRLocked() bool {

// shouldUseExpirationLeaseRLocked returns true if this range should be using an
// expiration-based lease, either because it requires one or because
// kv.expiration_leases_only.enabled is enabled.
// kv.expiration_leases_only.enabled is enabled and the number of ranges
// (replicas) on the node is below kv.lease.expiration_max_replicas_per_node.
func (r *Replica) shouldUseExpirationLeaseRLocked() bool {
    return (ExpirationLeasesOnly.Get(&r.ClusterSettings().SV) && !DisableExpirationLeasesOnly) ||
        r.requiresExpirationLeaseRLocked()
    settingEnabled := ExpirationLeasesOnly.Get(&r.ClusterSettings().SV) && !DisableExpirationLeasesOnly
    maxAllowedReplicas := ExpirationLeasesMaxReplicasPerNode.Get(&r.ClusterSettings().SV)
    // Disable the setting if there are too many replicas.
    if settingEnabled && maxAllowedReplicas > 0 && r.store.getNodeRangeCount() > maxAllowedReplicas {
        settingEnabled = false
    }

    return settingEnabled || r.requiresExpirationLeaseRLocked()
}

// requestLeaseLocked executes a request to obtain or extend a lease
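Condensed into a standalone sketch, the gating above is a pure function of the cluster setting, the node-local cap, the cached replica count, and the per-range requirement (illustrative names, not the real API):

package main

import "fmt"

// shouldUseExpirationLease mirrors shouldUseExpirationLeaseRLocked: the
// cluster-wide preference is suppressed once the node's replica count
// exceeds the cap, and a cap of 0 means "no cap".
func shouldUseExpirationLease(enabled bool, maxReplicas, nodeReplicas int64, required bool) bool {
    if enabled && maxReplicas > 0 && nodeReplicas > maxReplicas {
        enabled = false
    }
    return enabled || required
}

func main() {
    fmt.Println(shouldUseExpirationLease(true, 3000, 2500, false)) // true: under the cap
    fmt.Println(shouldUseExpirationLease(true, 3000, 3500, false)) // false: over the cap
    fmt.Println(shouldUseExpirationLease(true, 3000, 3500, true))  // true: the range requires one
}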
12 changes: 12 additions & 0 deletions pkg/kv/kvserver/store.go
@@ -323,6 +323,7 @@ func testStoreConfig(clock *hlc.Clock, version roachpb.Version) StoreConfig {
        ProtectedTimestampReader: spanconfig.EmptyProtectedTSReader(clock),
        SnapshotSendLimit:        DefaultSnapshotSendLimit,
        SnapshotApplyLimit:       DefaultSnapshotApplyLimit,
        RangeCount:               &atomic.Int64{},

        // Use a constant empty system config, which mirrors the previously
        // existing logic to install an empty system config in gossip.
@@ -1099,6 +1100,7 @@ var _ OutgoingRaftMessageHandler = &Store{}
// required to create a store.
// All fields holding a pointer or an interface are required to create
// a store; the rest will have sane defaults set if omitted.
// TODO(baptist): Split into StoreConfig (immutable) and NodeState (mutable).
type StoreConfig struct {
    AmbientCtx log.AmbientContext
    base.RaftConfig
@@ -1261,6 +1263,10 @@ type StoreConfig struct {
    // RangeFeedSchedulerShardSize specifies the maximum number of workers per
    // scheduler shard.
    RangeFeedSchedulerShardSize int

    // RangeCount is populated by the node and represents the total number of
    // ranges this node has.
    RangeCount *atomic.Int64
}

// logRangeAndNodeEventsEnabled is used to enable or disable logging range events
@@ -3792,6 +3798,12 @@ func (s *Store) getRangefeedScheduler() *rangefeed.Scheduler {
    return s.rangefeedScheduler
}

// getNodeRangeCount returns the total number of ranges on this node. The value
// is cached and updated every few seconds by Node.computeMetricsPeriodically.
func (s *Store) getNodeRangeCount() int64 {
    return s.cfg.RangeCount.Load()
}

// Implementation of the storeForTruncator interface.
type storeForTruncatorImpl Store

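The plumbing is a single *atomic.Int64 shared by value through every store's config: the node is the only writer and each store only reads the cached total. A self-contained sketch of the pattern (illustrative names, not the real StoreConfig):

package main

import (
    "fmt"
    "sync/atomic"
)

type storeCfg struct {
    rangeCount *atomic.Int64 // shared with the node; stores only read it
}

func main() {
    shared := &atomic.Int64{}
    a, b := storeCfg{rangeCount: shared}, storeCfg{rangeCount: shared}

    // The node-level metrics loop periodically publishes the aggregated total...
    shared.Store(68)
    // ...and every store observes the same cached value.
    fmt.Println(a.rangeCount.Load(), b.rangeCount.Load()) // 68 68
}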
16 changes: 15 additions & 1 deletion pkg/server/node.go
@@ -1009,12 +1009,24 @@ func (n *Node) startComputePeriodicMetrics(stopper *stop.Stopper, interval time.
    })
}

// updateNodeRangeCount updates the internal counter of the total ranges across
// all stores. This value is used to decide whether the node should use
// expiration leases (see Replica.shouldUseExpirationLeaseRLocked).
func (n *Node) updateNodeRangeCount() {
    var count int64
    _ = n.stores.VisitStores(func(store *kvserver.Store) error {
        count += store.Metrics().RangeCount.Value()
        return nil
    })
    n.storeCfg.RangeCount.Store(count)
}

// computeMetricsPeriodically instructs each store to compute the value of
// complicated metrics.
func (n *Node) computeMetricsPeriodically(
    ctx context.Context, storeToMetrics map[*kvserver.Store]*storage.MetricsForInterval, tick int,
) error {
    return n.stores.VisitStores(func(store *kvserver.Store) error {
    err := n.stores.VisitStores(func(store *kvserver.Store) error {
        if newMetrics, err := store.ComputeMetricsPeriodically(ctx, storeToMetrics[store], tick); err != nil {
            log.Warningf(ctx, "%s: unable to compute metrics: %s", store, err)
        } else {
@@ -1031,6 +1043,8 @@ func (n *Node) computeMetricsPeriodically(
        }
        return nil
    })
    n.updateNodeRangeCount()
    return err
}

// UpdateIOThreshold relays the supplied IOThreshold to the same method on the
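Because the refresh piggybacks on the periodic metrics tick, the cached count can lag the true total by up to one interval, so lease-type decisions flip only after the next tick. A standalone sketch of the sum-then-publish step (illustrative types, not the real Stores API):

package main

import (
    "fmt"
    "sync/atomic"
)

// publishRangeCount sums per-store range counts and publishes the node-wide
// total, mirroring updateNodeRangeCount above.
func publishRangeCount(perStore []int64, out *atomic.Int64) {
    var total int64
    for _, c := range perStore {
        total += c
    }
    out.Store(total)
}

func main() {
    var nodeRangeCount atomic.Int64
    publishRangeCount([]int64{40, 28}, &nodeRangeCount) // two stores on one node
    fmt.Println(nodeRangeCount.Load())                  // 68
}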
54 changes: 54 additions & 0 deletions pkg/server/node_test.go
@@ -17,6 +17,7 @@ import (
"reflect"
"runtime/pprof"
"sort"
"strconv"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
@@ -1081,3 +1082,56 @@ func TestDiskStatsMap(t *testing.T) {
        require.Equal(t, expectedDS, ds)
    }
}

// TestRevertToEpochIfTooManyRanges verifies that leases switch from expiration
// back to epoch after a short time interval if there are enough ranges on a node.
func TestRevertToEpochIfTooManyRanges(t *testing.T) {
    defer leaktest.AfterTest(t)()
    defer log.Scope(t).Close(t)

    const expirationThreshold = 100
    ctx := context.Background()
    st := cluster.MakeTestingClusterSettings()
    // Use expiration leases by default, but decrease the limit for the test to
    // avoid having to create too many splits.
    kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, true)
    kvserver.ExpirationLeasesMaxReplicasPerNode.Override(ctx, &st.SV, expirationThreshold)
    s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{Settings: st})
    defer s.Stopper().Stop(ctx)

    // Split off a range whose lease we can inspect.
    key := roachpb.Key("a")
    require.NoError(t, kvDB.AdminSplit(ctx, key, hlc.MaxTimestamp))

    // Make sure the lease is an expiration lease.
    lease, _, err := s.GetRangeLease(ctx, key, roachpb.QueryLocalNodeOnly)
    require.NoError(t, err)
    require.Equal(t, roachpb.LeaseExpiration, lease.Current().Type())

    node := s.Node().(*Node)

    // Force a metrics computation and check the current number of ranges. There
    // are 68 ranges by default in 24.1.
    require.NoError(t, node.computeMetricsPeriodically(ctx, map[*kvserver.Store]*storage.MetricsForInterval{}, 0))
    num := node.storeCfg.RangeCount.Load()
    require.Greaterf(t, num, int64(50), "Expected more than 50 ranges, only found %d", num)

    // Add 50 more ranges to push over the 100-replica expiration limit.
    for i := 0; i < 50; i++ {
        require.NoError(t, kvDB.AdminSplit(ctx, roachpb.Key("a"+strconv.Itoa(i)), hlc.MaxTimestamp))
    }
    // Check metrics again. This has the side effect of updating RangeCount.
    require.NoError(t, node.computeMetricsPeriodically(ctx, map[*kvserver.Store]*storage.MetricsForInterval{}, 0))
    num = node.storeCfg.RangeCount.Load()
    require.Greaterf(t, num, int64(expirationThreshold), "Expected more than 100 ranges, only found %d", num)

    // Verify the lease switched back to epoch automatically.
    testutils.SucceedsSoon(t, func() error {
        lease, _, err = s.GetRangeLease(ctx, key, roachpb.QueryLocalNodeOnly)
        require.NoError(t, err)
        if lease.Current().Type() != roachpb.LeaseEpoch {
            return errors.New("lease is still expiration-based")
        }
        return nil
    })
}
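To run just this test with stock Go tooling (the repository's dev/Bazel wrapper may be the preferred invocation):

go test ./pkg/server -run TestRevertToEpochIfTooManyRanges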
2 changes: 2 additions & 0 deletions pkg/server/server.go
@@ -19,6 +19,7 @@ import (
"reflect"
"strconv"
"sync"
"sync/atomic"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
@@ -867,6 +868,7 @@ func NewServer(cfg Config, stopper *stop.Stopper) (serverctl.ServerStartupInterf
        KVFlowHandles:            admissionControl.storesFlowControl,
        KVFlowHandleMetrics:      admissionControl.kvFlowHandleMetrics,
        SchedulerLatencyListener: admissionControl.schedulerLatencyListener,
        RangeCount:               &atomic.Int64{},
    }
    if storeTestingKnobs := cfg.TestingKnobs.Store; storeTestingKnobs != nil {
        storeCfg.TestingKnobs = *storeTestingKnobs.(*kvserver.StoreTestingKnobs)