Comparing changes

This is a direct comparison between two commits made in this repository.

base repository: cockroachdb/cockroach
base: 451f7c68cb2ea1d506650c6bb9738967914d6e43
head repository: cockroachdb/cockroach
compare: 1c6b2117b5d0a6781f2bfc8ff4d11d353e6b0291
Showing with 206 additions and 171 deletions.
  1. +102 −71 pkg/util/admission/granter.go
  2. +104 −100 pkg/util/admission/testdata/granter
173 changes: 102 additions & 71 deletions pkg/util/admission/granter.go
@@ -458,29 +458,59 @@ func (tg *tokenGranter) continueGrantChain(grantChainID grantChainID) {
// per-store write admission control (see StoreGrantCoordinators).
//
// For the cpu-bound slot case we have background activities (like Pebble
// compactions) that would like to utilize additional slots if available (e.g.
// to do concurrent compression of ssblocks). These activities do not want to
// wait for a slot, since they can proceed without the slot at their usual
- // slower pace. They also are performance sensitive, and can't afford to
- // interact with admission control at a fine granularity (like asking for a
- // slot when compressing each ssblock). A coarse granularity interaction
- // causes a delay in returning slots to admission control, and we don't want
- // that delay to cause admission delay for normal work. Hence, we model slots
- // granted to background activities as "soft-slots". Granting a soft-slot has
- // to conform to usedSoftSlots+usedSlots <= totalSlots. Granting a regular
- // slot only has to conform to usedSlots <= totalSlots. That is, soft-slots
- // allow for over-commitment until the soft-slots are returned, which may mean
- // some additional queueing in the goroutine scheduler.
+ // slower pace (e.g. without doing concurrent compression). They also are
+ // sensitive to small overheads in their tight loops, and cannot afford the
+ // overhead of interacting with admission control at a fine granularity (like
+ // asking for a slot when compressing each ssblock). A coarse granularity
+ // interaction causes a delay in returning slots to admission control, and we
+ // don't want that delay to cause admission delay for normal work. Hence, we
+ // model slots granted to background activities as "soft-slots". Think of
+ // regular used slots as "hard-slots", in that we assume that the holder of
+ // the slot is still "using" it, while a soft-slot is "squishy" and in some
+ // cases we can pretend that it is not being used. Say we are allowed
+ // to allocate up to M slots. In this scheme, when allocating a soft-slot
+ // one must conform to usedSoftSlots+usedSlots <= M, and when allocating
+ // a regular (hard) slot one must conform to usedSlots <= M.
+ //
+ // That is, soft-slots allow for over-commitment until the soft-slots are
+ // returned, which may mean some additional queueing in the goroutine
+ // scheduler.
+ //
+ // We have another wrinkle in that we do not want to maintain a single M. For
+ // these optional background activities we desire to do them only when the
+ // load is low enough. This is because at high load, all work suffers from
+ // additional queueing in the goroutine scheduler. So we want to make sure
+ // regular work does not suffer such goroutine scheduler queueing because we
+ // granted too many soft-slots and caused CPU utilization to be high. So we
+ // maintain two kinds of M, totalHighLoadSlots and totalModerateLoadSlots.
+ // totalHighLoadSlots is estimated so as to allow CPU utilization to be high,
+ // while totalModerateLoadSlots tries to keep queuing in the goroutine
+ // scheduler to a lower level. So the revised equations for allocation are:
+ // - Allocating a soft-slot: usedSoftSlots+usedSlots <= totalModerateLoadSlots
+ // - Allocating a regular slot: usedSlots <= totalHighLoadSlots
+ //
+ // NB: we may in the future add other kinds of background activities that do
+ // not have a lag in interacting with admission control, but want to schedule
+ // them only under moderate load. Those activities will be counted in
+ // usedSlots but when granting a slot to such an activity, the equation will
+ // be usedSoftSlots+usedSlots <= totalModerateLoadSlots.
+ //
+ // That is, one should not assume that moderate load slot allocation is only
+ // for soft-slots. Soft-slots are introduced only for squishiness.
type kvGranter struct {
- coord               *GrantCoordinator
- requester           requester
- usedSlots           int
- usedSoftSlots       int
- totalSlots          int
- totalSoftSlots      int
- skipSlotEnforcement bool
- failedSoftSlotsGet  bool
+ coord                  *GrantCoordinator
+ requester              requester
+ usedSlots              int
+ usedSoftSlots          int
+ totalHighLoadSlots     int
+ totalModerateLoadSlots int
+ skipSlotEnforcement    bool
+ failedSoftSlotsGet     bool

ioTokensEnabled bool
// There is no rate limiting in granting these tokens. That is, they are all
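Aside: to make the two allocation rules in the comment above concrete, here is a minimal, self-contained Go sketch. The demoGranter type and its methods are invented for illustration; the real kvGranter in this diff additionally handles IO tokens, metrics, and the coordinator's mutex.

package main

import "fmt"

// demoGranter holds just the four counters needed to illustrate the
// hard-slot and soft-slot admission rules.
type demoGranter struct {
	usedSlots              int
	usedSoftSlots          int
	totalHighLoadSlots     int
	totalModerateLoadSlots int
}

// tryGetHardSlot grants a regular (hard) slot: only usedSlots is compared
// against totalHighLoadSlots, so held soft-slots can be over-committed.
func (g *demoGranter) tryGetHardSlot() bool {
	if g.usedSlots < g.totalHighLoadSlots {
		g.usedSlots++
		return true
	}
	return false
}

// tryGetSoftSlot grants a soft-slot: the combined usage must stay within
// the stricter totalModerateLoadSlots bound.
func (g *demoGranter) tryGetSoftSlot() bool {
	if g.usedSoftSlots+g.usedSlots < g.totalModerateLoadSlots {
		g.usedSoftSlots++
		return true
	}
	return false
}

func main() {
	g := &demoGranter{totalHighLoadSlots: 4, totalModerateLoadSlots: 2}
	fmt.Println(g.tryGetHardSlot()) // true: usedSlots becomes 1
	fmt.Println(g.tryGetHardSlot()) // true: usedSlots becomes 2
	fmt.Println(g.tryGetSoftSlot()) // false: 2 used slots already fill the moderate bound
	fmt.Println(g.tryGetHardSlot()) // true: hard slots ignore the moderate bound
}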
@@ -510,7 +540,7 @@ func (sg *kvGranter) tryGet() bool {
}

func (sg *kvGranter) tryGetLocked() grantResult {
- if sg.usedSlots < sg.totalSlots || sg.skipSlotEnforcement {
+ if sg.usedSlots < sg.totalHighLoadSlots || sg.skipSlotEnforcement {
if !sg.ioTokensEnabled || sg.availableIOTokens > 0 {
sg.usedSlots++
if sg.usedSlotsMetric != nil {
@@ -582,18 +612,14 @@ func (sg *kvGranter) setAvailableIOTokensLocked(tokens int64) {
func (sg *kvGranter) tryGetSoftSlots(count int) int {
sg.coord.mu.Lock()
defer sg.coord.mu.Unlock()
- spareSoftSlots := sg.totalSoftSlots - sg.usedSoftSlots
- spareSlots := sg.totalSlots - (sg.usedSlots + sg.usedSoftSlots)
- if spareSlots < spareSoftSlots {
- spareSoftSlots = spareSlots
- }
- if spareSoftSlots <= 0 {
+ spareModerateLoadSlots := sg.totalModerateLoadSlots - sg.usedSoftSlots - sg.usedSlots
+ if spareModerateLoadSlots <= 0 {
sg.failedSoftSlotsGet = true
return 0
}
allocatedSlots := count
- if allocatedSlots > spareSoftSlots {
- allocatedSlots = spareSoftSlots
+ if allocatedSlots > spareModerateLoadSlots {
+ allocatedSlots = spareModerateLoadSlots
}
sg.usedSoftSlots += allocatedSlots
return allocatedSlots
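The rewritten tryGetSoftSlots never blocks and may grant fewer slots than requested, possibly zero. A hypothetical caller, sketched against the demoGranter from the earlier example, might look like the following; returnSoftSlots is an assumed counterpart that this diff does not show.

// trySoftSlots mirrors the tryGetSoftSlots logic above: grant as many of
// the requested soft-slots as fit under totalModerateLoadSlots.
func (g *demoGranter) trySoftSlots(count int) int {
	spare := g.totalModerateLoadSlots - g.usedSoftSlots - g.usedSlots
	if spare <= 0 {
		return 0
	}
	if count > spare {
		count = spare
	}
	g.usedSoftSlots += count
	return count
}

// returnSoftSlots gives soft-slots back once the background work is done.
func (g *demoGranter) returnSoftSlots(count int) {
	g.usedSoftSlots -= count
}

// runBackgroundWork asks for extra workers up front and proceeds with
// whatever it gets; with zero grants it runs single-threaded at its usual
// slower pace, never waiting on admission control.
func runBackgroundWork(g *demoGranter, desiredWorkers int, work func(concurrency int)) {
	granted := g.trySoftSlots(desiredWorkers - 1)
	defer g.returnSoftSlots(granted)
	work(1 + granted) // the caller's own goroutine plus the granted slots
}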
@@ -768,10 +794,10 @@ func NewGrantCoordinators(
}

kvg := &kvGranter{
- coord:           coord,
- totalSlots:      opts.MinCPUSlots,
- totalSoftSlots:  opts.MinCPUSlots,
- usedSlotsMetric: metrics.KVUsedSlots,
+ coord:                  coord,
+ totalHighLoadSlots:     opts.MinCPUSlots,
+ totalModerateLoadSlots: opts.MinCPUSlots,
+ usedSlotsMetric:        metrics.KVUsedSlots,
}
kvSlotAdjuster.granter = kvg
coord.queues[KVWork] = makeRequester(ambientCtx, KVWork, kvg, st, makeWorkQueueOptions(KVWork))
@@ -1229,8 +1255,8 @@ func (coord *GrantCoordinator) SafeFormat(s redact.SafePrinter, verb rune) {
switch kind {
case KVWork:
g := coord.granters[i].(*kvGranter)
s.Printf("%s%s: used: %d, total(soft): %d(%d)", curSep, workKindString(kind),
g.usedSlots, g.totalSlots, g.totalSoftSlots)
s.Printf("%s%s: used: %d, high(moderate)-total: %d(%d)", curSep, workKindString(kind),
g.usedSlots, g.totalHighLoadSlots, g.totalModerateLoadSlots)
if g.usedSoftSlots > 0 {
s.Printf(" used-soft: %d", g.usedSoftSlots)
}
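For illustration, a KV granter with 3 used slots, totals of 8 and 4, and 2 held soft-slots would now render as "kv: used: 3, high(moderate)-total: 8(4) used-soft: 2" (values illustrative, and assuming workKindString(KVWork) renders as "kv"); the used-soft suffix appears only when soft-slots are held.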
@@ -1353,7 +1379,7 @@ func (sgc *StoreGrantCoordinators) initGrantCoordinator(storeID int32) *GrantCoo
kvg := &kvGranter{
coord: coord,
// Unlimited slots since not constrained by CPU.
- totalSlots: math.MaxInt32,
+ totalHighLoadSlots: math.MaxInt32,
ioTokensExhaustedDurationMetric: sgc.kvIOTokensExhaustedDuration,
}
opts := makeWorkQueueOptions(KVWork)
@@ -1457,7 +1483,7 @@ func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int, _ time.Duration) {
// Simple heuristic, which worked ok in experiments. More sophisticated ones
// could be devised.
usedSlots := kvsa.granter.usedSlots + kvsa.granter.usedSoftSlots
- tryDecreaseSlots := func(used int, total int) int {
+ tryDecreaseSlots := func(total int) int {
// Overload.
// If using some slots, and the used slots is less than the total slots,
// and total slots hasn't bottomed out at the min, decrease the total
@@ -1470,19 +1496,19 @@ func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int, _ time.Duration) {
// so it suggests that the drop in slots should not be causing cpu
// under-utilization, but one cannot be sure. Experiment with a smoothed
// signal or other ways to prevent a fast drop.
- if used > 0 && total > kvsa.minCPUSlots && used <= total {
+ if usedSlots > 0 && total > kvsa.minCPUSlots && usedSlots <= total {
total--
}
return total
}
- tryIncreaseSlots := func(used int, total int) int {
+ tryIncreaseSlots := func(total int) int {
// TODO: 0.8 is arbitrary.
closeToTotalSlots := int(float64(total) * 0.8)
// Underload.
// Used all its slots and can increase further, so additive increase. We
// also handle the case where the used slots are a bit less than total
// slots, since callers for soft slots don't block.
- if (used >= total || (used >= closeToTotalSlots && kvsa.granter.failedSoftSlotsGet)) &&
+ if (usedSlots >= total || (usedSlots >= closeToTotalSlots && kvsa.granter.failedSoftSlotsGet)) &&
total < kvsa.maxCPUSlots {
// NB: If the workload is IO bound, the slot count here will keep
// incrementing until these slots are no longer the bottleneck for
@@ -1494,52 +1520,57 @@ func (kvsa *kvSlotAdjuster) CPULoad(runnable int, procs int, _ time.Duration) {
}
return total
}
- // NB: usedSlots >= kvGranter.usedSoftSlots. Consider the following cases, when
- // totalSlots == totalSoftSlots.
- // - underload such that we are able to increase totalSoftSlots: in this
- //   case we will also be able to increase totalSlots (since the used and
- //   total comparisons gating the increase in tryIncreaseSlots will also be
- //   true for totalSlots).
- // - overload such that we are able to decrease totalSlots: in this case it
- //   is possible that we don't decrease the totalSoftSlots because of the
- //   used > 0 condition in tryDecreaseSlots.
- //
- // So the natural behavior of the slot adjustments does not guarantee
- // totalSlots >= totalSoftSlots. We add logic to impose this on top of the
- // natural adjustment.
- //
// TODO: the fractions below are arbitrary and subject to tuning.
if runnable >= threshold*procs {
// Very overloaded.
- kvsa.granter.totalSlots = tryDecreaseSlots(usedSlots, kvsa.granter.totalSlots)
- kvsa.granter.totalSoftSlots = tryDecreaseSlots(
- kvsa.granter.usedSoftSlots, kvsa.granter.totalSoftSlots)
+ kvsa.granter.totalHighLoadSlots = tryDecreaseSlots(kvsa.granter.totalHighLoadSlots)
+ kvsa.granter.totalModerateLoadSlots = tryDecreaseSlots(kvsa.granter.totalModerateLoadSlots)
} else if float64(runnable) <= float64((threshold*procs)/4) {
// Very underloaded.
- kvsa.granter.totalSlots = tryIncreaseSlots(usedSlots, kvsa.granter.totalSlots)
- kvsa.granter.totalSoftSlots = tryIncreaseSlots(
- kvsa.granter.usedSoftSlots, kvsa.granter.totalSoftSlots)
+ kvsa.granter.totalHighLoadSlots = tryIncreaseSlots(kvsa.granter.totalHighLoadSlots)
+ kvsa.granter.totalModerateLoadSlots = tryIncreaseSlots(kvsa.granter.totalModerateLoadSlots)
} else if float64(runnable) <= float64((threshold*procs)/2) {
// Moderately underloaded -- can afford to increase regular slots.
- kvsa.granter.totalSlots = tryIncreaseSlots(usedSlots, kvsa.granter.totalSlots)
+ kvsa.granter.totalHighLoadSlots = tryIncreaseSlots(kvsa.granter.totalHighLoadSlots)
} else if runnable >= 3*threshold*procs/4 {
- // Moderately overloaded -- should decrease soft slots.
- // NB: decreasing soft slots may not halt the runnable growth since the
- // regular slot traffic may be high. Which means we will keep decreasing
- // soft slots and undershoot. This is acceptable since soft slots are
- // strictly best-effort.
- kvsa.granter.totalSoftSlots = tryDecreaseSlots(
- kvsa.granter.usedSoftSlots, kvsa.granter.totalSoftSlots)
- }
- if kvsa.granter.totalSlots < kvsa.granter.totalSoftSlots {
- kvsa.granter.totalSoftSlots = kvsa.granter.totalSlots
+ // Moderately overloaded -- should decrease moderate load slots.
+ //
+ // NB: decreasing moderate load slots may not halt the runnable growth
+ // since the regular traffic may be high and can use up to the high load
+ // slots. When usedSlots > totalModerateLoadSlots, we won't actually
+ // decrease totalModerateLoadSlots (see the logic in tryDecreaseSlots).
+ // However, that doesn't mean that totalModerateLoadSlots is accurate.
+ // This inaccuracy is fine since we have chosen to be in a high load
+ // regime, since all the work we are doing is non-optional regular work
+ // (not background work).
+ //
+ // Where this will help is when what is pushing us over moderate load is
+ // optional background work, so by decreasing totalModerateLoadSlots we will
+ // contain the load due to that work.
+ kvsa.granter.totalModerateLoadSlots = tryDecreaseSlots(kvsa.granter.totalModerateLoadSlots)
+ }
+ // Consider the following cases, when we started this method with
+ // totalHighLoadSlots == totalModerateLoadSlots.
+ // - underload such that we are able to increase totalModerateLoadSlots: in
+ //   this case we will also be able to increase totalHighLoadSlots (since
+ //   the used and total comparisons gating the increase in tryIncreaseSlots
+ //   will also be true for totalHighLoadSlots).
+ // - overload such that we are able to decrease totalHighLoadSlots: in this
+ //   case the logic in tryDecreaseSlots will also be able to decrease
+ //   totalModerateLoadSlots.
+ // So the natural behavior of the slot adjustment itself guarantees
+ // totalHighLoadSlots >= totalModerateLoadSlots. But as a defensive measure
+ // we clamp totalModerateLoadSlots to not exceed totalHighLoadSlots.
+ if kvsa.granter.totalHighLoadSlots < kvsa.granter.totalModerateLoadSlots {
+ kvsa.granter.totalModerateLoadSlots = kvsa.granter.totalHighLoadSlots
}
kvsa.granter.failedSoftSlotsGet = false
- kvsa.totalSlotsMetric.Update(int64(kvsa.granter.totalSlots))
+ kvsa.totalSlotsMetric.Update(int64(kvsa.granter.totalHighLoadSlots))
}
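Summarizing the branch structure above: CPULoad now divides the runnable-goroutines signal into five regimes. A condensed sketch, with the thresholds copied from the code above (classification only, no slot adjustment):

// classifyLoad reports which regime CPULoad's if/else chain lands in for a
// given runnable count; threshold is runnable goroutines per proc.
func classifyLoad(runnable, procs, threshold int) string {
	switch {
	case runnable >= threshold*procs:
		return "very overloaded: decrease high and moderate load slots"
	case float64(runnable) <= float64((threshold*procs)/4):
		return "very underloaded: increase high and moderate load slots"
	case float64(runnable) <= float64((threshold*procs)/2):
		return "moderately underloaded: increase high load slots only"
	case runnable >= 3*threshold*procs/4:
		return "moderately overloaded: decrease moderate load slots only"
	default:
		return "in-between: leave both slot totals unchanged"
	}
}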

func (kvsa *kvSlotAdjuster) isOverloaded() bool {
- return kvsa.granter.usedSlots >= kvsa.granter.totalSlots && !kvsa.granter.skipSlotEnforcement
+ return kvsa.granter.usedSlots >= kvsa.granter.totalHighLoadSlots && !kvsa.granter.skipSlotEnforcement
}
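As a worked instance of the clamp reasoning in CPULoad above, here is a toy "very overloaded" tick against the demoGranter from the earlier sketches; tryDecrease reimplements the tryDecreaseSlots heuristic shown in this diff, and minCPUSlots stands in for kvsa.minCPUSlots.

// oneOverloadedTick applies the "very overloaded" branch to both totals
// and then the defensive clamp, mirroring the tail of CPULoad above.
func oneOverloadedTick(g *demoGranter, minCPUSlots int) {
	used := g.usedSlots + g.usedSoftSlots
	tryDecrease := func(total int) int {
		if used > 0 && total > minCPUSlots && used <= total {
			total--
		}
		return total
	}
	g.totalHighLoadSlots = tryDecrease(g.totalHighLoadSlots)
	g.totalModerateLoadSlots = tryDecrease(g.totalModerateLoadSlots)
	if g.totalHighLoadSlots < g.totalModerateLoadSlots {
		g.totalModerateLoadSlots = g.totalHighLoadSlots
	}
}

// Example: with usedSlots=3, usedSoftSlots=0, and both totals at 4, used=3
// <= 4, so both totals drop to 3 and the clamp is a no-op, preserving
// totalHighLoadSlots >= totalModerateLoadSlots.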

// sqlNodeCPUOverloadIndicator is the implementation of cpuOverloadIndicator