diff --git a/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs b/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs
index d25d54f26f3..5346e8d17bd 100644
--- a/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs
+++ b/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs
@@ -69,9 +69,26 @@ public struct SpinWait
// numbers may seem fairly arbitrary, but were derived with at least some
// thought in the design document. I fully expect they will need to change
// over time as we gain more experience with performance.
- internal const int YIELD_THRESHOLD = 10; // When to switch over to a true yield.
- internal const int SLEEP_0_EVERY_HOW_MANY_TIMES = 5; // After how many yields should we Sleep(0)?
- internal const int SLEEP_1_EVERY_HOW_MANY_TIMES = 20; // After how many yields should we Sleep(1)?
+ internal const int YieldThreshold = 10; // When to switch over to a true yield.
+ private const int Sleep0EveryHowManyYields = 5; // After how many yields should we Sleep(0)?
+ internal const int DefaultSleep1Threshold = 20; // After how many yields should we Sleep(1) frequently?
+
+ ///
+ /// A suggested number of spin iterations before doing a proper wait, such as waiting on an event that becomes signaled
+ /// when the resource becomes available.
+ ///
+ ///
+ /// These numbers were arrived at by experimenting with different numbers in various cases that currently use it. It's
+ /// only a suggested value and typically works well when the proper wait is something like an event.
+ ///
+ /// Spinning less can lead to early waiting and more context switching; spinning more can decrease latency but may use
+ /// up some CPU time unnecessarily. It depends on the situation too; for instance, SemaphoreSlim uses double this number
+ /// because the waiting there is currently a lot more expensive (involves more spinning, taking a lock, etc.). It also
+ /// depends on the likelihood of the spin being successful and how long the wait would be but those are not accounted
+ /// for here.
+ ///
+ internal static readonly int SpinCountforSpinBeforeWait = PlatformHelper.IsSingleProcessor ? 1 : 35;
+ internal const int Sleep1ThresholdForSpinBeforeWait = 40; // should be greater than SpinCountforSpinBeforeWait
// The number of times we've spun already.
private int _count;
@@ -81,7 +98,12 @@ public struct SpinWait
///
public int Count
{
- get { return _count; }
+ get => _count;
+ internal set
+ {
+ Debug.Assert(value >= 0);
+ _count = value;
+ }
}
///
@@ -94,10 +116,7 @@ public int Count
/// On a single-CPU machine, always yields the processor. On machines with
/// multiple CPUs, may yield after an unspecified number of calls.
///
- public bool NextSpinWillYield
- {
- get { return _count > YIELD_THRESHOLD || PlatformHelper.IsSingleProcessor; }
- }
+ public bool NextSpinWillYield => _count >= YieldThreshold || PlatformHelper.IsSingleProcessor;
///
/// Performs a single spin.
@@ -108,7 +127,27 @@ public bool NextSpinWillYield
///
public void SpinOnce()
{
- if (NextSpinWillYield)
+ SpinOnce(DefaultSleep1Threshold);
+ }
+
+ internal void SpinOnce(int sleep1Threshold)
+ {
+ Debug.Assert(sleep1Threshold >= YieldThreshold || PlatformHelper.IsSingleProcessor); // so that NextSpinWillYield behaves as requested
+
+ // (_count - YieldThreshold) % 2 == 0: The purpose of this check is to interleave Thread.Yield/Sleep(0) with
+ // Thread.SpinWait. Otherwise, the following issues occur:
+ // - When there are no threads to switch to, Yield and Sleep(0) become no-op and it turns the spin loop into a
+ // busy-spin that may quickly reach the max spin count and cause the thread to enter a wait state, or may
+ // just busy-spin for longer than desired before a Sleep(1). Completing the spin loop too early can cause
+ // excessive context switching if a wait follows, and entering the Sleep(1) stage too early can cause
+ // excessive delays.
+ // - If there are multiple threads doing Yield and Sleep(0) (typically from the same spin loop due to
+ // contention), they may switch between one another, delaying work that can make progress.
+ if ((
+ _count >= YieldThreshold &&
+ (_count >= sleep1Threshold || (_count - YieldThreshold) % 2 == 0)
+ ) ||
+ PlatformHelper.IsSingleProcessor)
{
//
// We must yield.
@@ -125,19 +164,21 @@ public void SpinOnce()
// configured to use the (default) coarse-grained system timer.
//
- int yieldsSoFar = (_count >= YIELD_THRESHOLD ? _count - YIELD_THRESHOLD : _count);
-
- if ((yieldsSoFar % SLEEP_1_EVERY_HOW_MANY_TIMES) == (SLEEP_1_EVERY_HOW_MANY_TIMES - 1))
+ if (_count >= sleep1Threshold)
{
RuntimeThread.Sleep(1);
}
- else if ((yieldsSoFar % SLEEP_0_EVERY_HOW_MANY_TIMES) == (SLEEP_0_EVERY_HOW_MANY_TIMES - 1))
- {
- RuntimeThread.Sleep(0);
- }
else
{
- RuntimeThread.Yield();
+ int yieldsSoFar = _count >= YieldThreshold ? (_count - YieldThreshold) / 2 : _count;
+ if ((yieldsSoFar % Sleep0EveryHowManyYields) == (Sleep0EveryHowManyYields - 1))
+ {
+ RuntimeThread.Sleep(0);
+ }
+ else
+ {
+ RuntimeThread.Yield();
+ }
}
}
else
@@ -153,11 +194,24 @@ public void SpinOnce()
// number of spins we are willing to tolerate to reduce delay to the caller,
// since we expect most callers will eventually block anyway.
//
- RuntimeThread.SpinWait(4 << _count);
+ // Also, cap the maximum spin count to a value such that many thousands of CPU cycles would not be wasted doing
+ // the equivalent of YieldProcessor(), as at that point SwitchToThread/Sleep(0) are more likely to be able to
+ // allow other useful work to run. Long YieldProcessor() loops can help to reduce contention, but Sleep(1) is
+ // usually better for that.
+ //
+ // RuntimeThread.OptimalMaxSpinWaitsPerSpinIteration:
+ // - See Thread::InitializeYieldProcessorNormalized(), which describes and calculates this value.
+ //
+ int n = RuntimeThread.OptimalMaxSpinWaitsPerSpinIteration;
+ if (_count <= 30 && (1 << _count) < n)
+ {
+ n = 1 << _count;
+ }
+ RuntimeThread.SpinWait(n);
}
// Finally, increment our spin counter.
- _count = (_count == int.MaxValue ? YIELD_THRESHOLD : _count + 1);
+ _count = (_count == int.MaxValue ? YieldThreshold : _count + 1);
}
///
@@ -299,9 +353,7 @@ internal static int ProcessorCount
///
/// Gets whether the current machine has only a single processor.
///
- internal static bool IsSingleProcessor
- {
- get { return ProcessorCount == 1; }
- }
+ /// This typically does not change on a machine, so it's checked only once.
+ internal static readonly bool IsSingleProcessor = ProcessorCount == 1;
}
}