diff --git a/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs b/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs index d25d54f26f3..5346e8d17bd 100644 --- a/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs +++ b/src/System.Private.CoreLib/shared/System/Threading/SpinWait.cs @@ -69,9 +69,26 @@ public struct SpinWait // numbers may seem fairly arbitrary, but were derived with at least some // thought in the design document. I fully expect they will need to change // over time as we gain more experience with performance. - internal const int YIELD_THRESHOLD = 10; // When to switch over to a true yield. - internal const int SLEEP_0_EVERY_HOW_MANY_TIMES = 5; // After how many yields should we Sleep(0)? - internal const int SLEEP_1_EVERY_HOW_MANY_TIMES = 20; // After how many yields should we Sleep(1)? + internal const int YieldThreshold = 10; // When to switch over to a true yield. + private const int Sleep0EveryHowManyYields = 5; // After how many yields should we Sleep(0)? + internal const int DefaultSleep1Threshold = 20; // After how many yields should we Sleep(1) frequently? + + /// + /// A suggested number of spin iterations before doing a proper wait, such as waiting on an event that becomes signaled + /// when the resource becomes available. + /// + /// + /// These numbers were arrived at by experimenting with different numbers in various cases that currently use it. It's + /// only a suggested value and typically works well when the proper wait is something like an event. + /// + /// Spinning less can lead to early waiting and more context switching, spinning more can decrease latency but may use + /// up some CPU time unnecessarily. Depends on the situation too, for instance SemaphoreSlim uses double this number + /// because the waiting there is currently a lot more expensive (involves more spinning, taking a lock, etc.). 
It also + /// depends on the likelihood of the spin being successful and how long the wait would be but those are not accounted + /// for here. + /// + internal static readonly int SpinCountforSpinBeforeWait = PlatformHelper.IsSingleProcessor ? 1 : 35; + internal const int Sleep1ThresholdForSpinBeforeWait = 40; // should be greater than SpinCountforSpinBeforeWait // The number of times we've spun already. private int _count; @@ -81,7 +98,12 @@ public struct SpinWait /// public int Count { - get { return _count; } + get => _count; + internal set + { + Debug.Assert(value >= 0); + _count = value; + } } /// @@ -94,10 +116,7 @@ public int Count /// On a single-CPU machine, always yields the processor. On machines with /// multiple CPUs, may yield after an unspecified number of calls. /// - public bool NextSpinWillYield - { - get { return _count > YIELD_THRESHOLD || PlatformHelper.IsSingleProcessor; } - } + public bool NextSpinWillYield => _count >= YieldThreshold || PlatformHelper.IsSingleProcessor; /// /// Performs a single spin. @@ -108,7 +127,27 @@ public bool NextSpinWillYield /// public void SpinOnce() { - if (NextSpinWillYield) + SpinOnce(DefaultSleep1Threshold); + } + + internal void SpinOnce(int sleep1Threshold) + { + Debug.Assert(sleep1Threshold >= YieldThreshold || PlatformHelper.IsSingleProcessor); // so that NextSpinWillYield behaves as requested + + // (_count - YieldThreshold) % 2 == 0: The purpose of this check is to interleave Thread.Yield/Sleep(0) with + // Thread.SpinWait. Otherwise, the following issues occur: + // - When there are no threads to switch to, Yield and Sleep(0) become no-op and it turns the spin loop into a + // busy-spin that may quickly reach the max spin count and cause the thread to enter a wait state, or may + // just busy-spin for longer than desired before a Sleep(1). 
Completing the spin loop too early can cause + // excessive context switching if a wait follows, and entering the Sleep(1) stage too early can cause + // excessive delays. + // - If there are multiple threads doing Yield and Sleep(0) (typically from the same spin loop due to + // contention), they may switch between one another, delaying work that can make progress. + if (( + _count >= YieldThreshold && + (_count >= sleep1Threshold || (_count - YieldThreshold) % 2 == 0) + ) || + PlatformHelper.IsSingleProcessor) { // // We must yield. @@ -125,19 +164,21 @@ public void SpinOnce() // configured to use the (default) coarse-grained system timer. // - int yieldsSoFar = (_count >= YIELD_THRESHOLD ? _count - YIELD_THRESHOLD : _count); - - if ((yieldsSoFar % SLEEP_1_EVERY_HOW_MANY_TIMES) == (SLEEP_1_EVERY_HOW_MANY_TIMES - 1)) + if (_count >= sleep1Threshold) { RuntimeThread.Sleep(1); } - else if ((yieldsSoFar % SLEEP_0_EVERY_HOW_MANY_TIMES) == (SLEEP_0_EVERY_HOW_MANY_TIMES - 1)) - { - RuntimeThread.Sleep(0); - } else { - RuntimeThread.Yield(); + int yieldsSoFar = _count >= YieldThreshold ? (_count - YieldThreshold) / 2 : _count; + if ((yieldsSoFar % Sleep0EveryHowManyYields) == (Sleep0EveryHowManyYields - 1)) + { + RuntimeThread.Sleep(0); + } + else + { + RuntimeThread.Yield(); + } } } else @@ -153,11 +194,24 @@ public void SpinOnce() // number of spins we are willing to tolerate to reduce delay to the caller, // since we expect most callers will eventually block anyway. // - RuntimeThread.SpinWait(4 << _count); + // Also, cap the maximum spin count to a value such that many thousands of CPU cycles would not be wasted doing + // the equivalent of YieldProcessor(), as at that point SwitchToThread/Sleep(0) are more likely to be able to + // allow other useful work to run. Long YieldProcessor() loops can help to reduce contention, but Sleep(1) is + // usually better for that. 
+ // + // RuntimeThread.OptimalMaxSpinWaitsPerSpinIteration: + // - See Thread::InitializeYieldProcessorNormalized(), which describes and calculates this value. + // + int n = RuntimeThread.OptimalMaxSpinWaitsPerSpinIteration; + if (_count <= 30 && (1 << _count) < n) + { + n = 1 << _count; + } + RuntimeThread.SpinWait(n); } // Finally, increment our spin counter. - _count = (_count == int.MaxValue ? YIELD_THRESHOLD : _count + 1); + _count = (_count == int.MaxValue ? YieldThreshold : _count + 1); } /// @@ -299,9 +353,7 @@ internal static int ProcessorCount /// /// Gets whether the current machine has only a single processor. /// - internal static bool IsSingleProcessor - { - get { return ProcessorCount == 1; } - } + /// This typically does not change on a machine, so it's checked only once. + internal static readonly bool IsSingleProcessor = ProcessorCount == 1; } }