From 10d22ba78169f01a738bcfc48b1a5f6df2f59452 Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 2 Jul 2023 09:03:34 -0400 Subject: [PATCH 1/2] Exceptions: include Timer.ActiveCount in .NET Core stats We're seeing some instances of quite delayed timeouts and at least in two deeper investigations it was due to a huge number of timers firing and the backlog timeout assessment timer not triggering for an extended period of time. To help users diagnose this, adding the cheap counter to the .NET Core pool stats where Timer.ActiveCount is readily available. --- src/StackExchange.Redis/PerfCounterHelper.cs | 2 +- .../StackExchange.Redis.Tests/AbortOnConnectFailTests.cs | 8 ++++---- tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs | 7 ++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/StackExchange.Redis/PerfCounterHelper.cs b/src/StackExchange.Redis/PerfCounterHelper.cs index bcc1b5b0a..8d8b6fbb0 100644 --- a/src/StackExchange.Redis/PerfCounterHelper.cs +++ b/src/StackExchange.Redis/PerfCounterHelper.cs @@ -23,7 +23,7 @@ internal static int GetThreadPoolStats(out string iocp, out string worker, out s worker = $"(Busy={busyWorkerThreads},Free={freeWorkerThreads},Min={minWorkerThreads},Max={maxWorkerThreads})"; #if NETCOREAPP - workItems = $"(Threads={ThreadPool.ThreadCount},QueuedItems={ThreadPool.PendingWorkItemCount},CompletedItems={ThreadPool.CompletedWorkItemCount})"; + workItems = $"(Threads={ThreadPool.ThreadCount},QueuedItems={ThreadPool.PendingWorkItemCount},CompletedItems={ThreadPool.CompletedWorkItemCount},Timers={Timer.ActiveCount})"; #else workItems = null; #endif diff --git a/tests/StackExchange.Redis.Tests/AbortOnConnectFailTests.cs b/tests/StackExchange.Redis.Tests/AbortOnConnectFailTests.cs index 0ba8fca9e..3cccda4de 100644 --- a/tests/StackExchange.Redis.Tests/AbortOnConnectFailTests.cs +++ b/tests/StackExchange.Redis.Tests/AbortOnConnectFailTests.cs @@ -16,7 +16,7 @@ public void NeverEverConnectedNoBacklogThrowsConnectionNotAvailableSync() var db = conn.GetDatabase(); var key = Me(); - // No connection is active/available to service this operation: GET 6.0.14AbortOnConnectFailTests-NeverEverConnectedThrowsConnectionNotAvailable; UnableToConnect on doesnot.exist.0d034c26350e4ee199d6c5f385a073f7:6379/Interactive, Initializing/NotStarted, last: NONE, origin: BeginConnectAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 500s, state: Connecting, mgr: 10 of 10 available, last-heartbeat: never, global: 0s ago, v: 2.6.99.22667, mc: 1/1/0, mgr: 10 of 10 available, clientName: NAMISTOU-3(SE.Redis-v2.6.99.22667), IOCP: (Busy=0,Free=1000,Min=32,Max=1000), WORKER: (Busy=2,Free=32765,Min=32,Max=32767), POOL: (Threads=17,QueuedItems=0,CompletedItems=20), v: 2.6.99.22667 + // No connection is active/available to service this operation: GET 6.0.18AbortOnConnectFailTests-NeverEverConnectedNoBacklogThrowsConnectionNotAvailableSync; UnableToConnect on doesnot.exist.d4d1424806204b68b047954b1db3411d:6379/Interactive, Initializing/NotStarted, last: NONE, origin: BeginConnectAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 100s, state: Connecting, mgr: 4 of 10 available, last-heartbeat: never, global: 0s ago, v: 2.6.120.51136, mc: 1/1/0, mgr: 5 of 10 available, clientName: CRAVERTOP7(SE.Redis-v2.6.120.51136), IOCP: (Busy=0,Free=1000,Min=16,Max=1000), WORKER: (Busy=3,Free=32764,Min=16,Max=32767), POOL: (Threads=25,QueuedItems=0,CompletedItems=1066,Timers=46), v: 2.6.120.51136 var ex = Assert.Throws(() => db.StringGet(key)); Log("Exception: " + ex.Message); Assert.Contains("No connection is active/available to service this operation", ex.Message); @@ -29,7 +29,7 @@ public async Task NeverEverConnectedNoBacklogThrowsConnectionNotAvailableAsync() var db = conn.GetDatabase(); var key = Me(); - // No connection is active/available to service this operation: GET 6.0.14AbortOnConnectFailTests-NeverEverConnectedThrowsConnectionNotAvailable; UnableToConnect on doesnot.exist.0d034c26350e4ee199d6c5f385a073f7:6379/Interactive, Initializing/NotStarted, last: NONE, origin: BeginConnectAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 500s, state: Connecting, mgr: 10 of 10 available, last-heartbeat: never, global: 0s ago, v: 2.6.99.22667, mc: 1/1/0, mgr: 10 of 10 available, clientName: NAMISTOU-3(SE.Redis-v2.6.99.22667), IOCP: (Busy=0,Free=1000,Min=32,Max=1000), WORKER: (Busy=2,Free=32765,Min=32,Max=32767), POOL: (Threads=17,QueuedItems=0,CompletedItems=20), v: 2.6.99.22667 + // No connection is active/available to service this operation: GET 6.0.18AbortOnConnectFailTests-NeverEverConnectedNoBacklogThrowsConnectionNotAvailableSync; UnableToConnect on doesnot.exist.d4d1424806204b68b047954b1db3411d:6379/Interactive, Initializing/NotStarted, last: NONE, origin: BeginConnectAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 100s, state: Connecting, mgr: 4 of 10 available, last-heartbeat: never, global: 0s ago, v: 2.6.120.51136, mc: 1/1/0, mgr: 5 of 10 available, clientName: CRAVERTOP7(SE.Redis-v2.6.120.51136), IOCP: (Busy=0,Free=1000,Min=16,Max=1000), WORKER: (Busy=3,Free=32764,Min=16,Max=32767), POOL: (Threads=25,QueuedItems=0,CompletedItems=1066,Timers=46), v: 2.6.120.51136 var ex = await Assert.ThrowsAsync(() => db.StringGetAsync(key)); Log("Exception: " + ex.Message); Assert.Contains("No connection is active/available to service this operation", ex.Message); @@ -49,7 +49,7 @@ public void DisconnectAndReconnectThrowsConnectionExceptionSync() var server = conn.GetServerSnapshot()[0]; server.SimulateConnectionFailure(SimulatedFailureType.All); - // Exception: The message timed out in the backlog attempting to send because no connection became available (100ms) - Last Connection Exception: InternalFailure on 127.0.0.1:6379/Interactive, Initializing/NotStarted, last: GET, origin: ConnectedAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 500s, state: Connecting, mgr: 10 of 10 available, last-heartbeat: never, last-mbeat: 0s ago, global: 0s ago, v: 2.6.99.22667, command=PING, inst: 0, qu: 0, qs: 0, aw: False, bw: CheckingForTimeout, last-in: 0, cur-in: 0, sync-ops: 1, async-ops: 1, serverEndpoint: 127.0.0.1:6379, conn-sec: n/a, aoc: 0, mc: 1/1/0, mgr: 10 of 10 available, clientName: NAMISTOU-3(SE.Redis-v2.6.99.22667), IOCP: (Busy=0,Free=1000,Min=32,Max=1000), WORKER: (Busy=2,Free=32765,Min=32,Max=32767), POOL: (Threads=18,QueuedItems=0,CompletedItems=65), v: 2.6.99.22667 (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts) + // Exception: The message timed out in the backlog attempting to send because no connection became available (100ms) - Last Connection Exception: SocketFailure (InputReaderCompleted, last-recv: 7) on 127.0.0.1:6379/Interactive, Idle/ReadAsync, last: PING, origin: SimulateConnectionFailure, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 100s, state: ConnectedEstablished, mgr: 10 of 10 available, in: 0, in-pipe: 0, out-pipe: 0, last-heartbeat: never, last-mbeat: 0s ago, global: 0s ago, v: 2.6.120.51136, command=PING, timeout: 100, inst: 13, qu: 1, qs: 0, aw: False, bw: Inactive, last-in: 0, cur-in: 0, sync-ops: 2, async-ops: 0, serverEndpoint: 127.0.0.1:6379, conn-sec: n/a, aoc: 0, mc: 1/1/0, mgr: 10 of 10 available, clientName: CRAVERTOP7(SE.Redis-v2.6.120.51136), IOCP: (Busy=0,Free=1000,Min=16,Max=1000), WORKER: (Busy=2,Free=32765,Min=16,Max=32767), POOL: (Threads=33,QueuedItems=0,CompletedItems=6237,Timers=39), v: 2.6.120.51136 (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts) var ex = Assert.ThrowsAny(() => db.Ping()); Log("Exception: " + ex.Message); Assert.True(ex is RedisConnectionException or RedisTimeoutException); @@ -73,7 +73,7 @@ public async Task DisconnectAndNoReconnectThrowsConnectionExceptionAsync() var server = conn.GetServerSnapshot()[0]; server.SimulateConnectionFailure(SimulatedFailureType.All); - // Exception: The message timed out in the backlog attempting to send because no connection became available (100ms) - Last Connection Exception: InternalFailure on 127.0.0.1:6379/Interactive, Initializing/NotStarted, last: GET, origin: ConnectedAsync, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 500s, state: Connecting, mgr: 10 of 10 available, last-heartbeat: never, last-mbeat: 0s ago, global: 0s ago, v: 2.6.99.22667, command=PING, inst: 0, qu: 0, qs: 0, aw: False, bw: CheckingForTimeout, last-in: 0, cur-in: 0, sync-ops: 1, async-ops: 1, serverEndpoint: 127.0.0.1:6379, conn-sec: n/a, aoc: 0, mc: 1/1/0, mgr: 10 of 10 available, clientName: NAMISTOU-3(SE.Redis-v2.6.99.22667), IOCP: (Busy=0,Free=1000,Min=32,Max=1000), WORKER: (Busy=2,Free=32765,Min=32,Max=32767), POOL: (Threads=18,QueuedItems=0,CompletedItems=65), v: 2.6.99.22667 (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts) + // Exception: The message timed out in the backlog attempting to send because no connection became available (100ms) - Last Connection Exception: SocketFailure (InputReaderCompleted, last-recv: 7) on 127.0.0.1:6379/Interactive, Idle/ReadAsync, last: PING, origin: SimulateConnectionFailure, outstanding: 0, last-read: 0s ago, last-write: 0s ago, keep-alive: 100s, state: ConnectedEstablished, mgr: 8 of 10 available, in: 0, in-pipe: 0, out-pipe: 0, last-heartbeat: never, last-mbeat: 0s ago, global: 0s ago, v: 2.6.120.51136, command=PING, timeout: 100, inst: 0, qu: 0, qs: 0, aw: False, bw: CheckingForTimeout, last-in: 0, cur-in: 0, sync-ops: 1, async-ops: 1, serverEndpoint: 127.0.0.1:6379, conn-sec: n/a, aoc: 0, mc: 1/1/0, mgr: 8 of 10 available, clientName: CRAVERTOP7(SE.Redis-v2.6.120.51136), IOCP: (Busy=0,Free=1000,Min=16,Max=1000), WORKER: (Busy=6,Free=32761,Min=16,Max=32767), POOL: (Threads=33,QueuedItems=0,CompletedItems=5547,Timers=60), v: 2.6.120.51136 (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts) var ex = await Assert.ThrowsAsync(() => db.PingAsync()); Log("Exception: " + ex.Message); Assert.StartsWith("The message timed out in the backlog attempting to send because no connection became available (100ms) - Last Connection Exception: ", ex.Message); diff --git a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs index 42f444418..02309ec79 100644 --- a/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs +++ b/tests/StackExchange.Redis.Tests/ExceptionFactoryTests.cs @@ -126,7 +126,12 @@ public void TimeoutException() Assert.Contains("conn-sec: n/a", ex.Message); Assert.Contains("aoc: 1", ex.Message); #if NETCOREAPP - Assert.Contains("POOL: ", ex.Message); + // ...POOL: (Threads=33,QueuedItems=0,CompletedItems=5547,Timers=60)... + Assert.Contains("POOL: ", ex.Message); + Assert.Contains("Threads=", ex.Message); + Assert.Contains("QueuedItems=", ex.Message); + Assert.Contains("CompletedItems=", ex.Message); + Assert.Contains("Timers=", ex.Message); #endif Assert.DoesNotContain("Unspecified/", ex.Message); Assert.EndsWith(" (Please take a look at this article for some common client-side issues that can cause timeouts: https://stackexchange.github.io/StackExchange.Redis/Timeouts)", ex.Message); From a6aed4d6a5262d7ed7708ef51575dcea7a37193f Mon Sep 17 00:00:00 2001 From: Nick Craver Date: Sun, 2 Jul 2023 09:07:36 -0400 Subject: [PATCH 2/2] Add release notes --- docs/ReleaseNotes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index cf44a985d..99315cf2b 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -10,6 +10,7 @@ Current package versions: - Change: Target net6.0 instead of net5.0, since net5.0 is end of life. ([#2497 by eerhardt](https://github.com/StackExchange/StackExchange.Redis/pull/2497)) - Fix: Fix nullability annotation of IConnectionMultiplexer.RegisterProfiler ([#2494 by eerhardt](https://github.com/StackExchange/StackExchange.Redis/pull/2494)) +- Add: `Timer.ActiveCount` under `POOL` in timeout messages on .NET 6+ to help diagnose timer overload affecting timeout evaluations ([#2500 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2500)) ## 2.6.116