diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index a657ee394..bf27b5eca 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -8,8 +8,9 @@ Current package versions: ## Unreleased -- Fix [#2593](https://github.com/StackExchange/StackExchange.Redis/pull/2593): `EXPIRETIME` and `PEXPIRETIME` miscategorized as `PrimaryOnly` commands causing them to fail when issued against a read-only replica ([#2593 by slorello89](https://github.com/StackExchange/StackExchange.Redis/pull/2593)) -- Fix [#2591](https://github.com/StackExchange/StackExchange.Redis/pull/2591): Add `HELLO` to Sentinel connections so they can support RESP3 ([#2601 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2601)) +- Fix [#2593](https://github.com/StackExchange/StackExchange.Redis/issues/2593): `EXPIRETIME` and `PEXPIRETIME` miscategorized as `PrimaryOnly` commands causing them to fail when issued against a read-only replica ([#2593 by slorello89](https://github.com/StackExchange/StackExchange.Redis/pull/2593)) +- Fix [#2591](https://github.com/StackExchange/StackExchange.Redis/issues/2591): Add `HELLO` to Sentinel connections so they can support RESP3 ([#2601 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2601)) +- Fix [#2595](https://github.com/StackExchange/StackExchange.Redis/issues/2595): Add detection handling for dead sockets that the OS says are okay, seen especially in Linux environments (https://github.com/StackExchange/StackExchange.Redis/pull/2610) ## 2.7.4 diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs index 3a4494821..bf49e6e30 100644 --- a/src/StackExchange.Redis/PhysicalBridge.cs +++ b/src/StackExchange.Redis/PhysicalBridge.cs @@ -591,7 +591,7 @@ internal void OnHeartbeat(bool ifConnectedOnly) Interlocked.Exchange(ref connectTimeoutRetryCount, 0); tmp.BridgeCouldBeNull?.ServerEndPoint?.ClearUnselectable(UnselectableFlags.DidNotRespond); } - tmp.OnBridgeHeartbeat(); + int timedOutThisHeartbeat = tmp.OnBridgeHeartbeat(); int writeEverySeconds = ServerEndPoint.WriteEverySeconds, checkConfigSeconds = ServerEndPoint.ConfigCheckSeconds; @@ -623,6 +623,17 @@ internal void OnHeartbeat(bool ifConnectedOnly) // queue, test the socket KeepAlive(); } + else if (timedOutThisHeartbeat > 0 + && tmp.LastReadSecondsAgo * 1_000 > (tmp.BridgeCouldBeNull?.Multiplexer.AsyncTimeoutMilliseconds * 4)) + { + // If we've received *NOTHING* on the pipe in 4 timeouts worth of time and we're timing out commands, issue a connection failure so that we reconnect + // This is meant to address the scenario we see often in Linux configs where TCP retries will happen for 15 minutes. + // To us as a client, we'll see the socket as green/open/fine when writing but we'll bet getting nothing back. + // Since we can't depend on the pipe to fail in that case, we want to error here based on the criteria above so we reconnect broken clients much faster. + tmp.BridgeCouldBeNull?.Multiplexer.Logger?.LogWarning($"Dead socket detected, no reads in {tmp.LastReadSecondsAgo} seconds with {timedOutThisHeartbeat} timeouts, issuing disconnect"); + OnDisconnected(ConnectionFailureType.SocketFailure, tmp, out _, out State oldState); + tmp.Dispose(); // Cleanup the existing connection/socket if any, otherwise it will wait reading indefinitely + } } break; case (int)State.Disconnected: diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs index 22c9d2894..2ed2a1819 100644 --- a/src/StackExchange.Redis/PhysicalConnection.cs +++ b/src/StackExchange.Redis/PhysicalConnection.cs @@ -248,6 +248,7 @@ private enum ReadMode : byte private readonly WeakReference _bridge; public PhysicalBridge? BridgeCouldBeNull => (PhysicalBridge?)_bridge.Target; + public long LastReadSecondsAgo => unchecked(Environment.TickCount - Thread.VolatileRead(ref lastReadTickCount)) / 1000; public long LastWriteSecondsAgo => unchecked(Environment.TickCount - Thread.VolatileRead(ref lastWriteTickCount)) / 1000; private bool IncludeDetailInExceptions => BridgeCouldBeNull?.Multiplexer.RawConfig.IncludeDetailInExceptions ?? false; @@ -720,8 +721,13 @@ internal void GetStormLog(StringBuilder sb) } } - internal void OnBridgeHeartbeat() + /// + /// Runs on every heartbeat for a bridge, timing out any commands that are overdue and returning an integer of how many we timed out. + /// + /// How many commands were overdue and threw timeout exceptions. + internal int OnBridgeHeartbeat() { + var result = 0; var now = Environment.TickCount; Interlocked.Exchange(ref lastBeatTickCount, now); @@ -747,6 +753,7 @@ internal void OnBridgeHeartbeat() multiplexer.OnMessageFaulted(msg, timeoutEx); msg.SetExceptionAndComplete(timeoutEx, bridge); // tell the message that it is doomed multiplexer.OnAsyncTimeout(); + result++; } } else @@ -761,6 +768,7 @@ internal void OnBridgeHeartbeat() } } } + return result; } internal void OnInternalError(Exception exception, [CallerMemberName] string? origin = null)