diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index a657ee394..bf27b5eca 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -8,8 +8,9 @@ Current package versions:
## Unreleased
-- Fix [#2593](https://github.com/StackExchange/StackExchange.Redis/pull/2593): `EXPIRETIME` and `PEXPIRETIME` miscategorized as `PrimaryOnly` commands causing them to fail when issued against a read-only replica ([#2593 by slorello89](https://github.com/StackExchange/StackExchange.Redis/pull/2593))
-- Fix [#2591](https://github.com/StackExchange/StackExchange.Redis/pull/2591): Add `HELLO` to Sentinel connections so they can support RESP3 ([#2601 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2601))
+- Fix [#2593](https://github.com/StackExchange/StackExchange.Redis/issues/2593): `EXPIRETIME` and `PEXPIRETIME` miscategorized as `PrimaryOnly` commands causing them to fail when issued against a read-only replica ([#2593 by slorello89](https://github.com/StackExchange/StackExchange.Redis/pull/2593))
+- Fix [#2591](https://github.com/StackExchange/StackExchange.Redis/issues/2591): Add `HELLO` to Sentinel connections so they can support RESP3 ([#2601 by NickCraver](https://github.com/StackExchange/StackExchange.Redis/pull/2601))
+- Fix [#2595](https://github.com/StackExchange/StackExchange.Redis/issues/2595): Add detection handling for dead sockets that the OS says are okay, seen especially in Linux environments (https://github.com/StackExchange/StackExchange.Redis/pull/2610)
## 2.7.4
diff --git a/src/StackExchange.Redis/PhysicalBridge.cs b/src/StackExchange.Redis/PhysicalBridge.cs
index 3a4494821..bf49e6e30 100644
--- a/src/StackExchange.Redis/PhysicalBridge.cs
+++ b/src/StackExchange.Redis/PhysicalBridge.cs
@@ -591,7 +591,7 @@ internal void OnHeartbeat(bool ifConnectedOnly)
Interlocked.Exchange(ref connectTimeoutRetryCount, 0);
tmp.BridgeCouldBeNull?.ServerEndPoint?.ClearUnselectable(UnselectableFlags.DidNotRespond);
}
- tmp.OnBridgeHeartbeat();
+ int timedOutThisHeartbeat = tmp.OnBridgeHeartbeat();
int writeEverySeconds = ServerEndPoint.WriteEverySeconds,
checkConfigSeconds = ServerEndPoint.ConfigCheckSeconds;
@@ -623,6 +623,17 @@ internal void OnHeartbeat(bool ifConnectedOnly)
// queue, test the socket
KeepAlive();
}
+ else if (timedOutThisHeartbeat > 0
+ && tmp.LastReadSecondsAgo * 1_000 > (tmp.BridgeCouldBeNull?.Multiplexer.AsyncTimeoutMilliseconds * 4))
+ {
+ // If we've received *NOTHING* on the pipe in 4 timeouts worth of time and we're timing out commands, issue a connection failure so that we reconnect
+ // This is meant to address the scenario we see often in Linux configs where TCP retries will happen for 15 minutes.
+ // To us as a client, we'll see the socket as green/open/fine when writing but we'll bet getting nothing back.
+ // Since we can't depend on the pipe to fail in that case, we want to error here based on the criteria above so we reconnect broken clients much faster.
+ tmp.BridgeCouldBeNull?.Multiplexer.Logger?.LogWarning($"Dead socket detected, no reads in {tmp.LastReadSecondsAgo} seconds with {timedOutThisHeartbeat} timeouts, issuing disconnect");
+ OnDisconnected(ConnectionFailureType.SocketFailure, tmp, out _, out State oldState);
+ tmp.Dispose(); // Cleanup the existing connection/socket if any, otherwise it will wait reading indefinitely
+ }
}
break;
case (int)State.Disconnected:
diff --git a/src/StackExchange.Redis/PhysicalConnection.cs b/src/StackExchange.Redis/PhysicalConnection.cs
index 22c9d2894..2ed2a1819 100644
--- a/src/StackExchange.Redis/PhysicalConnection.cs
+++ b/src/StackExchange.Redis/PhysicalConnection.cs
@@ -248,6 +248,7 @@ private enum ReadMode : byte
private readonly WeakReference _bridge;
public PhysicalBridge? BridgeCouldBeNull => (PhysicalBridge?)_bridge.Target;
+ public long LastReadSecondsAgo => unchecked(Environment.TickCount - Thread.VolatileRead(ref lastReadTickCount)) / 1000;
public long LastWriteSecondsAgo => unchecked(Environment.TickCount - Thread.VolatileRead(ref lastWriteTickCount)) / 1000;
private bool IncludeDetailInExceptions => BridgeCouldBeNull?.Multiplexer.RawConfig.IncludeDetailInExceptions ?? false;
@@ -720,8 +721,13 @@ internal void GetStormLog(StringBuilder sb)
}
}
- internal void OnBridgeHeartbeat()
+ ///
+ /// Runs on every heartbeat for a bridge, timing out any commands that are overdue and returning an integer of how many we timed out.
+ ///
+ /// How many commands were overdue and threw timeout exceptions.
+ internal int OnBridgeHeartbeat()
{
+ var result = 0;
var now = Environment.TickCount;
Interlocked.Exchange(ref lastBeatTickCount, now);
@@ -747,6 +753,7 @@ internal void OnBridgeHeartbeat()
multiplexer.OnMessageFaulted(msg, timeoutEx);
msg.SetExceptionAndComplete(timeoutEx, bridge); // tell the message that it is doomed
multiplexer.OnAsyncTimeout();
+ result++;
}
}
else
@@ -761,6 +768,7 @@ internal void OnBridgeHeartbeat()
}
}
}
+ return result;
}
internal void OnInternalError(Exception exception, [CallerMemberName] string? origin = null)