From 027f2105451d13cc5a89b6d6a092ffb2a6e72544 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 8 Mar 2019 12:53:27 -0500 Subject: [PATCH] Treat TransportService stopped error as node is closing (#39800) If TransportService is stopped before a shard-failure request is sent but after the request is registered, TransportService will notify ReplicationOperation a TransportException with an error message: "transport stop, action: internal:cluster/shard/failure". Relates #39584 --- .../action/support/replication/ReplicationOperation.java | 4 +++- .../support/replication/ReplicationOperationTests.java | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java index c232404d5fe5d..7fdb613c38bbf 100644 --- a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java +++ b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java @@ -206,7 +206,9 @@ public String toString() { private void onNoLongerPrimary(Exception failure) { final Throwable cause = ExceptionsHelper.unwrapCause(failure); final boolean nodeIsClosing = cause instanceof NodeClosedException - || (cause instanceof TransportException && "TransportService is closed stopped can't send request".equals(cause.getMessage())); + || (cause instanceof TransportException && + ("TransportService is closed stopped can't send request".equals(cause.getMessage()) + || "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage()))); final String message; if (nodeIsClosing) { message = String.format(Locale.ROOT, diff --git a/server/src/test/java/org/elasticsearch/action/support/replication/ReplicationOperationTests.java b/server/src/test/java/org/elasticsearch/action/support/replication/ReplicationOperationTests.java index 8adb9c2f26b1a..02988e7981a29 100644 --- a/server/src/test/java/org/elasticsearch/action/support/replication/ReplicationOperationTests.java +++ b/server/src/test/java/org/elasticsearch/action/support/replication/ReplicationOperationTests.java @@ -205,8 +205,11 @@ public void testNoLongerPrimary() throws Exception { shardActionFailure = new NodeClosedException(new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT)); } else if (randomBoolean()) { shardActionFailure = new SendRequestTransportException( - new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT), "internal:cluster/shard/failure", + new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT), ShardStateAction.SHARD_FAILED_ACTION_NAME, new TransportException("TransportService is closed stopped can't send request")); + } else if (randomBoolean()) { + shardActionFailure = new TransportException( + "transport stopped, action: " + ShardStateAction.SHARD_FAILED_ACTION_NAME); } else { shardActionFailure = new ShardStateAction.NoLongerPrimaryShardException(failedReplica.shardId(), "the king is dead"); }