From 7166af0b72c75907fa7360a991888191d39b41aa Mon Sep 17 00:00:00 2001 From: Tim Brooks Date: Mon, 20 Mar 2023 09:49:53 -0600 Subject: [PATCH] Mute connection fail log during test cluster stop (#94523) Currently a node connection failure is logged at the warn level every six failures. While stopping a test cluster this often fills the logs with many noisy logs. This is particularly true for large internal test clusters. This commit temporarily raises the NodeConnectionsService log to error during the cluster stop. --- .../elasticsearch/cluster/NodeConnectionsService.java | 4 +++- .../java/org/elasticsearch/test/InternalTestCluster.java | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java index 8809a883bd978..198c964ee2695 100644 --- a/server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/elasticsearch/cluster/NodeConnectionsService.java @@ -307,7 +307,9 @@ public void onResponse(Releasable connectionReleasable) { @Override public void onFailure(Exception e) { final int currentFailureCount = consecutiveFailureCount.incrementAndGet(); - // only warn every 6th failure + // Only warn every 6th failure. We work around this log while stopping integ test clusters in InternalTestCluster#close + // by temporarily raising the log level to ERROR. If the nature of this log changes in the future, that workaround might + // need to be adjusted. final Level level = currentFailureCount % 6 == 1 ? Level.WARN : Level.DEBUG; logger.log(level, () -> format("failed to connect to %s (tried [%s] times)", discoveryNode, currentFailureCount), e); setConnectionRef(null); diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index d89c3c8bf54cf..447559137b5ba 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -13,6 +13,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; import com.carrotsearch.randomizedtesting.generators.RandomStrings; +import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.store.AlreadyClosedException; @@ -28,6 +29,7 @@ import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.NodeConnectionsService; import org.elasticsearch.cluster.action.index.MappingUpdatedAction; import org.elasticsearch.cluster.coordination.ClusterBootstrapService; import org.elasticsearch.cluster.coordination.NoMasterBlockService; @@ -49,6 +51,7 @@ import org.elasticsearch.common.component.LifecycleListener; import org.elasticsearch.common.io.FileSystemUtils; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.MockSecureSettings; import org.elasticsearch.common.settings.SecureSettings; import org.elasticsearch.common.settings.Settings; @@ -898,10 +901,16 @@ public synchronized void close() throws IOException { activeDisruptionScheme.testClusterClosed(); activeDisruptionScheme = null; } + // There is a WARN level log in NodeConnectionsService which logs when a node connection attempt fails. As we stop nodes one by + // one, this logs a bunch of noise. Temporarily raise the log level to ERROR while stopping cluster. + Logger nodeConnectionLogger = LogManager.getLogger(NodeConnectionsService.class); + Level initialLogLevel = nodeConnectionLogger.getLevel(); + Loggers.setLevel(nodeConnectionLogger, Level.ERROR); try { IOUtils.close(nodes.values()); } finally { nodes = Collections.emptyNavigableMap(); + Loggers.setLevel(nodeConnectionLogger, initialLogLevel); executor.shutdownNow(); } }