Skip to content

Commit

Permalink
Mute connection fail log during test cluster stop (elastic#94523)
Browse files Browse the repository at this point in the history
Currently a node connection failure is logged at the warn level every
six failures. While stopping a test cluster this often fills the logs
with many noisy logs. This is particularly true for large internal test
clusters. This commit temporarily raises the NodeConnectionsService log
to error during the cluster stop.
  • Loading branch information
Tim-Brooks authored Mar 20, 2023
1 parent b803cf9 commit 7166af0
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,9 @@ public void onResponse(Releasable connectionReleasable) {
@Override
public void onFailure(Exception e) {
final int currentFailureCount = consecutiveFailureCount.incrementAndGet();
// only warn every 6th failure
// Only warn every 6th failure. We work around this log while stopping integ test clusters in InternalTestCluster#close
// by temporarily raising the log level to ERROR. If the nature of this log changes in the future, that workaround might
// need to be adjusted.
final Level level = currentFailureCount % 6 == 1 ? Level.WARN : Level.DEBUG;
logger.log(level, () -> format("failed to connect to %s (tried [%s] times)", discoveryNode, currentFailureCount), e);
setConnectionRef(null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.AlreadyClosedException;
Expand All @@ -28,6 +29,7 @@
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
Expand All @@ -49,6 +51,7 @@
import org.elasticsearch.common.component.LifecycleListener;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.MockSecureSettings;
import org.elasticsearch.common.settings.SecureSettings;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -898,10 +901,16 @@ public synchronized void close() throws IOException {
activeDisruptionScheme.testClusterClosed();
activeDisruptionScheme = null;
}
// There is a WARN level log in NodeConnectionsService which logs when a node connection attempt fails. As we stop nodes one by
// one, this logs a bunch of noise. Temporarily raise the log level to ERROR while stopping cluster.
Logger nodeConnectionLogger = LogManager.getLogger(NodeConnectionsService.class);
Level initialLogLevel = nodeConnectionLogger.getLevel();
Loggers.setLevel(nodeConnectionLogger, Level.ERROR);
try {
IOUtils.close(nodes.values());
} finally {
nodes = Collections.emptyNavigableMap();
Loggers.setLevel(nodeConnectionLogger, initialLogLevel);
executor.shutdownNow();
}
}
Expand Down

0 comments on commit 7166af0

Please sign in to comment.