Fork the sending of file chunks during recovery (#74164)
Today if sending file chunks is CPU-bound (e.g. when using compression)
then we tend to concentrate all that work onto relatively few threads,
even if `indices.recovery.max_concurrent_file_chunks` is increased. With
this commit we fork the transmission of each chunk onto its own thread
so that the CPU-bound work can happen in parallel.
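To illustrate the pattern the commit describes, here is a minimal stand-alone sketch (not the Elasticsearch code; ForkedChunkSender and its methods are invented for illustration). Compressing a chunk is the CPU-bound step, so each chunk is handed to a pooled executor instead of being compressed serially on the thread that produced it:

import java.io.ByteArrayOutputStream;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.zip.Deflater;

// Hypothetical stand-in for the recovery source: fork each chunk onto a
// pooled thread so several chunks can be compressed and sent in parallel.
public class ForkedChunkSender {

    private final ExecutorService pool =
        Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

    public void sendChunks(List<byte[]> chunks) {
        for (byte[] chunk : chunks) {
            pool.execute(() -> sendCompressed(chunk)); // fork: chunks compress in parallel
        }
    }

    private void sendCompressed(byte[] chunk) {
        Deflater deflater = new Deflater();
        deflater.setInput(chunk);
        deflater.finish();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buf = new byte[8192];
        while (!deflater.finished()) {
            out.write(buf, 0, deflater.deflate(buf));
        }
        deflater.end();
        // A real implementation would write out.toByteArray() to the wire here.
        System.out.printf("sent %d bytes (compressed from %d)%n", out.size(), chunk.length);
    }

    public void close() throws InterruptedException {
        pool.shutdown();
        pool.awaitTermination(30, TimeUnit.SECONDS);
    }
}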
DaveCTurner committed Jun 16, 2021
1 parent 8e3399d commit 2335be1
Showing 3 changed files with 17 additions and 10 deletions.
7 changes: 4 additions & 3 deletions docs/reference/modules/indices/recovery.asciidoc
@@ -57,11 +57,12 @@ You can use the following _expert_ setting to manage resources for peer
 recoveries.
 
 `indices.recovery.max_concurrent_file_chunks`::
-(<<cluster-update-settings,Dynamic>>, Expert) Number of file chunk requests
-sent in parallel for each recovery. Defaults to `2`.
+(<<cluster-update-settings,Dynamic>>, Expert) Number of file chunks sent in
+parallel for each recovery. Defaults to `2`.
 +
 You can increase the value of this setting when the recovery of a single shard
-is not reaching the traffic limit set by `indices.recovery.max_bytes_per_sec`.
+is not reaching the traffic limit set by `indices.recovery.max_bytes_per_sec`,
+up to a maximum of `8`.
 
 `indices.recovery.max_concurrent_operations`::
 (<<cluster-update-settings,Dynamic>>, Expert) Number of operations sent
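Since the setting is dynamic, it can be raised on a live cluster. A usage sketch in the Console syntax these reference docs use elsewhere (the value `8` here is just the new maximum):

PUT _cluster/settings
{
  "persistent": {
    "indices.recovery.max_concurrent_file_chunks": 8
  }
}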
@@ -80,7 +80,7 @@ public class RecoverySettings {
      * Controls the maximum number of file chunk requests that can be sent concurrently from the source node to the target node.
      */
     public static final Setting<Integer> INDICES_RECOVERY_MAX_CONCURRENT_FILE_CHUNKS_SETTING =
-        Setting.intSetting("indices.recovery.max_concurrent_file_chunks", 2, 1, 5, Property.Dynamic, Property.NodeScope);
+        Setting.intSetting("indices.recovery.max_concurrent_file_chunks", 2, 1, 8, Property.Dynamic, Property.NodeScope);
 
     /**
      * Controls the maximum number of operation chunk requests that can be sent concurrently from the source node to the target node.
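The positional arguments to `Setting.intSetting` are the key, the default (`2`), and the inclusive minimum and maximum (`1` and, after this commit, `8`). A generic illustration of how such a bounded dynamic setting behaves (invented class, not the Elasticsearch `Setting` implementation):

// Hypothetical: a dynamic integer setting with a default and inclusive bounds,
// mirroring the (key, defaultValue, minValue, maxValue, ...) argument order.
final class BoundedIntSetting {
    final String key;
    final int defaultValue, min, max;

    BoundedIntSetting(String key, int defaultValue, int min, int max) {
        if (defaultValue < min || defaultValue > max) {
            throw new IllegalArgumentException("default out of range for [" + key + "]");
        }
        this.key = key;
        this.defaultValue = defaultValue;
        this.min = min;
        this.max = max;
    }

    int get(Integer configured) {
        if (configured == null) {
            return defaultValue; // setting not configured: use the default
        }
        if (configured < min || configured > max) {
            throw new IllegalArgumentException("value [" + configured + "] for setting ["
                + key + "] must be between [" + min + "] and [" + max + "]");
        }
        return configured;
    }
}

// Usage mirroring this commit: the upper bound was raised from 5 to 8.
// new BoundedIntSetting("indices.recovery.max_concurrent_file_chunks", 2, 1, 8);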
@@ -16,6 +16,7 @@
 import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.ActionListenerResponseHandler;
+import org.elasticsearch.action.ActionRunnable;
 import org.elasticsearch.action.support.RetryableAction;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.breaker.CircuitBreakingException;
@@ -202,12 +203,17 @@ public void writeFileChunk(StoreFileMetadata fileMetadata, long position, Releas
         final RecoveryFileChunkRequest request = new RecoveryFileChunkRequest(
             recoveryId, requestSeqNo, shardId, fileMetadata, position, content, lastChunk, totalTranslogOps, throttleTimeInNanos);
         final Writeable.Reader<TransportResponse.Empty> reader = in -> TransportResponse.Empty.INSTANCE;
-        executeRetryableAction(
-            action,
-            request,
-            fileChunkRequestOptions,
-            ActionListener.runBefore(listener.map(r -> null), request::decRef),
-            reader);
+
+        // Fork the actual sending onto a separate thread so we can send them concurrently even if CPU-bound (e.g. using compression).
+        // The AsyncIOProcessor and MultiFileWriter both concentrate their work onto fewer threads if possible, but once we have
+        // chunks to send we want to increase parallelism again.
+        threadPool.generic().execute(ActionRunnable.wrap(listener, l ->
+            executeRetryableAction(
+                action,
+                request,
+                fileChunkRequestOptions,
+                ActionListener.runBefore(l.map(r -> null), request::decRef),
+                reader)));
     }
 
     @Override
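The forked task is wrapped via `ActionRunnable.wrap` so that an exception thrown on the generic-pool thread still completes the listener rather than being lost. A simplified re-implementation of that idea (invented names, not the Elasticsearch class):

// Sketch of the idea behind ActionRunnable.wrap: run a listener-accepting
// body on another thread and route any exception it throws into the
// listener's failure path, so a fork can never silently drop a failure.
interface Listener<T> {
    void onResponse(T response);
    void onFailure(Exception e);
}

interface CheckedConsumer<U> {
    void accept(U u) throws Exception;
}

final class WrappedRunnable<T> implements Runnable {
    private final Listener<T> listener;
    private final CheckedConsumer<Listener<T>> body;

    WrappedRunnable(Listener<T> listener, CheckedConsumer<Listener<T>> body) {
        this.listener = listener;
        this.body = body;
    }

    @Override
    public void run() {
        try {
            body.accept(listener);  // e.g. l -> executeRetryableAction(..., l, ...)
        } catch (Exception e) {
            listener.onFailure(e);  // the failure still reaches the caller
        }
    }
}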
