From d371a3dd31bc62fde11c7a9d9c3ff0124cd8a696 Mon Sep 17 00:00:00 2001
From: "opensearch-trigger-bot[bot]"
 <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com>
Date: Fri, 22 Mar 2024 00:29:14 +0530
Subject: [PATCH] Created new ReplicaShardBatchAllocator (#8992) (#12814)

* Created new ReplicaShardBatchAllocator to be used instead of ReplicaShardAllocator for batch calls


(cherry picked from commit 4dbd6fa6c35f7a3d0c5e2ee284c2f8b3cc829a32)

Signed-off-by: Shivansh Arora <shivansh.arora@protonmail.com>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
---
 .../gateway/ReplicaShardBatchAllocator.java   | 188 ++++
 .../ReplicaShardBatchAllocatorTests.java      | 849 ++++++++++++++++++
 2 files changed, 1037 insertions(+)
 create mode 100644 server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java
 create mode 100644 server/src/test/java/org/opensearch/gateway/ReplicaShardBatchAllocatorTests.java
diff --git a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java
new file mode 100644
index 0000000000000..3459f1591b633
--- /dev/null
+++ b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java
@@ -0,0 +1,188 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway;
+
+import org.apache.logging.log4j.Logger;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.routing.ShardRouting;
+import org.opensearch.cluster.routing.UnassignedInfo;
+import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision;
+import org.opensearch.cluster.routing.allocation.NodeAllocationResult;
+import org.opensearch.cluster.routing.allocation.RoutingAllocation;
+import org.opensearch.cluster.routing.allocation.decider.Decision;
+import org.opensearch.common.collect.Tuple;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.gateway.AsyncShardFetch.FetchResult;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadata;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataHelper.StoreFilesMetadata;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Allocates replica shards in a batch mode
+ *
+ * @opensearch.internal
+ */
+public abstract class ReplicaShardBatchAllocator extends ReplicaShardAllocator {
+
+    /**
+     * Process existing recoveries of replicas and see if we need to cancel them if we find a better
+     * match. Today, a better match is one that can perform a no-op recovery while the previous recovery
+     * has to copy segment files.
+     *
+     * @param allocation the overall routing allocation
+     * @param shardBatches a list of shard batches to check for existing recoveries
+     */
+    public void processExistingRecoveries(RoutingAllocation allocation, List<List<ShardRouting>> shardBatches) {
+        List<Runnable> shardCancellationActions = new ArrayList<>();
+        // iterate through the batches; each batch is processed as a unit because one fetch call is made for all of its shards
+        for (List<ShardRouting> shardBatch : shardBatches) {
+            List<ShardRouting> eligibleShards = new ArrayList<>();
+            List<ShardRouting> ineligibleShards = new ArrayList<>();
+            // iterate over shards to check for match for each of those
+            for (ShardRouting shard : shardBatch) {
+                if (shard != null && !shard.primary()) {
+                    // need to iterate over all the nodes to find matching shard
+                    if (shouldSkipFetchForRecovery(shard)) {
+                        ineligibleShards.add(shard);
+                        continue;
+                    }
+                    eligibleShards.add(shard);
+                }
+            }
+            AsyncShardFetch.FetchResult<NodeStoreFilesMetadataBatch> shardState = fetchData(eligibleShards, ineligibleShards, allocation);
+            if (!shardState.hasData()) {
+                logger.trace("{}: fetching new stores for initializing shard batch", eligibleShards);
+                continue; // still fetching
+            }
+            for (ShardRouting shard : eligibleShards) {
+                Map<DiscoveryNode, StoreFilesMetadata> nodeShardStores = convertToNodeStoreFilesMetadataMap(shard, shardState);
+
+                Runnable cancellationAction = cancelExistingRecoveryForBetterMatch(shard, allocation, nodeShardStores);
+                if (cancellationAction != null) {
+                    shardCancellationActions.add(cancellationAction);
+                }
+            }
+        }
+        for (Runnable action : shardCancellationActions) {
+            action.run();
+        }
+    }
+
+    abstract protected FetchResult<NodeStoreFilesMetadataBatch> fetchData(
+        List<ShardRouting> eligibleShards,
+        List<ShardRouting> ineligibleShards,
+        RoutingAllocation allocation
+    );
+
+    @Override
+    protected FetchResult<TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata> fetchData(
+        ShardRouting shard,
+        RoutingAllocation allocation
+    ) {
+        logger.error("fetchData for single shard called via batch allocator");
+        throw new IllegalStateException("ReplicaShardBatchAllocator should only be used for a batch of shards");
+    }
+
+    @Override
+    public AllocateUnassignedDecision makeAllocationDecision(ShardRouting unassignedShard, RoutingAllocation allocation, Logger logger) {
+        return makeAllocationDecision(Collections.singletonList(unassignedShard), allocation, logger).get(unassignedShard);
+    }
+
+    @Override
+    public HashMap<ShardRouting, AllocateUnassignedDecision> makeAllocationDecision(
+        List<ShardRouting> shards,
+        RoutingAllocation allocation,
+        Logger logger
+    ) {
+        HashMap<ShardRouting, AllocateUnassignedDecision> shardAllocationDecisions = new HashMap<>();
+        final boolean explain = allocation.debugDecision();
+        List<ShardRouting> eligibleShards = new ArrayList<>();
+        List<ShardRouting> ineligibleShards = new ArrayList<>();
+        HashMap<ShardRouting, Tuple<Decision, Map<String, NodeAllocationResult>>> nodeAllocationDecisions = new HashMap<>();
+        for (ShardRouting shard : shards) {
+            if (!isResponsibleFor(shard)) {
+                // this allocator is not responsible for allocating this shard
+                ineligibleShards.add(shard);
+                shardAllocationDecisions.put(shard, AllocateUnassignedDecision.NOT_TAKEN);
+                continue;
+            }
+
+            Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(shard, allocation);
+            Decision allocationDecision = result.v1();
+            if (allocationDecision.type() != Decision.Type.YES && (!explain || !hasInitiatedFetching(shard))) {
+                // only return early if we are not in explain mode, or we are in explain mode but we have not
+                // yet attempted to fetch any shard data
+                logger.trace("{}: ignoring allocation, can't be allocated on any node", shard);
+                shardAllocationDecisions.put(
+                    shard,
+                    AllocateUnassignedDecision.no(
+                        UnassignedInfo.AllocationStatus.fromDecision(allocationDecision.type()),
+                        result.v2() != null ? new ArrayList<>(result.v2().values()) : null
+                    )
+                );
+                continue;
+            }
+            // store the result for every shard that reaches this point (whatever its decision type)
+            // so that we don't have to compute the decisions again
+            nodeAllocationDecisions.put(shard, result);
+
+            eligibleShards.add(shard);
+        }
+
+        // Do not call fetchData if there are no eligible shards
+        if (eligibleShards.isEmpty()) {
+            return shardAllocationDecisions;
+        }
+        // only fetch data for eligible shards
+        final FetchResult<NodeStoreFilesMetadataBatch> shardsState = fetchData(eligibleShards, ineligibleShards, allocation);
+
+        for (ShardRouting unassignedShard : eligibleShards) {
+            Tuple<Decision, Map<String, NodeAllocationResult>> result = nodeAllocationDecisions.get(unassignedShard);
+            shardAllocationDecisions.put(
+                unassignedShard,
+                getAllocationDecision(
+                    unassignedShard,
+                    allocation,
+                    convertToNodeStoreFilesMetadataMap(unassignedShard, shardsState),
+                    result,
+                    logger
+                )
+            );
+        }
+        return shardAllocationDecisions;
+    }
+
+    private Map<DiscoveryNode, StoreFilesMetadata> convertToNodeStoreFilesMetadataMap(
+        ShardRouting unassignedShard,
+        FetchResult<NodeStoreFilesMetadataBatch> data
+    ) {
+        if (!data.hasData()) {
+            return null;
+        }
+
+        Map<DiscoveryNode, StoreFilesMetadata> map = new HashMap<>();
+
+        data.getData().forEach((discoveryNode, value) -> {
+            Map<ShardId, NodeStoreFilesMetadata> batch = value.getNodeStoreFilesMetadataBatch();
+            NodeStoreFilesMetadata metadata = batch.get(unassignedShard.shardId());
+            if (metadata != null) {
+                map.put(discoveryNode, metadata.storeFilesMetadata());
+            }
+        });
+
+        return map;
+    }
+}
diff --git a/server/src/test/java/org/opensearch/gateway/ReplicaShardBatchAllocatorTests.java b/server/src/test/java/org/opensearch/gateway/ReplicaShardBatchAllocatorTests.java
new file mode 100644
index 0000000000000..464038c93228b
--- /dev/null
+++ b/server/src/test/java/org/opensearch/gateway/ReplicaShardBatchAllocatorTests.java
@@ -0,0 +1,849 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway;
+
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+
+import org.opensearch.Version;
+import org.opensearch.cluster.ClusterInfo;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.OpenSearchAllocationTestCase;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.cluster.metadata.Metadata;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.cluster.node.DiscoveryNodes;
+import org.opensearch.cluster.routing.IndexRoutingTable;
+import org.opensearch.cluster.routing.IndexShardRoutingTable;
+import org.opensearch.cluster.routing.RecoverySource;
+import org.opensearch.cluster.routing.RoutingNode;
+import org.opensearch.cluster.routing.RoutingNodes;
+import org.opensearch.cluster.routing.RoutingTable;
+import org.opensearch.cluster.routing.ShardRouting;
+import org.opensearch.cluster.routing.ShardRoutingState;
+import org.opensearch.cluster.routing.TestShardRouting;
+import org.opensearch.cluster.routing.UnassignedInfo;
+import org.opensearch.cluster.routing.allocation.RoutingAllocation;
+import org.opensearch.cluster.routing.allocation.decider.AllocationDecider;
+import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders;
+import org.opensearch.cluster.routing.allocation.decider.Decision;
+import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider;
+import org.opensearch.common.Nullable;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.common.util.set.Sets;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.seqno.ReplicationTracker;
+import org.opensearch.index.seqno.RetentionLease;
+import org.opensearch.index.store.Store;
+import org.opensearch.index.store.StoreFileMetadata;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch;
+import org.opensearch.indices.store.TransportNodesListShardStoreMetadataHelper;
+import org.opensearch.snapshots.SnapshotShardSizeInfo;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static java.util.Collections.unmodifiableMap;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
+
+public class ReplicaShardBatchAllocatorTests extends OpenSearchAllocationTestCase {
+    private static final org.apache.lucene.util.Version MIN_SUPPORTED_LUCENE_VERSION = org.opensearch.Version.CURRENT
+        .minimumIndexCompatibilityVersion().luceneVersion;
+    private final ShardId shardId = new ShardId("test", "_na_", 0);
+    private final DiscoveryNode node1 = newNode("node1");
+    private final DiscoveryNode node2 = newNode("node2");
+    private final DiscoveryNode node3 = newNode("node3");
+
+    private TestBatchAllocator testBatchAllocator;
+
+    @Before
+    public void buildTestAllocator() {
+        this.testBatchAllocator = new TestBatchAllocator();
+    }
+
+    private void allocateAllUnassignedBatch(final RoutingAllocation allocation) {
+        final RoutingNodes.UnassignedShards.UnassignedIterator iterator = allocation.routingNodes().unassigned().iterator();
+        List<ShardRouting> shardToBatch = new ArrayList<>();
+        while (iterator.hasNext()) {
+            shardToBatch.add(iterator.next());
+        }
+        testBatchAllocator.allocateUnassignedBatch(shardToBatch, allocation);
+    }
+
+    /**
+     * Verifies that when we are still fetching data in an async manner, the replica shard moves to the ignored unassigned list.
+     */
+    public void testNoAsyncFetchData() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        testBatchAllocator.clean();
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().unassigned().ignored().size(), equalTo(1));
+        assertThat(allocation.routingNodes().unassigned().ignored().get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Verifies that on index creation, we don't fetch data for any shards, but keep the replica shard unassigned to let
+     * the shard allocator allocate it. There isn't a copy around to find anyhow.
+     */
+    public void testAsyncFetchWithNoShardOnIndexCreation() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(
+            yesAllocationDeciders(),
+            Settings.EMPTY,
+            UnassignedInfo.Reason.INDEX_CREATED
+        );
+        testBatchAllocator.clean();
+        allocateAllUnassignedBatch(allocation);
+        assertThat(testBatchAllocator.getFetchDataCalledAndClean(), equalTo(false));
+        assertThat(testBatchAllocator.getShardEligibleFetchDataCountAndClean(), equalTo(0));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Verifies that for any unassigned reason other than index creation, fetch data ends up being called,
+     * since we need to try to find a better copy for the shard. The reason is picked at random per run.
+     */
+    public void testAsyncFetchOnAnythingButIndexCreation() {
+        UnassignedInfo.Reason reason = RandomPicks.randomFrom(
+            random(),
+            EnumSet.complementOf(EnumSet.of(UnassignedInfo.Reason.INDEX_CREATED))
+        );
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders(), Settings.EMPTY, reason);
+        testBatchAllocator.clean();
+        allocateAllUnassignedBatch(allocation);
+        assertThat("failed with reason " + reason, testBatchAllocator.getFetchDataCalledAndClean(), equalTo(true));
+        assertThat("failed with reason " + reason, testBatchAllocator.getShardEligibleFetchDataCountAndClean(), equalTo(1));
+    }
+
+    /**
+     * Verifies that when there is a full match (syncId and files) we allocate it to the matching node.
+     */
+    public void testSimpleFullMatchAllocation() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        DiscoveryNode nodeToMatch = randomBoolean() ? node2 : node3;
+        testBatchAllocator.addData(node1, "MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION))
+            .addData(nodeToMatch, "MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertThat(
+            allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
+            equalTo(nodeToMatch.getId())
+        );
+    }
+
+    /**
+     * Verifies that when there is a sync id match but no files match, we allocate it to the matching node.
+     */
+    public void testSyncIdMatch() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        DiscoveryNode nodeToMatch = randomBoolean() ? node2 : node3;
+        testBatchAllocator.addData(node1, "MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION))
+            .addData(nodeToMatch, "MATCH", null, new StoreFileMetadata("file1", 10, "NO_MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertThat(
+            allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
+            equalTo(nodeToMatch.getId())
+        );
+    }
+
+    /**
+     * Verifies that when there is no sync id match but files match, we allocate it to the matching node.
+     */
+    public void testFileChecksumMatch() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        DiscoveryNode nodeToMatch = randomBoolean() ? node2 : node3;
+        testBatchAllocator.addData(node1, "MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION))
+            .addData(nodeToMatch, "NO_MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertThat(
+            allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
+            equalTo(nodeToMatch.getId())
+        );
+    }
+
+    public void testPreferCopyWithHighestMatchingOperations() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        long retainingSeqNoOnPrimary = randomLongBetween(1, Integer.MAX_VALUE);
+        long retainingSeqNoForNode2 = randomLongBetween(0, retainingSeqNoOnPrimary - 1);
+        // Rarely use a seqNo above retainingSeqNoOnPrimary, which could in theory happen when primary fails and comes back quickly.
+        long retainingSeqNoForNode3 = randomLongBetween(retainingSeqNoForNode2 + 1, retainingSeqNoOnPrimary + 100);
+        List<RetentionLease> retentionLeases = Arrays.asList(
+            newRetentionLease(node1, retainingSeqNoOnPrimary),
+            newRetentionLease(node2, retainingSeqNoForNode2),
+            newRetentionLease(node3, retainingSeqNoForNode3)
+        );
+        testBatchAllocator.addData(
+            node1,
+            retentionLeases,
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node2,
+            "NOT_MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node3,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertThat(
+            allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
+            equalTo(node3.getId())
+        );
+    }
+
+    public void testCancelRecoveryIfFoundCopyWithNoopRetentionLease() {
+        final UnassignedInfo unassignedInfo;
+        final Set<String> failedNodes;
+        if (randomBoolean()) {
+            failedNodes = Collections.emptySet();
+            unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null);
+        } else {
+            failedNodes = new HashSet<>(randomSubsetOf(Arrays.asList("node-4", "node-5", "node-6")));
+            unassignedInfo = new UnassignedInfo(
+                UnassignedInfo.Reason.ALLOCATION_FAILED,
+                null,
+                null,
+                randomIntBetween(1, 10),
+                System.nanoTime(),
+                System.currentTimeMillis(),
+                false,
+                UnassignedInfo.AllocationStatus.NO_ATTEMPT,
+                failedNodes
+            );
+        }
+        RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders(), unassignedInfo);
+        long retainingSeqNo = randomLongBetween(1, Long.MAX_VALUE);
+        testBatchAllocator.addData(
+            node1,
+            Arrays.asList(newRetentionLease(node1, retainingSeqNo), newRetentionLease(node3, retainingSeqNo)),
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node2,
+            "NO_MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node3,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        Collection<ShardRouting> replicaShards = allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED);
+        List<ShardRouting> shardRoutingBatch = new ArrayList<>(replicaShards);
+        List<List<ShardRouting>> shardBatchList = Collections.singletonList(
+            new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING))
+        );
+
+        testBatchAllocator.processExistingRecoveries(allocation, shardBatchList);
+        assertThat(allocation.routingNodesChanged(), equalTo(true));
+        List<ShardRouting> unassignedShards = allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED);
+        assertThat(unassignedShards, hasSize(1));
+        assertThat(unassignedShards.get(0).shardId(), equalTo(shardId));
+        assertThat(unassignedShards.get(0).unassignedInfo().getNumFailedAllocations(), equalTo(0));
+        assertThat(unassignedShards.get(0).unassignedInfo().getFailedNodeIds(), equalTo(failedNodes));
+    }
+
+    public void testNotCancellingRecoveryIfCurrentRecoveryHasRetentionLease() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders());
+        List<RetentionLease> peerRecoveryRetentionLeasesOnPrimary = new ArrayList<>();
+        long retainingSeqNo = randomLongBetween(1, Long.MAX_VALUE);
+        peerRecoveryRetentionLeasesOnPrimary.add(newRetentionLease(node1, retainingSeqNo));
+        peerRecoveryRetentionLeasesOnPrimary.add(newRetentionLease(node2, randomLongBetween(1, retainingSeqNo)));
+        if (randomBoolean()) {
+            peerRecoveryRetentionLeasesOnPrimary.add(newRetentionLease(node3, randomLongBetween(0, retainingSeqNo)));
+        }
+        testBatchAllocator.addData(
+            node1,
+            peerRecoveryRetentionLeasesOnPrimary,
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node2,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node3,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.processExistingRecoveries(
+            allocation,
+            Collections.singletonList(new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING)))
+        );
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
+    }
+
+    public void testNotCancelIfPrimaryDoesNotHaveValidRetentionLease() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders());
+        testBatchAllocator.addData(
+            node1,
+            Collections.singletonList(newRetentionLease(node3, randomNonNegativeLong())),
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node2,
+            "NOT_MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(
+            node3,
+            "NOT_MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.processExistingRecoveries(
+            allocation,
+            Collections.singletonList(new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING)))
+        );
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
+    }
+
+    public void testIgnoreRetentionLeaseIfCopyIsEmpty() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        long retainingSeqNo = randomLongBetween(1, Long.MAX_VALUE);
+        List<RetentionLease> retentionLeases = new ArrayList<>();
+        retentionLeases.add(newRetentionLease(node1, retainingSeqNo));
+        retentionLeases.add(newRetentionLease(node2, randomLongBetween(0, retainingSeqNo)));
+        if (randomBoolean()) {
+            retentionLeases.add(newRetentionLease(node3, randomLongBetween(0, retainingSeqNo)));
+        }
+        testBatchAllocator.addData(
+            node1,
+            retentionLeases,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        testBatchAllocator.addData(node2, null, null); // has retention lease but store is empty
+        testBatchAllocator.addData(
+            node3,
+            randomSyncId(),
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(1));
+        assertThat(
+            allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING).get(0).currentNodeId(),
+            equalTo(node3.getId())
+        );
+    }
+
+    /**
+     * When we can't find primary data, but still find replica data, we go ahead and keep it unassigned
+     * to be allocated. This is today's behavior, which relies on a primary corruption being identified by
+     * adding a replica and having that replica actually recover and cause the corruption to be surfaced.
+     * See CorruptFileTest.
+     */
+    public void testNoPrimaryData() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        testBatchAllocator.addData(
+            node2,
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Verifies that when there is primary data, but no data at all on other nodes, the shard remains
+     * unassigned to be allocated later on.
+     */
+    public void testNoDataForReplicaOnAnyNode() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        testBatchAllocator.addData(
+            node1,
+            "MATCH",
+            null,
+            new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
+        );
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Verifies that when there is primary data, but no matching data at all on other nodes, the shard remains
+     * unassigned to be allocated later on.
+     */
+    public void testNoMatchingFilesForReplicaOnAnyNode() {
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders());
+        testBatchAllocator.addData(node1, "MATCH", null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION))
+            .addData(node2, "NO_MATCH", null, new StoreFileMetadata("file1", 10, "NO_MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(1));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * When the deciders answer NO or THROTTLE (picked at random) for the shard, the replica must end up
+     * in the ignored list of unassigned shards, even though node2 reports a store matching the primary.
+     */
+    public void testNoOrThrottleDecidersRemainsInUnassigned() {
+        final AllocationDeciders deciders;
+        if (randomBoolean()) {
+            deciders = noAllocationDeciders();
+        } else {
+            deciders = throttleAllocationDeciders();
+        }
+        final RoutingAllocation routingAllocation = onePrimaryOnNode1And1Replica(deciders);
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, "MATCH", null, matchingFile);
+        testBatchAllocator.addData(node2, "MATCH", null, matchingFile);
+
+        allocateAllUnassignedBatch(routingAllocation);
+
+        final List<ShardRouting> ignoredShards = routingAllocation.routingNodes().unassigned().ignored();
+        assertThat(ignoredShards.size(), equalTo(1));
+        assertThat(ignoredShards.get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Tests that when the node chosen because of its matching store (node2) is being throttled, the
+     * shard is moved to the ignored list to wait until throttling on that node is over.
+     */
+    public void testThrottleWhenAllocatingToMatchingNode() {
+        final TestAllocateDecision alwaysYes = new TestAllocateDecision(Decision.YES);
+        final SameShardAllocationDecider sameShard = new SameShardAllocationDecider(
+            Settings.EMPTY,
+            new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)
+        );
+        // answers THROTTLE for node2 only; every other node gets YES
+        final AllocationDecider throttleOnNode2 = new AllocationDecider() {
+            @Override
+            public Decision canAllocate(ShardRouting shardRouting, RoutingNode routingNode, RoutingAllocation routingAllocation) {
+                return routingNode.node().equals(node2) ? Decision.THROTTLE : Decision.YES;
+            }
+        };
+        final RoutingAllocation allocation = onePrimaryOnNode1And1Replica(
+            new AllocationDeciders(Arrays.asList(alwaysYes, sameShard, throttleOnNode2))
+        );
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, "MATCH", null, matchingFile);
+        testBatchAllocator.addData(node2, "MATCH", null, matchingFile);
+
+        allocateAllUnassignedBatch(allocation);
+
+        final List<ShardRouting> ignoredShards = allocation.routingNodes().unassigned().ignored();
+        assertThat(ignoredShards.size(), equalTo(1));
+        assertThat(ignoredShards.get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * Exercises a replica that is unassigned with reason NODE_LEFT under a one hour node-left timeout.
+     * In the first pass only node1 has store files registered (node2 optionally has an entry with no
+     * files); the test expects the routing nodes to be unchanged and the replica to be in the ignored
+     * list. In the second pass node2 also has a matching store registered; the test expects the replica
+     * to be initializing on node2.
+     */
+    public void testDelayedAllocation() {
+        final Settings oneHourDelay = Settings.builder()
+            .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), TimeValue.timeValueHours(1))
+            .build();
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+
+        // first pass: no usable copy on node2
+        RoutingAllocation allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders(), oneHourDelay, UnassignedInfo.Reason.NODE_LEFT);
+        testBatchAllocator.addData(node1, "MATCH", null, matchingFile);
+        if (randomBoolean()) {
+            // an entry with an empty list of files is sometimes returned, so cover that case as well
+            testBatchAllocator.addData(node2, null, null);
+        }
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        final List<ShardRouting> ignoredShards = allocation.routingNodes().unassigned().ignored();
+        assertThat(ignoredShards.size(), equalTo(1));
+        assertThat(ignoredShards.get(0).shardId(), equalTo(shardId));
+
+        // second pass: node2 now has a matching store registered
+        allocation = onePrimaryOnNode1And1Replica(yesAllocationDeciders(), oneHourDelay, UnassignedInfo.Reason.NODE_LEFT);
+        testBatchAllocator.addData(node2, "MATCH", null, matchingFile);
+        allocateAllUnassignedBatch(allocation);
+        assertThat(allocation.routingNodesChanged(), equalTo(true));
+        final List<ShardRouting> initializingShards = allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING);
+        assertThat(initializingShards.size(), equalTo(1));
+        assertThat(initializingShards.get(0).currentNodeId(), equalTo(node2.getId()));
+    }
+
+    /**
+     * The replica is recovering on node2, whose sync id ("NO_MATCH") differs from the one registered for
+     * the primary's node ("MATCH"), while node3 has the primary's sync id. After processing existing
+     * recoveries the test expects the routing nodes to have changed and the shard to be unassigned.
+     */
+    public void testCancelRecoveryBetterSyncId() {
+        final RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders());
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, "MATCH", null, matchingFile);
+        testBatchAllocator.addData(node2, "NO_MATCH", null, matchingFile);
+        testBatchAllocator.addData(node3, "MATCH", null, matchingFile);
+
+        final List<ShardRouting> recovering = new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING));
+        testBatchAllocator.processExistingRecoveries(allocation, Collections.singletonList(recovering));
+
+        assertThat(allocation.routingNodesChanged(), equalTo(true));
+        final List<ShardRouting> unassignedShards = allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED);
+        assertThat(unassignedShards.size(), equalTo(1));
+        assertThat(unassignedShards.get(0).shardId(), equalTo(shardId));
+    }
+
+    /**
+     * The replica is recovering on node2 and node2 has the same sync id ("MATCH") as the primary's node,
+     * while node3 gets a random sync id. The primary's data optionally carries peer recovery retention
+     * leases for node1 and, at random, node2 and node3. After processing existing recoveries the test
+     * expects the routing nodes to be unchanged and no shard to be unassigned.
+     */
+    public void testNotCancellingRecoveryIfSyncedOnExistingRecovery() {
+        final UnassignedInfo unassignedInfo = randomBoolean()
+            ? new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null)
+            : new UnassignedInfo(
+                UnassignedInfo.Reason.ALLOCATION_FAILED,
+                null,
+                null,
+                randomIntBetween(1, 10),
+                System.nanoTime(),
+                System.currentTimeMillis(),
+                false,
+                UnassignedInfo.AllocationStatus.NO_ATTEMPT,
+                Collections.singleton("node-4")
+            );
+        final RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders(), unassignedInfo);
+
+        final List<RetentionLease> leases = new ArrayList<>();
+        if (randomBoolean()) {
+            final long primarySeqNo = randomLongBetween(0, Long.MAX_VALUE);
+            leases.add(newRetentionLease(node1, primarySeqNo));
+            // node2 first, then node3, each added independently at random
+            for (DiscoveryNode otherNode : Arrays.asList(node2, node3)) {
+                if (randomBoolean()) {
+                    leases.add(newRetentionLease(otherNode, randomLongBetween(0, primarySeqNo)));
+                }
+            }
+        }
+
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, leases, "MATCH", null, matchingFile)
+            .addData(node2, "MATCH", null, matchingFile)
+            .addData(node3, randomSyncId(), null, matchingFile);
+
+        final List<ShardRouting> recovering = new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING));
+        testBatchAllocator.processExistingRecoveries(allocation, Collections.singletonList(recovering));
+
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
+    }
+
+    /**
+     * The replica is recovering on node2, and node2 has the same sync id and file registered as the
+     * primary's node. After processing existing recoveries the test expects the routing nodes to be
+     * unchanged and no shard to be unassigned.
+     */
+    public void testNotCancellingRecovery() {
+        final RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders());
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, "MATCH", null, matchingFile);
+        testBatchAllocator.addData(node2, "MATCH", null, matchingFile);
+
+        final List<ShardRouting> recovering = new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING));
+        testBatchAllocator.processExistingRecoveries(allocation, Collections.singletonList(recovering));
+
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).size(), equalTo(0));
+    }
+
+    /**
+     * The replica is recovering on node2 and its {@link UnassignedInfo} lists node3 (and at random
+     * "node4") as failed nodes, while the primary's data carries retention leases for node1 and node3.
+     * After processing existing recoveries the test expects the routing nodes to be unchanged and no
+     * shard to be unassigned.
+     */
+    public void testDoNotCancelForBrokenNode() {
+        final Set<String> brokenNodes = new HashSet<>();
+        brokenNodes.add(node3.getId());
+        if (randomBoolean()) {
+            brokenNodes.add("node4");
+        }
+        final UnassignedInfo unassignedInfo = new UnassignedInfo(
+            UnassignedInfo.Reason.ALLOCATION_FAILED,
+            null,
+            null,
+            randomIntBetween(brokenNodes.size(), 10),
+            System.nanoTime(),
+            System.currentTimeMillis(),
+            false,
+            UnassignedInfo.AllocationStatus.NO_ATTEMPT,
+            brokenNodes
+        );
+        final RoutingAllocation allocation = onePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders(), unassignedInfo);
+
+        final long primarySeqNo = randomLongBetween(0, Long.MAX_VALUE);
+        final RetentionLease leaseForNode1 = newRetentionLease(node1, primarySeqNo);
+        final RetentionLease leaseForNode3 = newRetentionLease(node3, primarySeqNo);
+        final List<RetentionLease> leases = Arrays.asList(leaseForNode1, leaseForNode3);
+
+        final StoreFileMetadata matchingFile = new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION);
+        testBatchAllocator.addData(node1, leases, "MATCH", null, matchingFile);
+        testBatchAllocator.addData(node2, randomSyncId(), null, matchingFile);
+        testBatchAllocator.addData(node3, randomSyncId(), null, matchingFile);
+
+        final List<ShardRouting> recovering = new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING));
+        testBatchAllocator.processExistingRecoveries(allocation, Collections.singletonList(recovering));
+
+        assertThat(allocation.routingNodesChanged(), equalTo(false));
+        assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED), empty());
+    }
+
+    private RoutingAllocation onePrimaryOnNode1And1Replica(AllocationDeciders deciders) {
+        return onePrimaryOnNode1And1Replica(deciders, Settings.EMPTY, UnassignedInfo.Reason.CLUSTER_RECOVERED);
+    }
+
+    private RoutingAllocation onePrimaryOnNode1And1Replica(AllocationDeciders deciders, Settings settings, UnassignedInfo.Reason reason) {
+        ShardRouting primaryShard = TestShardRouting.newShardRouting(shardId, node1.getId(), true, ShardRoutingState.STARTED);
+        IndexMetadata.Builder indexMetadata = IndexMetadata.builder(shardId.getIndexName())
+            .settings(settings(Version.CURRENT).put(settings))
+            .numberOfShards(1)
+            .numberOfReplicas(1)
+            .putInSyncAllocationIds(0, Sets.newHashSet(primaryShard.allocationId().getId()));
+        Metadata metadata = Metadata.builder().put(indexMetadata).build();
+        // mark shard as delayed if reason is NODE_LEFT
+        boolean delayed = reason == UnassignedInfo.Reason.NODE_LEFT
+            && UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(settings).nanos() > 0;
+        int failedAllocations = reason == UnassignedInfo.Reason.ALLOCATION_FAILED ? 1 : 0;
+        RoutingTable routingTable = RoutingTable.builder()
+            .add(
+                IndexRoutingTable.builder(shardId.getIndex())
+                    .addIndexShard(
+                        new IndexShardRoutingTable.Builder(shardId).addShard(primaryShard)
+                            .addShard(
+                                ShardRouting.newUnassigned(
+                                    shardId,
+                                    false,
+                                    RecoverySource.PeerRecoverySource.INSTANCE,
+                                    new UnassignedInfo(
+                                        reason,
+                                        null,
+                                        null,
+                                        failedAllocations,
+                                        System.nanoTime(),
+                                        System.currentTimeMillis(),
+                                        delayed,
+                                        UnassignedInfo.AllocationStatus.NO_ATTEMPT,
+                                        Collections.emptySet()
+                                    )
+                                )
+                            )
+                            .build()
+                    )
+            )
+            .build();
+        ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
+            .metadata(metadata)
+            .routingTable(routingTable)
+            .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3))
+            .build();
+        return new RoutingAllocation(
+            deciders,
+            new RoutingNodes(state, false),
+            state,
+            ClusterInfo.EMPTY,
+            SnapshotShardSizeInfo.EMPTY,
+            System.nanoTime()
+        );
+    }
+
+    private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders, UnassignedInfo unassignedInfo) {
+        ShardRouting primaryShard = TestShardRouting.newShardRouting(shardId, node1.getId(), true, ShardRoutingState.STARTED);
+        Metadata metadata = Metadata.builder()
+            .put(
+                IndexMetadata.builder(shardId.getIndexName())
+                    .settings(settings(Version.CURRENT))
+                    .numberOfShards(1)
+                    .numberOfReplicas(1)
+                    .putInSyncAllocationIds(0, Sets.newHashSet(primaryShard.allocationId().getId()))
+            )
+            .build();
+        RoutingTable routingTable = RoutingTable.builder()
+            .add(
+                IndexRoutingTable.builder(shardId.getIndex())
+                    .addIndexShard(
+                        new IndexShardRoutingTable.Builder(shardId).addShard(primaryShard)
+                            .addShard(
+                                TestShardRouting.newShardRouting(
+                                    shardId,
+                                    node2.getId(),
+                                    null,
+                                    false,
+                                    ShardRoutingState.INITIALIZING,
+                                    unassignedInfo
+                                )
+                            )
+                            .build()
+                    )
+            )
+            .build();
+        ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
+            .metadata(metadata)
+            .routingTable(routingTable)
+            .nodes(DiscoveryNodes.builder().add(node1).add(node2).add(node3))
+            .build();
+        return new RoutingAllocation(
+            deciders,
+            new RoutingNodes(state, false),
+            state,
+            ClusterInfo.EMPTY,
+            SnapshotShardSizeInfo.EMPTY,
+            System.nanoTime()
+        );
+    }
+
+    private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) {
+        return onePrimaryOnNode1And1ReplicaRecovering(deciders, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null));
+    }
+
+    static RetentionLease newRetentionLease(DiscoveryNode node, long retainingSeqNo) {
+        return new RetentionLease(
+            ReplicationTracker.getPeerRecoveryRetentionLeaseId(node.getId()),
+            retainingSeqNo,
+            randomNonNegativeLong(),
+            ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE
+        );
+    }
+
+    static String randomSyncId() {
+        return randomFrom("MATCH", "NOT_MATCH", null);
+    }
+
+    /**
+     * Test double for {@link ReplicaShardBatchAllocator} that answers {@code fetchData} from store
+     * metadata registered through {@code addData} instead of performing a real fetch, and records
+     * whether and with how many eligible shards {@code fetchData} was invoked.
+     */
+    class TestBatchAllocator extends ReplicaShardBatchAllocator {
+        // per-node store metadata served by fetchData; null until the first addData call and after clean()
+        private Map<DiscoveryNode, TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata> data = null;
+        // set by every fetchData call, reset by getFetchDataCalledAndClean()
+        private AtomicBoolean fetchDataCalled = new AtomicBoolean(false);
+        // number of eligible shards seen by the latest fetchData call, reset by getShardEligibleFetchDataCountAndClean()
+        private AtomicInteger eligibleShardFetchDataCount = new AtomicInteger(0);
+
+        /**
+         * Drops all registered store metadata; the fetch-tracking flags are left untouched.
+         */
+        public void clean() {
+            data = null;
+        }
+
+        /**
+         * @return whether {@code fetchData} was called since the last reset; resets the flag to false
+         */
+        public boolean getFetchDataCalledAndClean() {
+            return fetchDataCalled.getAndSet(false);
+        }
+
+        /**
+         * @return the eligible shard count recorded by the latest {@code fetchData} call; resets it to 0
+         */
+        public int getShardEligibleFetchDataCountAndClean() {
+            return eligibleShardFetchDataCount.getAndSet(0);
+        }
+
+        /**
+         * Registers store metadata for a node without any peer recovery retention leases.
+         *
+         * @param node                    node the metadata belongs to
+         * @param syncId                  sync id to put into the commit data, or null for none
+         * @param storeFileFetchException exception to attach to the node's metadata, may be null
+         * @param files                   store files reported for the node
+         * @return this allocator, to allow chaining
+         */
+        public TestBatchAllocator addData(
+            DiscoveryNode node,
+            String syncId,
+            @Nullable Exception storeFileFetchException,
+            StoreFileMetadata... files
+        ) {
+            return addData(node, Collections.emptyList(), syncId, storeFileFetchException, files);
+        }
+
+        /**
+         * Registers store metadata for a node, replacing anything registered for it before.
+         *
+         * @param node                        node the metadata belongs to
+         * @param peerRecoveryRetentionLeases retention leases stored alongside the file metadata
+         * @param syncId                      sync id to put into the commit data, or null for none
+         * @param storeFileFetchException     exception to attach to the node's metadata, may be null
+         * @param files                       store files reported for the node, keyed by file name
+         * @return this allocator, to allow chaining
+         */
+        public TestBatchAllocator addData(
+            DiscoveryNode node,
+            List<RetentionLease> peerRecoveryRetentionLeases,
+            String syncId,
+            @Nullable Exception storeFileFetchException,
+            StoreFileMetadata... files
+        ) {
+            if (data == null) {
+                data = new HashMap<>();
+            }
+            Map<String, StoreFileMetadata> filesAsMap = new HashMap<>();
+            for (StoreFileMetadata file : files) {
+                filesAsMap.put(file.name(), file);
+            }
+            Map<String, String> commitData = new HashMap<>();
+            if (syncId != null) {
+                commitData.put(Engine.SYNC_COMMIT_ID, syncId);
+            }
+            data.put(
+                node,
+                new TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata(
+                    new TransportNodesListShardStoreMetadataHelper.StoreFilesMetadata(
+                        shardId,
+                        new Store.MetadataSnapshot(unmodifiableMap(filesAsMap), unmodifiableMap(commitData), randomInt()),
+                        peerRecoveryRetentionLeases
+                    ),
+                    storeFileFetchException
+                )
+            );
+            return this;
+        }
+
+        /**
+         * Records the call and returns the registered metadata wrapped per node for the test's shard id.
+         * The result carries null data when nothing has been registered, and always carries an empty
+         * ignore set for the shard id.
+         */
+        @Override
+        protected AsyncShardFetch.FetchResult<NodeStoreFilesMetadataBatch> fetchData(
+            List<ShardRouting> eligibleShards,
+            List<ShardRouting> ineligibleShards,
+            RoutingAllocation allocation
+        ) {
+            fetchDataCalled.set(true);
+            eligibleShardFetchDataCount.set(eligibleShards.size());
+            Map<DiscoveryNode, NodeStoreFilesMetadataBatch> tData = null;
+            if (data != null) {
+                tData = new HashMap<>();
+                for (Map.Entry<DiscoveryNode, TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata> entry : data.entrySet()) {
+                    Map<ShardId, TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadata> shardData = Map.of(
+                        shardId,
+                        entry.getValue()
+                    );
+                    tData.put(
+                        entry.getKey(),
+                        new TransportNodesListShardStoreMetadataBatch.NodeStoreFilesMetadataBatch(entry.getKey(), shardData)
+                    );
+                }
+            }
+            // A plain HashMap instead of double-brace initialization: that idiom creates an anonymous
+            // HashMap subclass holding a reference to the enclosing instances for no benefit.
+            Map<ShardId, Set<String>> ignoreNodes = new HashMap<>();
+            ignoreNodes.put(shardId, Collections.emptySet());
+            return new AsyncShardFetch.FetchResult<>(tData, ignoreNodes);
+        }
+
+        /**
+         * @return true once {@code fetchData} has been called and the flag has not been reset since
+         */
+        @Override
+        protected boolean hasInitiatedFetching(ShardRouting shard) {
+            return fetchDataCalled.get();
+        }
+    }
+}