Skip to content

Commit

Permalink
[Segment Replication] Fix testAllocationWithDisruption flakyness (#6838)
Browse files Browse the repository at this point in the history
* [Segment Replication] Fix testAllocationWithDisruption flakyness

Signed-off-by: Suraj Singh <[email protected]>

* Use more number of nodes with lesser primary shards to decrease chances of primary accumulation on one node

Signed-off-by: Suraj Singh <[email protected]>

* Update comment

Signed-off-by: Suraj Singh <[email protected]>

---------

Signed-off-by: Suraj Singh <[email protected]>
  • Loading branch information
dreamer-89 authored Mar 27, 2023
1 parent 3fec567 commit 7500270
Showing 1 changed file with 21 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -166,15 +166,16 @@ public void testSingleIndexShardAllocation() throws Exception {
}

/**
* Similar to testSingleIndexShardAllocation test but creates multiple indices, multiple node adding in and getting
* removed. The test asserts post each such event that primary shard distribution is balanced across single index.
* Similar to testSingleIndexShardAllocation test but creates multiple indices, multiple nodes adding in and getting
* removed. The test asserts post each such event that primary shard distribution is balanced for each index.
*/
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/6565")
public void testAllocationWithDisruption() throws Exception {
internalCluster().startClusterManagerOnlyNode();
final int maxReplicaCount = 2;
final int maxShardCount = 5;
final int nodeCount = randomIntBetween(maxReplicaCount + 1, 10);
final int maxShardCount = 2;
// Create higher number of nodes than number of shards to reduce chances of SameShardAllocationDecider kicking-in
// and preventing primary relocations
final int nodeCount = randomIntBetween(5, 10);
final int numberOfIndices = randomIntBetween(1, 10);

logger.info("--> Creating {} nodes", nodeCount);
Expand All @@ -184,13 +185,11 @@ public void testAllocationWithDisruption() throws Exception {
}
enablePreferPrimaryBalance();

int shardCount, replicaCount, totalShardCount = 0, totalReplicaCount = 0;
int shardCount, replicaCount;
ClusterState state;
for (int i = 0; i < numberOfIndices; i++) {
shardCount = randomIntBetween(1, maxShardCount);
totalShardCount += shardCount;
replicaCount = randomIntBetween(1, maxReplicaCount);
totalReplicaCount += replicaCount;
logger.info("--> Creating index test{} with primary {} and replica {}", i, shardCount, replicaCount);
createIndex("test" + i, shardCount, replicaCount, i % 2 == 0);
ensureGreen(TimeValue.timeValueSeconds(60));
Expand All @@ -212,13 +211,15 @@ public void testAllocationWithDisruption() throws Exception {
logger.info(ShardAllocations.printShardDistribution(state));
verifyPerIndexPrimaryBalance();

logger.info("--> Stop one third nodes");
for (int i = 0; i < nodeCount; i += 3) {
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeNames.get(i)));
int nodeCountToStop = additionalNodeCount;
while (nodeCountToStop > 0) {
internalCluster().stopRandomDataNode();
// give replica a chance to promote as primary before terminating node containing the replica
ensureGreen(TimeValue.timeValueSeconds(60));
nodeCountToStop--;
}
state = client().admin().cluster().prepareState().execute().actionGet().getState();
logger.info("--> Cluster state post nodes stop {}", state);
logger.info(ShardAllocations.printShardDistribution(state));
verifyPerIndexPrimaryBalance();
}
Expand All @@ -240,6 +241,15 @@ private void verifyPerIndexPrimaryBalance() throws Exception {
.filter(ShardRouting::primary)
.collect(Collectors.toList())
.size();
if (primaryCount > avgPrimaryShardsPerNode) {
logger.info(
"--> Primary shard balance assertion failure for index {} on node {} {} <= {}",
index.key,
node.node().getName(),
primaryCount,
avgPrimaryShardsPerNode
);
}
assertTrue(primaryCount <= avgPrimaryShardsPerNode);
}
}
Expand Down

0 comments on commit 7500270

Please sign in to comment.