-
Notifications
You must be signed in to change notification settings - Fork 62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Handling exception in getAssignment method #881
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,9 +56,14 @@ class ShardReplicationExecutor(executor: String, private val clusterService : Cl | |
} | ||
|
||
/**
 * Picks the node on which the shard replication task should run.
 *
 * The task is assigned to the node that currently holds the follower shard's primary copy.
 * When the primary is not active, or when looking the shard up in the routing table fails
 * for any reason, [SHARD_NOT_ACTIVE] is returned instead of letting the exception escape.
 *
 * @param params parameters of the shard replication task; only `followerShardId` is read here
 * @param clusterState cluster state whose routing table is consulted
 * @return an assignment to the primary shard's current node, or [SHARD_NOT_ACTIVE]
 */
override fun getAssignment(params: ShardReplicationParams, clusterState: ClusterState) : Assignment {
    return try {
        val primaryShard = clusterState.routingTable().shardRoutingTable(params.followerShardId).primaryShard()
        if (primaryShard.active()) {
            Assignment(primaryShard.currentNodeId(), "node with primary shard")
        } else {
            SHARD_NOT_ACTIVE
        }
    } catch (e: Exception) {
        // The routing lookup must stay inside the try block: a failure here is reported as
        // "shard not active" rather than propagated to the caller.
        log.error("Failed to assign shard replication task with id ${params.followerShardId}", e)
        SHARD_NOT_ACTIVE
    }
}
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's add unit tests for the assignment logic. Can we also validate whether any changes are required for the Index Replication task? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The Index Replication task is not attached to a particular node, so its assignment can happen on any node. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unit tests have been added. |
||
|
||
override fun nodeOperation(task: AllocatedPersistentTask, params: ShardReplicationParams, state: PersistentTaskState?) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
package org.opensearch.replication.task.shard | ||
|
||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope | ||
import org.junit.Assert | ||
import org.junit.Before | ||
import org.junit.Test | ||
import org.mockito.Mockito | ||
import org.opensearch.Version | ||
import org.opensearch.cluster.ClusterState | ||
import org.opensearch.cluster.metadata.IndexMetadata | ||
import org.opensearch.cluster.metadata.Metadata | ||
import org.opensearch.cluster.routing.* | ||
import org.opensearch.common.unit.TimeValue | ||
import org.opensearch.core.xcontent.NamedXContentRegistry | ||
import org.opensearch.index.Index | ||
import org.opensearch.index.shard.ShardId | ||
import org.opensearch.replication.ReplicationSettings | ||
import org.opensearch.replication.metadata.ReplicationMetadataManager | ||
import org.opensearch.replication.metadata.store.ReplicationMetadataStore | ||
import org.opensearch.replication.task.index.* | ||
import org.opensearch.test.ClusterServiceUtils | ||
import org.opensearch.test.OpenSearchTestCase | ||
import org.opensearch.threadpool.TestThreadPool | ||
import java.util.ArrayList | ||
import java.util.concurrent.TimeUnit | ||
|
||
/**
 * Unit tests for the node-assignment logic of [ShardReplicationExecutor.getAssignment].
 *
 * Each test builds a [ClusterState] with a hand-crafted routing table and checks which node
 * (if any) the executor selects for the shard replication task.
 */
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
class ShardReplicationExecutorTests: OpenSearchTestCase() {

    companion object {
        const val followerIndex = "follower-index"
        const val remoteCluster = "remote-cluster"
    }

    private lateinit var shardReplicationExecutor: ShardReplicationExecutor

    private val threadPool = TestThreadPool("ShardExecutorTest")
    private val clusterService = ClusterServiceUtils.createClusterService(threadPool)

    /** Builds the executor under test with a spied no-op client and mocked settings. */
    @Before
    fun setup() {
        val spyClient = Mockito.spy(NoOpClient("testName"))
        val replicationMetadataManager = ReplicationMetadataManager(
            clusterService,
            spyClient,
            ReplicationMetadataStore(spyClient, clusterService, NamedXContentRegistry.EMPTY)
        )
        val followerStats = FollowerClusterStats()
        val followerShardId = ShardId("follower", "follower_uuid", 0)
        followerStats.stats[followerShardId] = FollowerShardMetric()

        val replicationSettings = Mockito.mock(ReplicationSettings::class.java)
        // NOTE(review): this assignment is made on a Mockito mock, so it is presumably a no-op — confirm.
        replicationSettings.metadataSyncInterval = TimeValue(100, TimeUnit.MILLISECONDS)
        shardReplicationExecutor = ShardReplicationExecutor(
            "test_executor",
            clusterService,
            threadPool,
            spyClient,
            replicationMetadataManager,
            replicationSettings,
            followerStats
        )
    }

    /**
     * Releases the cluster service and thread pool created for this test instance.
     * The annotation is fully qualified so that no additional import is required.
     */
    @org.junit.After
    fun releaseResources() {
        clusterService.close()
        threadPool.shutdownNow()
    }

    @Test
    fun `getAssignment should not throw exception when no shard is present` () {
        // createClusterState registers only the replication config index and followerIndex, so a
        // shard of any other index is absent from the routing table. The lookup is expected to
        // fail inside getAssignment (presumably with an index-not-found error — confirm) and be
        // translated into an assignment without an executor node.
        val missingShardId = ShardId(Index("missing-index", "_na_"), 0)
        val params = ShardReplicationParams(remoteCluster, missingShardId, missingShardId)
        val clusterState = createClusterState(null, null)

        // Any exception propagates and fails the test with its full stack trace.
        val assignment = shardReplicationExecutor.getAssignment(params, clusterState)
        Assert.assertNull(assignment.executorNode)
    }

    @Test
    fun `getAssignment should return null if shard is present but is not active` () {
        val sId = ShardId(Index(followerIndex, "_na_"), 0)
        val params = ShardReplicationParams(remoteCluster, sId, sId)
        val unassignedShard = ShardRouting.newUnassigned(
            sId,
            true,
            RecoverySource.EmptyStoreRecoverySource.INSTANCE,
            UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null)
        )
        val clusterState = createClusterState(sId, unassignedShard)

        val assignment = shardReplicationExecutor.getAssignment(params, clusterState)
        Assert.assertNull(assignment.executorNode)
    }

    @Test
    fun `getAssignment should return node when shard is present` () {
        val sId = ShardId(Index(followerIndex, "_na_"), 0)
        val params = ShardReplicationParams(remoteCluster, sId, sId)
        val initializingShard = TestShardRouting.newShardRouting(
            followerIndex,
            sId.id,
            "1",
            true,
            ShardRoutingState.INITIALIZING
        )
        val startedShard = initializingShard.moveToStarted()
        val clusterState = createClusterState(sId, startedShard)

        val assignment = shardReplicationExecutor.getAssignment(params, clusterState)
        Assert.assertEquals(initializingShard.currentNodeId(), assignment.executorNode)
    }

    /**
     * Builds a cluster state containing the replication config system index and [followerIndex].
     *
     * Both indices are first added to the routing table as new indices. When [shardId] is given,
     * an index routing table holding only [shardRouting] is then added for that shard's index.
     *
     * @param shardId shard whose index routing should be supplied explicitly, or null to skip this
     * @param shardRouting routing entry for [shardId]; must be non-null whenever [shardId] is non-null
     */
    private fun createClusterState(shardId: ShardId?, shardRouting: ShardRouting?): ClusterState {
        val metadata = Metadata.builder()
            .put(
                IndexMetadata.builder(ReplicationMetadataStore.REPLICATION_CONFIG_SYSTEM_INDEX)
                    .settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(0))
            .put(
                // Use this class's own index name so metadata and routing table always agree.
                IndexMetadata.builder(followerIndex)
                    .settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(0))
            .build()

        val routingTableBuilder = RoutingTable.builder()
            .addAsNew(metadata.index(ReplicationMetadataStore.REPLICATION_CONFIG_SYSTEM_INDEX))
            .addAsNew(metadata.index(followerIndex))

        if (shardId != null) {
            val routing = requireNotNull(shardRouting) { "shardRouting must be provided together with shardId" }
            routingTableBuilder.add(
                IndexRoutingTable.builder(shardId.index)
                    .addShard(routing)
                    .build()
            )
        }

        return ClusterState.builder(clusterService.state()).routingTable(routingTableBuilder.build()).build()
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For testing: will the _stop API clear these entries when the shards are not active?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are tracking this in a separate issue: #903