-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Segment Replication] Add new background task to fail stale replica shards. #6850
Changes from 13 commits
bf9b3dc
05897cc
dd34f0f
e386d5c
d98cef7
282ca4f
04481da
d15bedb
8f1ad84
8a935d5
c332c15
54bae01
1be88b7
11d82d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,9 @@ | |
|
||
package org.opensearch.index; | ||
|
||
import org.mockito.Mockito; | ||
import org.mockito.stubbing.Answer; | ||
import org.opensearch.cluster.action.shard.ShardStateAction; | ||
import org.opensearch.cluster.metadata.IndexMetadata; | ||
import org.opensearch.cluster.service.ClusterService; | ||
import org.opensearch.common.settings.ClusterSettings; | ||
|
@@ -21,6 +23,7 @@ | |
import org.opensearch.index.shard.ShardId; | ||
import org.opensearch.indices.IndicesService; | ||
import org.opensearch.indices.replication.common.ReplicationType; | ||
import org.opensearch.threadpool.ThreadPool; | ||
|
||
import java.util.Iterator; | ||
import java.util.List; | ||
|
@@ -29,13 +32,20 @@ | |
import java.util.concurrent.TimeUnit; | ||
|
||
import static java.util.Arrays.asList; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.ArgumentMatchers.anyString; | ||
import static org.mockito.ArgumentMatchers.anyLong; | ||
import static org.mockito.ArgumentMatchers.anyBoolean; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
import static org.mockito.Mockito.verify; | ||
import static org.mockito.Mockito.times; | ||
import static org.opensearch.index.SegmentReplicationPressureService.MAX_REPLICATION_TIME_SETTING; | ||
import static org.opensearch.index.SegmentReplicationPressureService.SEGMENT_REPLICATION_INDEXING_PRESSURE_ENABLED; | ||
|
||
public class SegmentReplicationPressureServiceTests extends OpenSearchIndexLevelReplicationTestCase { | ||
|
||
private static ShardStateAction shardStateAction = Mockito.mock(ShardStateAction.class); | ||
private static final Settings settings = Settings.builder() | ||
.put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) | ||
.put(SEGMENT_REPLICATION_INDEXING_PRESSURE_ENABLED.getKey(), true) | ||
|
@@ -181,6 +191,36 @@ public void testIsSegrepLimitBreached_underStaleNodeLimit() throws Exception { | |
} | ||
} | ||
|
||
// Checks that running the fail-stale-replica task reports a lagging replica through ShardStateAction. | ||
// Flow: build settings, start a replication group, index five batches so the replica falls behind, | ||
// run the task body directly, then verify a single remoteShardFailed(...) call on the mock. | ||
public void testFailStaleReplicaTask() throws Exception { | ||
// Segment replication with indexing pressure enabled and a 10 ms MAX_REPLICATION_TIME_SETTING. | ||
// NOTE(review): the 10 ms limit is presumably small enough that the replica counts as stale | ||
// by the time the task runs - confirm against SegmentReplicationPressureService. | ||
final Settings settings = Settings.builder() | ||
.put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) | ||
.put(SEGMENT_REPLICATION_INDEXING_PRESSURE_ENABLED.getKey(), true) | ||
.put(MAX_REPLICATION_TIME_SETTING.getKey(), TimeValue.timeValueMillis(10)) | ||
.build(); | ||
|
||
// try-with-resources closes the replication group when the test body finishes or throws. | ||
try (ReplicationGroup shards = createGroup(1, settings, new NRTReplicationEngineFactory())) { | ||
shards.startAll(); | ||
final IndexShard primaryShard = shards.getPrimary(); | ||
SegmentReplicationPressureService service = buildPressureService(settings, primaryShard); | ||
|
||
// index docs in batches without refreshing | ||
indexInBatches(5, shards, primaryShard); | ||
|
||
// assert that exactly one replica is tracked and that it is five checkpoints behind the primary | ||
Set<SegmentReplicationShardStats> replicationStats = primaryShard.getReplicationStats(); | ||
assertEquals(1, replicationStats.size()); | ||
// findFirst().get() is safe here: the set size was asserted to be 1 on the line above. | ||
SegmentReplicationShardStats shardStats = replicationStats.stream().findFirst().get(); | ||
assertEquals(5, shardStats.getCheckpointsBehindCount()); | ||
|
||
// run the background task body directly instead of waiting for it to be scheduled | ||
service.getFailStaleReplicaTask().runInternal(); | ||
|
||
// verify that remote shard failed method is called which fails the replica shards falling behind. | ||
// NOTE(review): shardStateAction is a static mock field, so times(1) counts every call made on it | ||
// in this class - confirm no other test triggers remoteShardFailed on the same mock. | ||
verify(shardStateAction, times(1)).remoteShardFailed(any(), anyString(), anyLong(), anyBoolean(), anyString(), any(), any()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Instead of random values, we can use actual values i.e. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried doing this, but to verify exact values we need the reference of listener (last parameter), we cannot pass it as verify would fail and also I cannot do any() for just last parameter because we cannot combine any() with other parameters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a race condition here? Is it possible for the explicit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If we try to combine any() with regular (non-matcher) parameters in the same method call, we get an exception.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, there is no race condition in this unit test. There are two ways to start the background task: call task.runInternal() directly, or call rescheduleIfNecessary(). In an actual cluster or in an integration test, the background task is started when a new instance of the class is created. But since this is a unit test and we are using mocks, rescheduleIfNecessary() would not trigger runInternal() because the class is just a mock. So, to verify that the task actually works in the unit test, I am making an explicit runInternal() call. |
||
replicateSegments(primaryShard, shards.getReplicas()); | ||
} | ||
} | ||
|
||
private int indexInBatches(int count, ReplicationGroup shards, IndexShard primaryShard) throws Exception { | ||
int totalDocs = 0; | ||
for (int i = 0; i < count; i++) { | ||
|
@@ -202,6 +242,6 @@ private SegmentReplicationPressureService buildPressureService(Settings settings | |
ClusterService clusterService = mock(ClusterService.class); | ||
when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); | ||
|
||
return new SegmentReplicationPressureService(settings, clusterService, indicesService); | ||
return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where does this newly added method get called?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently, it is not called or used directly from anywhere. I followed the approach used in PersistentTasksClusterService, just to make sure that when the service is closed, this async task is closed as well.