-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Toggle replication strategy based on segrep index setting #2318
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,10 +32,6 @@ | |
|
||
package org.opensearch.index.engine; | ||
|
||
import java.io.IOException; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Was this reorder intentional? I think this may have been done by the IDE. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I left it in since this seems to be the standard order followed by all other files. |
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.apache.lucene.index.DirectoryReader; | ||
|
@@ -44,11 +40,14 @@ | |
import org.apache.lucene.index.SegmentInfos; | ||
import org.apache.lucene.index.StandardDirectoryReader; | ||
import org.apache.lucene.search.ReferenceManager; | ||
|
||
import org.apache.lucene.search.SearcherManager; | ||
import org.opensearch.common.SuppressForbidden; | ||
import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
/** | ||
* Utility class to safely share {@link OpenSearchDirectoryReader} instances across | ||
* multiple threads, while periodically reopening. This class ensures each | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -845,8 +845,7 @@ public Engine.IndexResult applyIndexOperationOnReplica( | |
boolean isRetry, | ||
SourceToParse sourceToParse | ||
) throws IOException { | ||
Boolean isSegRepEnabled = indexSettings.isSegrepEnabled(); | ||
if (isSegRepEnabled != null && isSegRepEnabled) { | ||
if (indexSettings.isSegrepEnabled()) { | ||
Engine.Index index; | ||
try { | ||
index = parseSourceAndPrepareIndex( | ||
|
@@ -1278,7 +1277,7 @@ public void refresh(String source) { | |
*/ | ||
public long getWritingBytes() { | ||
// TODO: Segrep: hack - if not the primary our IW is null and this blows up. | ||
if (shardRouting.primary() == false) { | ||
if (indexSettings.isSegrepEnabled() && (shardRouting.primary() == false)) { | ||
return 0L; | ||
} | ||
Engine engine = getEngineOrNull(); | ||
|
@@ -2030,7 +2029,9 @@ public void openEngineAndRecoverFromTranslog() throws IOException { | |
public void openEngineAndSkipTranslogRecovery() throws IOException { | ||
assert routingEntry().recoverySource().getType() == RecoverySource.Type.PEER : "not a peer recovery [" + routingEntry() + "]"; | ||
// TODO: Segrep - fix initial recovery stages from ReplicationTarget. | ||
// recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); | ||
if (indexSettings.isSegrepEnabled() == false) { | ||
recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); | ||
} | ||
loadGlobalCheckpointToReplicationTracker(); | ||
innerOpenEngineAndTranslog(replicationTracker); | ||
getEngine().skipTranslogRecovery(); | ||
|
@@ -2067,7 +2068,7 @@ private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) t | |
// which settings changes could possibly have happened, so here we forcefully push any config changes to the new engine. | ||
onSettingsChanged(); | ||
// TODO: Segrep - Fix | ||
// assert assertSequenceNumbersInCommit(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this method call may be OK to uncomment now with segrep enabled. We can take that as a separate change. |
||
assert assertSequenceNumbersInCommit(); | ||
recoveryState.validateCurrentStage(RecoveryState.Stage.TRANSLOG); | ||
} | ||
|
||
|
@@ -2239,7 +2240,7 @@ protected final void verifyActive() throws IllegalIndexShardStateException { | |
* Returns number of heap bytes used by the indexing buffer for this shard, or 0 if the shard is closed | ||
*/ | ||
public long getIndexBufferRAMBytesUsed() { | ||
if (shardRouting.primary() == false) { | ||
if (indexSettings.isSegrepEnabled() && (shardRouting.primary() == false)) { | ||
return 0; | ||
} | ||
Engine engine = getEngineOrNull(); | ||
|
@@ -2715,7 +2716,7 @@ public void syncRetentionLeases() { | |
assert assertPrimaryMode(); | ||
verifyNotClosed(); | ||
// TODO: Segrep - Fix retention leases | ||
// replicationTracker.renewPeerRecoveryRetentionLeases(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Leaving a note here similar to the one above: I think this may be OK to uncomment now with segrep enabled. This was commented out before we were correctly wiring up retention leases for replicas. |
||
replicationTracker.renewPeerRecoveryRetentionLeases(); | ||
final Tuple<Boolean, RetentionLeases> retentionLeases = getRetentionLeases(true); | ||
if (retentionLeases.v1()) { | ||
logger.trace("syncing retention leases [{}] after expiration check", retentionLeases.v2()); | ||
|
@@ -3073,32 +3074,35 @@ public void startRecovery( | |
case PEER: | ||
try { | ||
markAsRecovering("from " + recoveryState.getSourceNode(), recoveryState); | ||
IndexShard indexShard = this; | ||
segmentReplicationReplicaService.prepareForReplication( | ||
this, | ||
recoveryState.getTargetNode(), | ||
recoveryState.getSourceNode(), | ||
new ActionListener<TrackShardResponse>() { | ||
@Override | ||
public void onResponse(TrackShardResponse unused) { | ||
replicationListener.onReplicationDone(replicationState); | ||
recoveryState.getIndex().setFileDetailsComplete(); | ||
finalizeRecovery(); | ||
postRecovery("Shard setup complete."); | ||
} | ||
if (indexSettings.isSegrepEnabled()) { | ||
IndexShard indexShard = this; | ||
segmentReplicationReplicaService.prepareForReplication( | ||
this, | ||
recoveryState.getTargetNode(), | ||
recoveryState.getSourceNode(), | ||
new ActionListener<TrackShardResponse>() { | ||
@Override | ||
public void onResponse(TrackShardResponse unused) { | ||
replicationListener.onReplicationDone(replicationState); | ||
recoveryState.getIndex().setFileDetailsComplete(); | ||
finalizeRecovery(); | ||
postRecovery("Shard setup complete."); | ||
} | ||
|
||
@Override | ||
public void onFailure(Exception e) { | ||
replicationListener.onReplicationFailure( | ||
replicationState, | ||
new ReplicationFailedException(indexShard, e), | ||
true | ||
); | ||
@Override | ||
public void onFailure(Exception e) { | ||
replicationListener.onReplicationFailure( | ||
replicationState, | ||
new ReplicationFailedException(indexShard, e), | ||
true | ||
); | ||
} | ||
} | ||
} | ||
); | ||
); | ||
} else { | ||
peerRecoveryTargetService.startRecovery(this, recoveryState.getSourceNode(), recoveryListener); | ||
} | ||
} catch (Exception e) { | ||
logger.error("Error preparing the shard for Segment replication", e); | ||
failShard("corrupted preexisting index", e); | ||
recoveryListener.onRecoveryFailure(recoveryState, new RecoveryFailedException(recoveryState, null, e), true); | ||
} | ||
|
@@ -3295,6 +3299,12 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { | |
this.warmer.warm(reader); | ||
} | ||
}; | ||
final List<ReferenceManager.RefreshListener> internalRefreshListener; | ||
if (indexSettings.isSegrepEnabled()) { | ||
internalRefreshListener = Arrays.asList(new RefreshMetricUpdater(refreshMetric), checkpointRefreshListener); | ||
} else { | ||
internalRefreshListener = Collections.singletonList(new RefreshMetricUpdater(refreshMetric)); | ||
} | ||
return this.engineConfigFactory.newEngineConfig( | ||
shardId, | ||
threadPool, | ||
|
@@ -3311,7 +3321,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { | |
translogConfig, | ||
IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()), | ||
Arrays.asList(refreshListeners, refreshPendingLocationListener), | ||
Arrays.asList(new RefreshMetricUpdater(refreshMetric), checkpointRefreshListener), | ||
internalRefreshListener, | ||
indexSort, | ||
circuitBreakerService, | ||
globalCheckpointSupplier, | ||
|
@@ -3910,7 +3920,7 @@ ReplicationTracker getReplicationTracker() { | |
public boolean scheduledRefresh() { | ||
// skip if not primary shard. | ||
// TODO: Segrep - should split into primary/replica classes. | ||
if (shardRouting.primary() == false) { | ||
if ((indexSettings.isSegrepEnabled()) && (shardRouting.primary() == false)) { | ||
return false; | ||
} | ||
verifyNotClosed(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: this isn't required because we always initialize this variable in the constructor.
isSegrepEnabled = settings.getAsBoolean(IndexMetadata.SETTING_SEGMENT_REPLICATION, false);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, but I'm leaving it in to convert to boolean, which is faster since we don't need a thread-safe implementation.