-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Bulk Fetch SnapshotInfo API to Repository #73570
Merged
original-brownbear
merged 25 commits into
elastic:master
from
original-brownbear:more-efficient-snapshot-info-api
Jun 14, 2021
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
4fb132b
worksish
original-brownbear 0b2b29f
fix things + docs
original-brownbear 4d15949
fix concurrency
original-brownbear d6f3e3c
adjust
original-brownbear 0ef6f1b
snapshot status API fetch logic
original-brownbear c02244a
some additional docs
original-brownbear 9b27b0b
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear 24de437
fix todo
original-brownbear 97a7082
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear 749f882
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear bc4515e
cleanup
original-brownbear 2b082fd
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear f4355fa
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear 854f443
tests fixed
original-brownbear 7c3c422
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear a09ab69
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear df02ed9
spotless
original-brownbear 2e7b50c
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear 666d5f2
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear b38e4dc
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear fe4e0ce
CR: comments
original-brownbear b874ef5
CR: comments
original-brownbear 54e9dea
Merge remote-tracking branch 'elastic/master' into more-efficient-sna…
original-brownbear d875e8c
docs and assertions
original-brownbear 94f44ed
fix ccr
original-brownbear File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.elasticsearch.action.ActionListener; | ||
import org.elasticsearch.action.ActionRunnable; | ||
import org.elasticsearch.action.StepListener; | ||
import org.elasticsearch.action.support.ActionFilters; | ||
import org.elasticsearch.action.support.master.TransportMasterNodeAction; | ||
import org.elasticsearch.client.node.NodeClient; | ||
|
@@ -27,10 +27,10 @@ | |
import org.elasticsearch.common.Strings; | ||
import org.elasticsearch.common.inject.Inject; | ||
import org.elasticsearch.common.util.CollectionUtils; | ||
import org.elasticsearch.common.util.concurrent.ListenableFuture; | ||
import org.elasticsearch.common.util.set.Sets; | ||
import org.elasticsearch.index.shard.ShardId; | ||
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; | ||
import org.elasticsearch.repositories.GetSnapshotInfoContext; | ||
import org.elasticsearch.repositories.IndexId; | ||
import org.elasticsearch.repositories.RepositoriesService; | ||
import org.elasticsearch.repositories.Repository; | ||
|
@@ -52,6 +52,7 @@ | |
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
|
@@ -62,6 +63,7 @@ | |
import java.util.stream.Collectors; | ||
|
||
import static java.util.Collections.unmodifiableMap; | ||
import static org.elasticsearch.cluster.SnapshotsInProgress.ShardState.SUCCESS; | ||
|
||
public class TransportSnapshotsStatusAction extends TransportMasterNodeAction<SnapshotsStatusRequest, SnapshotsStatusResponse> { | ||
|
||
|
@@ -90,7 +92,7 @@ public TransportSnapshotsStatusAction( | |
SnapshotsStatusRequest::new, | ||
indexNameExpressionResolver, | ||
SnapshotsStatusResponse::new, | ||
ThreadPool.Names.GENERIC | ||
ThreadPool.Names.SAME | ||
); | ||
this.repositoriesService = repositoriesService; | ||
this.client = client; | ||
|
@@ -142,13 +144,14 @@ protected void masterOperation( | |
new TransportNodesSnapshotsStatus.Request(nodesIds.toArray(Strings.EMPTY_ARRAY)).snapshots(snapshots) | ||
.timeout(request.masterNodeTimeout()), | ||
ActionListener.wrap( | ||
nodeSnapshotStatuses -> threadPool.generic() | ||
.execute( | ||
ActionRunnable.wrap( | ||
listener, | ||
l -> buildResponse(snapshotsInProgress, request, currentSnapshots, nodeSnapshotStatuses, cancellableTask, l) | ||
) | ||
), | ||
nodeSnapshotStatuses -> buildResponse( | ||
snapshotsInProgress, | ||
request, | ||
currentSnapshots, | ||
nodeSnapshotStatuses, | ||
cancellableTask, | ||
listener | ||
), | ||
listener::onFailure | ||
) | ||
); | ||
|
@@ -192,8 +195,7 @@ private void buildResponse( | |
SnapshotIndexShardStatus shardStatus = shardStatues.get(shardEntry.key); | ||
if (shardStatus != null) { | ||
// We have full information about this shard | ||
if (shardStatus.getStage() == SnapshotIndexShardStage.DONE | ||
&& shardEntry.value.state() != SnapshotsInProgress.ShardState.SUCCESS) { | ||
if (shardStatus.getStage() == SnapshotIndexShardStage.DONE && shardEntry.value.state() != SUCCESS) { | ||
// Unlikely edge case: | ||
// Data node has finished snapshotting the shard but the cluster state has not yet been updated | ||
// to reflect this. We adjust the status to show up as snapshot metadata being written because | ||
|
@@ -286,9 +288,10 @@ private void loadRepositoryData( | |
ActionListener<SnapshotsStatusResponse> listener | ||
) { | ||
final Set<String> requestedSnapshotNames = Sets.newHashSet(request.snapshots()); | ||
final ListenableFuture<RepositoryData> repositoryDataListener = new ListenableFuture<>(); | ||
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>(); | ||
repositoriesService.getRepositoryData(repositoryName, repositoryDataListener); | ||
repositoryDataListener.addListener(ActionListener.wrap(repositoryData -> { | ||
final Collection<SnapshotId> snapshotIdsToLoad = new ArrayList<>(); | ||
repositoryDataListener.whenComplete(repositoryData -> { | ||
ensureNotCancelled(task); | ||
final Map<String, SnapshotId> matchedSnapshotIds = repositoryData.getSnapshotIds() | ||
.stream() | ||
|
@@ -314,73 +317,62 @@ private void loadRepositoryData( | |
throw new SnapshotMissingException(repositoryName, snapshotName); | ||
} | ||
} | ||
SnapshotInfo snapshotInfo = snapshot(snapshotsInProgress, repositoryName, snapshotId); | ||
List<SnapshotIndexShardStatus> shardStatusBuilder = new ArrayList<>(); | ||
if (snapshotInfo.state().completed()) { | ||
Map<ShardId, IndexShardSnapshotStatus> shardStatuses = snapshotShards( | ||
repositoryName, | ||
repositoryData, | ||
task, | ||
snapshotInfo | ||
); | ||
for (Map.Entry<ShardId, IndexShardSnapshotStatus> shardStatus : shardStatuses.entrySet()) { | ||
IndexShardSnapshotStatus.Copy lastSnapshotStatus = shardStatus.getValue().asCopy(); | ||
shardStatusBuilder.add(new SnapshotIndexShardStatus(shardStatus.getKey(), lastSnapshotStatus)); | ||
} | ||
final SnapshotsInProgress.State state; | ||
switch (snapshotInfo.state()) { | ||
case FAILED: | ||
state = SnapshotsInProgress.State.FAILED; | ||
break; | ||
case SUCCESS: | ||
case PARTIAL: | ||
// Translating both PARTIAL and SUCCESS to SUCCESS for now | ||
// TODO: add the differentiation on the metadata level in the next major release | ||
state = SnapshotsInProgress.State.SUCCESS; | ||
break; | ||
default: | ||
throw new IllegalArgumentException("Unknown snapshot state " + snapshotInfo.state()); | ||
} | ||
final long startTime = snapshotInfo.startTime(); | ||
final long endTime = snapshotInfo.endTime(); | ||
assert endTime >= startTime || (endTime == 0L && snapshotInfo.state().completed() == false) | ||
: "Inconsistent timestamps found in SnapshotInfo [" + snapshotInfo + "]"; | ||
builder.add( | ||
new SnapshotStatus( | ||
new Snapshot(repositoryName, snapshotId), | ||
state, | ||
Collections.unmodifiableList(shardStatusBuilder), | ||
snapshotInfo.includeGlobalState(), | ||
startTime, | ||
// Use current time to calculate overall runtime for in-progress snapshots that have endTime == 0 | ||
(endTime == 0 ? threadPool.absoluteTimeInMillis() : endTime) - startTime | ||
) | ||
); | ||
if (snapshotsInProgress.snapshot(new Snapshot(repositoryName, snapshotId)) == null) { | ||
snapshotIdsToLoad.add(snapshotId); | ||
} | ||
} | ||
listener.onResponse(new SnapshotsStatusResponse(Collections.unmodifiableList(builder))); | ||
}, listener::onFailure), threadPool.generic(), null); | ||
} | ||
|
||
/** | ||
* Retrieves snapshot from repository | ||
* | ||
* @param snapshotsInProgress snapshots in progress in the cluster state | ||
* @param repositoryName repository name | ||
* @param snapshotId snapshot id | ||
* @return snapshot | ||
* @throws SnapshotMissingException if snapshot is not found | ||
*/ | ||
private SnapshotInfo snapshot(SnapshotsInProgress snapshotsInProgress, String repositoryName, SnapshotId snapshotId) { | ||
List<SnapshotsInProgress.Entry> entries = SnapshotsService.currentSnapshots( | ||
snapshotsInProgress, | ||
repositoryName, | ||
Collections.singletonList(snapshotId.getName()) | ||
); | ||
if (entries.isEmpty() == false) { | ||
return new SnapshotInfo(entries.iterator().next()); | ||
} | ||
return repositoriesService.repository(repositoryName).getSnapshotInfo(snapshotId); | ||
if (snapshotIdsToLoad.isEmpty()) { | ||
listener.onResponse(new SnapshotsStatusResponse(Collections.unmodifiableList(builder))); | ||
} else { | ||
final List<SnapshotStatus> threadSafeBuilder = Collections.synchronizedList(builder); | ||
repositoriesService.repository(repositoryName) | ||
.getSnapshotInfo(new GetSnapshotInfoContext(snapshotIdsToLoad, true, task::isCancelled, (context, snapshotInfo) -> { | ||
List<SnapshotIndexShardStatus> shardStatusBuilder = new ArrayList<>(); | ||
final Map<ShardId, IndexShardSnapshotStatus> shardStatuses; | ||
try { | ||
shardStatuses = snapshotShards(repositoryName, repositoryData, task, snapshotInfo); | ||
} catch (Exception e) { | ||
// stops all further fetches of snapshotInfo since context is fail-fast | ||
context.onFailure(e); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if we could turn the |
||
return; | ||
} | ||
for (Map.Entry<ShardId, IndexShardSnapshotStatus> shardStatus : shardStatuses.entrySet()) { | ||
IndexShardSnapshotStatus.Copy lastSnapshotStatus = shardStatus.getValue().asCopy(); | ||
shardStatusBuilder.add(new SnapshotIndexShardStatus(shardStatus.getKey(), lastSnapshotStatus)); | ||
} | ||
final SnapshotsInProgress.State state; | ||
switch (snapshotInfo.state()) { | ||
case FAILED: | ||
state = SnapshotsInProgress.State.FAILED; | ||
break; | ||
case SUCCESS: | ||
case PARTIAL: | ||
// Translating both PARTIAL and SUCCESS to SUCCESS for now | ||
// TODO: add the differentiation on the metadata level in the next major release | ||
state = SnapshotsInProgress.State.SUCCESS; | ||
break; | ||
default: | ||
throw new IllegalArgumentException("Unknown snapshot state " + snapshotInfo.state()); | ||
} | ||
final long startTime = snapshotInfo.startTime(); | ||
final long endTime = snapshotInfo.endTime(); | ||
assert endTime >= startTime || (endTime == 0L && snapshotInfo.state().completed() == false) | ||
: "Inconsistent timestamps found in SnapshotInfo [" + snapshotInfo + "]"; | ||
threadSafeBuilder.add( | ||
new SnapshotStatus( | ||
new Snapshot(repositoryName, snapshotInfo.snapshotId()), | ||
state, | ||
Collections.unmodifiableList(shardStatusBuilder), | ||
snapshotInfo.includeGlobalState(), | ||
startTime, | ||
// Use current time to calculate overall runtime for in-progress snapshots that have endTime == 0 | ||
(endTime == 0 ? threadPool.absoluteTimeInMillis() : endTime) - startTime | ||
) | ||
); | ||
}, listener.map(v -> new SnapshotsStatusResponse(List.copyOf(threadSafeBuilder))))); | ||
} | ||
}, listener::onFailure); | ||
} | ||
|
||
/** | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I understand this: we found a snapshot with a matching name in the repository data, but we did not find it earlier in the in-progress snapshots, so how could it be in
snapshotsInProgress
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well spotted :) I think this is an impossible race to run into these days. I'll leave it as is here for now and will open a PR to clean this up from
master
if possible separately today :)