-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Track Snapshot Version in RepositoryData #50930
Changes from 6 commits
e99eaea
6f45d07
386f0c3
4ca7e7c
86ef75f
eb22ebb
99872cb
02e8c4e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
import org.apache.lucene.store.RateLimiter; | ||
import org.apache.lucene.util.SetOnce; | ||
import org.elasticsearch.ExceptionsHelper; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.action.ActionListener; | ||
import org.elasticsearch.action.ActionRunnable; | ||
import org.elasticsearch.action.StepListener; | ||
|
@@ -121,6 +122,7 @@ | |
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.BlockingQueue; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.Executor; | ||
import java.util.concurrent.LinkedBlockingQueue; | ||
import java.util.concurrent.TimeUnit; | ||
|
@@ -873,7 +875,7 @@ public void finalizeSnapshot(final SnapshotId snapshotId, | |
final SnapshotInfo snapshotInfo = snapshotInfos.iterator().next(); | ||
getRepositoryData(ActionListener.wrap(existingRepositoryData -> { | ||
final RepositoryData updatedRepositoryData = | ||
existingRepositoryData.addSnapshot(snapshotId, snapshotInfo.state(), shardGenerations); | ||
existingRepositoryData.addSnapshot(snapshotId, snapshotInfo.state(), Version.CURRENT, shardGenerations); | ||
writeIndexGen(updatedRepositoryData, repositoryStateId, writeShardGens, ActionListener.wrap(v -> { | ||
if (writeShardGens) { | ||
cleanupOldShardGens(existingRepositoryData, updatedRepositoryData); | ||
|
@@ -1252,8 +1254,41 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
} | ||
}); | ||
|
||
final StepListener<RepositoryData> filterRepositoryDataStep = new StepListener<>(); | ||
|
||
// Step 2: Write new index-N blob to repository and update index.latest | ||
setPendingStep.whenComplete(newGen -> threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> { | ||
// BwC logic: Load snapshot version information if any snapshot is missing a version in RepositoryData so that the new | ||
// RepositoryData contains a version for every snapshot | ||
final List<SnapshotId> snapshotIdsWithoutVersion = repositoryData.getSnapshotIds().stream().filter( | ||
snapshotId -> repositoryData.getVersion(snapshotId) == null).collect(Collectors.toList()); | ||
if (snapshotIdsWithoutVersion.isEmpty() == false) { | ||
final Map<SnapshotId, Version> updatedVersionMap = new ConcurrentHashMap<>(); | ||
final GroupedActionListener<Void> loadAllVersionsListener = new GroupedActionListener<>( | ||
ActionListener.runAfter( | ||
new ActionListener<>() { | ||
@Override | ||
public void onResponse(Collection<Void> voids) { | ||
logger.info("Successfully loaded all snapshot's version information for {} from snapshot metadata", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be nice to have the concrete ids of the snapshots to help debug situations where users might be writing to the repo from various ES versions (that's my main motivation to have this). I figured the list isn't going to be so long that it would create real issues due to the log line length so worst case it's a bit of an annoyance right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Why do the exact names and versions matter in that case? Isn't it sufficient to know that some snapshots got version-tagged that previously were not? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Versions I don't care about much and the exact names aren't important either, but it would be nice to see if the list of snapshots changed if this gets logged repeatedly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah nice thanks => reused in 02e8c4e (can probably move that method to a more appropriate place in a follow up) |
||
snapshotIdsWithoutVersion); | ||
} | ||
|
||
@Override | ||
public void onFailure(Exception e) { | ||
logger.warn("Failure when trying to load missing version information from snapshot metadata", e); | ||
} | ||
}, () -> filterRepositoryDataStep.onResponse(repositoryData.withVersions(updatedVersionMap))), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not great but just like in last week's discussion of |
||
snapshotIdsWithoutVersion.size()); | ||
for (SnapshotId snapshotId : snapshotIdsWithoutVersion) { | ||
threadPool().executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(loadAllVersionsListener, () -> | ||
updatedVersionMap.put(snapshotId, getSnapshotInfo(snapshotId).version()))); | ||
} | ||
} else { | ||
filterRepositoryDataStep.onResponse(repositoryData); | ||
} | ||
})), listener::onFailure); | ||
filterRepositoryDataStep.whenComplete(filteredRepositoryData -> { | ||
final long newGen = setPendingStep.result(); | ||
if (latestKnownRepoGen.get() >= newGen) { | ||
throw new IllegalArgumentException( | ||
"Tried writing generation [" + newGen + "] but repository is at least at generation [" + latestKnownRepoGen.get() | ||
|
@@ -1263,7 +1298,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
final String indexBlob = INDEX_FILE_PREFIX + Long.toString(newGen); | ||
logger.debug("Repository [{}] writing new index generational blob [{}]", metadata.name(), indexBlob); | ||
writeAtomic(indexBlob, | ||
BytesReference.bytes(repositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true); | ||
BytesReference.bytes(filteredRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), writeShardGens)), true); | ||
// write the current generation to the index-latest file | ||
final BytesReference genBytes; | ||
try (BytesStreamOutput bStream = new BytesStreamOutput()) { | ||
|
@@ -1297,13 +1332,13 @@ public ClusterState execute(ClusterState currentState) { | |
|
||
@Override | ||
public void onFailure(String source, Exception e) { | ||
l.onFailure( | ||
listener.onFailure( | ||
new RepositoryException(metadata.name(), "Failed to execute cluster state update [" + source + "]", e)); | ||
} | ||
|
||
@Override | ||
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { | ||
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(l, () -> { | ||
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.run(listener, () -> { | ||
// Delete all now outdated index files up to 1000 blobs back from the new generation. | ||
// If there are more than 1000 dangling index-N cleanup functionality on repo delete will take care of them. | ||
// Deleting one older than the current expectedGen is done for BwC reasons as older versions used to keep | ||
|
@@ -1320,7 +1355,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
})); | ||
} | ||
}); | ||
})), listener::onFailure); | ||
}, listener::onFailure); | ||
} | ||
|
||
private RepositoryMetaData getRepoMetaData(ClusterState state) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is somewhat annoying that we have to load all
SnapshotInfo
here one-off but I think it's still the best solution available.Ideally, I was hoping to be able to just work from the assumption that if no version is set for a snapshot then it must be from before
8.0
/7.6
. But I gave up on that idea since it breaks as soon as some older version cluster (for whatever reason), takes a snapshot and removes all theversion
fields when writing out a newindex-N
(maybe that's the wrong trade off though, glad to hear opinions on this :) ... obviously you could argue that loading all theSnapshotInfo
is too high a price to pay just so the repo can move to the new metadata version earlier, but IMO even on S3 etc. loading 100 snapshots or so won't take all that long or cost any meaningful amount of $$$).