-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ensures cleanup of temporary index-* generational blobs during snapshotting #21469
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -867,15 +867,16 @@ private long listBlobsToGetLatestIndexId() throws IOException { | |
} | ||
|
||
private void writeAtomic(final String blobName, final BytesReference bytesRef) throws IOException { | ||
final String tempBlobName = "pending-" + blobName; | ||
final String tempBlobName = "pending-" + blobName + "-" + UUIDs.randomBase64UUID(); | ||
try (InputStream stream = bytesRef.streamInput()) { | ||
snapshotsBlobContainer.writeBlob(tempBlobName, stream, bytesRef.length()); | ||
} | ||
try { | ||
snapshotsBlobContainer.move(tempBlobName, blobName); | ||
} catch (IOException ex) { | ||
// Move failed - try cleaning up | ||
snapshotsBlobContainer.deleteBlob(tempBlobName); | ||
try { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe readd comment here (temp file creation or move failed - clean up) |
||
snapshotsBlobContainer.deleteBlob(tempBlobName); | ||
} catch (IOException e) { | ||
ex.addSuppressed(e); | ||
} | ||
throw ex; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2672,4 +2672,53 @@ public void testSnapshotCanceledOnRemovedShard() throws Exception { | |
assertEquals("IndexShardSnapshotFailedException[Aborted]", snapshotInfo.shardFailures().get(0).reason()); | ||
} | ||
|
||
public void testSnapshotSucceedsAfterSnapshotFailure() throws Exception { | ||
logger.info("--> creating repository"); | ||
final Path repoPath = randomRepoPath(); | ||
assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("mock").setVerify(false).setSettings( | ||
Settings.builder().put("location", repoPath).put("random_control_io_exception_rate", 0.2))); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe add some randomization to the random_control_io_exception_rate? |
||
|
||
logger.info("--> indexing some data"); | ||
createIndex("test-idx"); | ||
ensureGreen(); | ||
final int numDocs = randomIntBetween(1, 5); | ||
for (int i = 0; i < numDocs; i++) { | ||
index("test-idx", "doc", Integer.toString(i), "foo", "bar" + i); | ||
} | ||
refresh(); | ||
assertThat(client().prepareSearch("test-idx").setSize(0).get().getHits().totalHits(), equalTo((long) numDocs)); | ||
|
||
logger.info("--> snapshot with potential I/O failures"); | ||
try { | ||
CreateSnapshotResponse createSnapshotResponse = | ||
client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap") | ||
.setWaitForCompletion(true) | ||
.setIndices("test-idx") | ||
.get(); | ||
if (createSnapshotResponse.getSnapshotInfo().totalShards() != createSnapshotResponse.getSnapshotInfo().successfulShards()) { | ||
assertThat(getFailureCount("test-repo"), greaterThan(0L)); | ||
assertThat(createSnapshotResponse.getSnapshotInfo().shardFailures().size(), greaterThan(0)); | ||
for (SnapshotShardFailure shardFailure : createSnapshotResponse.getSnapshotInfo().shardFailures()) { | ||
assertThat(shardFailure.reason(), containsString("Random IOException")); | ||
} | ||
} | ||
} catch (Exception ex) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we catch a more specific exception type? IOException with the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue here is that either There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can be specified as |
||
// sometimes, the snapshot will fail with a top level I/O exception, | ||
// we assert below that we can still create subsequent snapshots | ||
} | ||
|
||
logger.info("--> snapshot with no I/O failures"); | ||
assertAcked(client().admin().cluster().preparePutRepository("test-repo-2").setType("mock").setVerify(false).setSettings( | ||
Settings.builder().put("location", repoPath))); | ||
CreateSnapshotResponse createSnapshotResponse = | ||
client().admin().cluster().prepareCreateSnapshot("test-repo-2", "test-snap-2") | ||
.setWaitForCompletion(true) | ||
.setIndices("test-idx") | ||
.get(); | ||
assertEquals(0, createSnapshotResponse.getSnapshotInfo().failedShards()); | ||
GetSnapshotsResponse getSnapshotsResponse = client().admin().cluster().prepareGetSnapshots("test-repo-2") | ||
.addSnapshots("test-snap-2").get(); | ||
assertEquals(SnapshotState.SUCCESS, getSnapshotsResponse.getSnapshots().get(0).state()); | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -321,14 +321,21 @@ public Map<String, BlobMetaData> listBlobsByPrefix(String blobNamePrefix) throws | |
|
||
@Override | ||
public void move(String sourceBlob, String targetBlob) throws IOException { | ||
// simulate a non-atomic move, since many blob container implementations | ||
// will not have an atomic move, and we should be able to handle that | ||
maybeIOExceptionOrBlock(targetBlob); | ||
super.move(sourceBlob, targetBlob); | ||
super.writeBlob(targetBlob, readBlob(sourceBlob), 0L); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch |
||
maybeIOExceptionOrBlock(targetBlob); | ||
super.deleteBlob(sourceBlob); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, forgot to add here that we could randomize between atomic and non-atomic move. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I pushed adb7aad to randomize between atomic and non-atomic |
||
} | ||
|
||
@Override | ||
public void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException { | ||
maybeIOExceptionOrBlock(blobName); | ||
super.writeBlob(blobName, inputStream, blobSize); | ||
// for network based repositories, the blob may have been written but we may still | ||
// get an error with the client connection, so an IOException here simulates this | ||
maybeIOExceptionOrBlock(blobName); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is can