Skip to content

Commit

Permalink
Fail Snapshot on Corrupted Metadata Blob (elastic#47009) (elastic#47096)
Browse files Browse the repository at this point in the history
We should not be quietly ignoring a corrupted shard-level index-N
blob. Simply creating a new empty shard-level index-N and moving
on means that all snapshots of that shard show `SUCESS` as their
state at the repository root but are in fact broken.
This change at least makes it visible to the user that they can't
snapshot the given shard any more and forces the user to move on
to a new repository since the current one is broken and will not
allow snapshotting the inconsistent shard again.

Also, this change stops the delete action for shards with broken
index-N blobs instead of simply deleting all blobs in the path
containing the broken index-N. This prevents a temporarily
broken/missing index-N blob from corrupting all snapshots of
that shard.
  • Loading branch information
original-brownbear authored Sep 25, 2019
1 parent 58d2bf7 commit 93fcd23
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
throw new IndexShardSnapshotFailedException(shardId, "failed to list blobs", e);
}

Tuple<BlobStoreIndexShardSnapshots, Long> tuple = buildBlobStoreIndexShardSnapshots(blobs, shardContainer);
Tuple<BlobStoreIndexShardSnapshots, Long> tuple = buildBlobStoreIndexShardSnapshots(blobs.keySet(), shardContainer);
BlobStoreIndexShardSnapshots snapshots = tuple.v1();
long fileListGeneration = tuple.v2();

Expand Down Expand Up @@ -1233,7 +1233,8 @@ public String toString() {
/**
* Delete shard snapshot
*/
private void deleteShardSnapshot(RepositoryData repositoryData, IndexId indexId, ShardId snapshotShardId, SnapshotId snapshotId) {
private void deleteShardSnapshot(RepositoryData repositoryData, IndexId indexId, ShardId snapshotShardId, SnapshotId snapshotId)
throws IOException {
final BlobContainer shardContainer = shardContainer(indexId, snapshotShardId);
final Map<String, BlobMetaData> blobs;
try {
Expand All @@ -1242,7 +1243,7 @@ private void deleteShardSnapshot(RepositoryData repositoryData, IndexId indexId,
throw new IndexShardSnapshotException(snapshotShardId, "Failed to list content of shard directory", e);
}

Tuple<BlobStoreIndexShardSnapshots, Long> tuple = buildBlobStoreIndexShardSnapshots(blobs, shardContainer);
Tuple<BlobStoreIndexShardSnapshots, Long> tuple = buildBlobStoreIndexShardSnapshots(blobs.keySet(), shardContainer);
BlobStoreIndexShardSnapshots snapshots = tuple.v1();
long fileListGeneration = tuple.v2();

Expand Down Expand Up @@ -1313,21 +1314,16 @@ private BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContain
* @param blobs list of blobs in repository
* @return tuple of BlobStoreIndexShardSnapshots and the last snapshot index generation
*/
private Tuple<BlobStoreIndexShardSnapshots, Long> buildBlobStoreIndexShardSnapshots(Map<String, BlobMetaData> blobs,
BlobContainer shardContainer) {
Set<String> blobKeys = blobs.keySet();
long latest = latestGeneration(blobKeys);
private Tuple<BlobStoreIndexShardSnapshots, Long> buildBlobStoreIndexShardSnapshots(Set<String> blobs, BlobContainer shardContainer)
throws IOException {
long latest = latestGeneration(blobs);
if (latest >= 0) {
try {
final BlobStoreIndexShardSnapshots shardSnapshots =
indexShardSnapshotsFormat.read(shardContainer, Long.toString(latest));
return new Tuple<>(shardSnapshots, latest);
} catch (IOException e) {
final String file = SNAPSHOT_INDEX_PREFIX + latest;
logger.warn(() -> new ParameterizedMessage("failed to read index file [{}]", file), e);
}
} else if (blobKeys.isEmpty() == false) {
logger.warn("Could not find a readable index-N file in a non-empty shard snapshot directory [{}]", shardContainer.path());
final BlobStoreIndexShardSnapshots shardSnapshots = indexShardSnapshotsFormat.read(shardContainer, Long.toString(latest));
return new Tuple<>(shardSnapshots, latest);
} else if (blobs.stream().anyMatch(b -> b.startsWith(SNAPSHOT_PREFIX) || b.startsWith(INDEX_FILE_PREFIX)
|| b.startsWith(DATA_BLOB_PREFIX))) {
throw new IllegalStateException(
"Could not find a readable index-N file in a non-empty shard snapshot directory [" + shardContainer.path() + "]");
}
return new Tuple<>(BlobStoreIndexShardSnapshots.EMPTY, latest);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2926,23 +2926,10 @@ public void testSnapshotWithCorruptedShardIndexFile() throws Exception {
.setWaitForCompletion(true)
.get()
.getSnapshotInfo();
assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS));
assertThat(snapshotInfo2.failedShards(), equalTo(0));
assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo.totalShards()));
assertThat(snapshotInfo2.state(), equalTo(SnapshotState.PARTIAL));
assertThat(snapshotInfo2.failedShards(), equalTo(1));
assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo.totalShards() - 1));
assertThat(snapshotInfo2.indices(), hasSize(1));

logger.info("--> deleting index [{}]", indexName);
assertAcked(client().admin().indices().prepareDelete(indexName));

logger.info("--> restoring snapshot [{}]", snapshot2);
client().admin().cluster().prepareRestoreSnapshot("test-repo", snapshot2)
.setRestoreGlobalState(randomBoolean())
.setWaitForCompletion(true)
.get();

ensureGreen();

assertHitCount(client().prepareSearch(indexName).setSize(0).get(), 2 * nDocs);
}

public void testCannotCreateSnapshotsWithSameName() throws Exception {
Expand Down

0 comments on commit 93fcd23

Please sign in to comment.