From fb88d31feed7ca630fa8f35885ebaef8123518d6 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 19 May 2021 13:59:28 +0100 Subject: [PATCH 1/5] Run CheckIndex on metadata index before loading The metadata index is small and important and only read at startup. Today we rely on Lucene to spot if any of its components is corrupt, but Lucene does not necesssarily verify all checksums in order to catch a corruption. With this commit we run `CheckIndex` on the metadata index first, and fail on startup if a corruption is detected. Closes #29358 --- .../gateway/PersistedClusterStateService.java | 44 ++++++++++++++++--- .../GatewayMetaStatePersistedStateTests.java | 6 +-- .../PersistedClusterStateServiceTests.java | 36 ++++++++++++++- 3 files changed, 75 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java index 4b8a04a099e0b..908a3d41301ae 100644 --- a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java @@ -16,6 +16,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexNotFoundException; import org.apache.lucene.index.IndexWriter; @@ -46,6 +47,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.RecyclingBytesStreamOutput; import org.elasticsearch.common.io.Streams; +import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.logging.Loggers; @@ -70,6 +72,8 @@ import java.io.Closeable; import java.io.IOError; import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -291,17 +295,45 @@ public static void overrideVersion(Version newVersion, Path... dataPaths) throws * Loads the available on-disk cluster state. Returns {@link OnDiskState#NO_ON_DISK_STATE} if no such state was found. */ public OnDiskState loadOnDiskState() throws IOException { + return loadOnDiskState(true); + } + + /** + * Loads the available on-disk cluster state. Returns {@link OnDiskState#NO_ON_DISK_STATE} if no such state was found. + * @param checkClean whether to check the index for corruption before loading, only for tests + */ + OnDiskState loadOnDiskState(boolean checkClean) throws IOException { OnDiskState onDiskState = OnDiskState.NO_ON_DISK_STATE; final Path indexPath = dataPath.resolve(METADATA_DIRECTORY_NAME); if (Files.exists(indexPath)) { - try (Directory directory = createDirectory(indexPath); - DirectoryReader directoryReader = DirectoryReader.open(directory)) { - onDiskState = loadOnDiskState(dataPath, directoryReader); + try (Directory directory = createDirectory(indexPath)) { + if (checkClean) { + try (BytesStreamOutput outputStream = new BytesStreamOutput()) { + final boolean isClean; + try (PrintStream printStream = new PrintStream(outputStream, true, StandardCharsets.UTF_8); + CheckIndex checkIndex = new CheckIndex(directory)) { + checkIndex.setInfoStream(printStream); + checkIndex.setChecksumsOnly(true); + isClean = checkIndex.checkIndex().clean; + } - if (nodeId.equals(onDiskState.nodeId) == false) { - throw new IllegalStateException("unexpected node ID in metadata, found [" + onDiskState.nodeId + - "] in [" + dataPath + "] but expected [" + nodeId + "]"); + if (isClean == false) { + if (logger.isErrorEnabled()) { + outputStream.bytes().utf8ToString().lines().forEach(l -> logger.error("checkIndex: {}", l)); + } + throw new IllegalStateException("metadata index at [" + indexPath + "] is unreadable"); + } + } + } + + try (DirectoryReader directoryReader = DirectoryReader.open(directory)) { + onDiskState = loadOnDiskState(dataPath, directoryReader); + + if (nodeId.equals(onDiskState.nodeId) == false) { + throw new IllegalStateException("unexpected node ID in metadata, found [" + onDiskState.nodeId + + "] in [" + dataPath + "] but expected [" + nodeId + "]"); + } } } catch (IndexNotFoundException e) { logger.debug(new ParameterizedMessage("no on-disk state at {}", indexPath), e); diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java index 1a3b3afb12622..cc46756ac8b87 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -366,7 +366,7 @@ public void testDataOnlyNodePersistence() throws Exception { assertThat(persistedState.getLastAcceptedState().getLastAcceptedConfiguration(), not(equalTo(persistedState.getLastAcceptedState().getLastCommittedConfiguration()))); CoordinationMetadata persistedCoordinationMetadata = - persistedClusterStateService.loadOnDiskState().metadata.coordinationMetadata(); + persistedClusterStateService.loadOnDiskState(false).metadata.coordinationMetadata(); assertThat(persistedCoordinationMetadata.getLastAcceptedConfiguration(), equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); assertThat(persistedCoordinationMetadata.getLastCommittedConfiguration(), @@ -382,12 +382,12 @@ public void testDataOnlyNodePersistence() throws Exception { .clusterUUID(state.metadata().clusterUUID()).clusterUUIDCommitted(true).build()).build(); assertClusterStateEqual(expectedClusterState, persistedState.getLastAcceptedState()); - persistedCoordinationMetadata = persistedClusterStateService.loadOnDiskState().metadata.coordinationMetadata(); + persistedCoordinationMetadata = persistedClusterStateService.loadOnDiskState(false).metadata.coordinationMetadata(); assertThat(persistedCoordinationMetadata.getLastAcceptedConfiguration(), equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); assertThat(persistedCoordinationMetadata.getLastCommittedConfiguration(), equalTo(GatewayMetaState.AsyncLucenePersistedState.staleStateConfiguration)); - assertTrue(persistedClusterStateService.loadOnDiskState().metadata.clusterUUIDCommitted()); + assertTrue(persistedClusterStateService.loadOnDiskState(false).metadata.clusterUUIDCommitted()); // generate a series of updates and check if batching works final String indexName = randomAlphaOfLength(10); diff --git a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java index 80c0ea900e999..94d9b83a6d9e0 100644 --- a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; +import org.apache.lucene.mockfile.ExtrasFS; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; @@ -38,24 +39,32 @@ import org.elasticsearch.gateway.PersistedClusterStateService.Writer; import org.elasticsearch.index.Index; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.test.CorruptionUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLogAppender; import org.elasticsearch.test.junit.annotations.TestLogging; import java.io.IOError; import java.io.IOException; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import static org.apache.lucene.index.IndexWriter.WRITE_LOCK_NAME; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.lessThan; import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.startsWith; public class PersistedClusterStateServiceTests extends ESTestCase { @@ -73,7 +82,7 @@ public void testPersistsAndReloadsTerm() throws IOException { assertThat(persistedClusterStateService.loadOnDiskState().currentTerm, equalTo(0L)); try (Writer writer = persistedClusterStateService.createWriter()) { writer.writeFullStateAndCommit(newTerm, ClusterState.EMPTY_STATE); - assertThat(persistedClusterStateService.loadOnDiskState().currentTerm, equalTo(newTerm)); + assertThat(persistedClusterStateService.loadOnDiskState(false).currentTerm, equalTo(newTerm)); } assertThat(persistedClusterStateService.loadOnDiskState().currentTerm, equalTo(newTerm)); @@ -638,6 +647,29 @@ public void testSlowLogging() throws IOException, IllegalAccessException { } } + public void testFailsIfCorrupt() throws IOException { + try (NodeEnvironment nodeEnvironment = newNodeEnvironment(createTempDir())) { + final PersistedClusterStateService persistedClusterStateService = newPersistedClusterStateService(nodeEnvironment); + + try (Writer writer = persistedClusterStateService.createWriter()) { + writer.writeFullStateAndCommit(1, ClusterState.EMPTY_STATE); + } + + try (DirectoryStream directoryStream = Files.newDirectoryStream(nodeEnvironment.nodeDataPath().resolve("_state"))) { + CorruptionUtils.corruptFile(random(), randomFrom(StreamSupport + .stream(directoryStream.spliterator(), false) + .filter(p -> { + final String filename = p.getFileName().toString(); + return ExtrasFS.isExtra(filename) == false && filename.equals(WRITE_LOCK_NAME) == false; + }) + .collect(Collectors.toList()))); + } + + assertThat(expectThrows(IllegalStateException.class, persistedClusterStateService::loadOnDiskState).getMessage(), + allOf(startsWith("metadata index at ["), endsWith("] is unreadable"))); + } + } + private void assertExpectedLogs(long currentTerm, ClusterState previousState, ClusterState clusterState, PersistedClusterStateService.Writer writer, MockLogAppender.LoggingExpectation expectation) throws IllegalAccessException, IOException { @@ -675,7 +707,7 @@ private NodeEnvironment newNodeEnvironment(Path dataPath) throws IOException { } private static ClusterState loadPersistedClusterState(PersistedClusterStateService persistedClusterStateService) throws IOException { - final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadOnDiskState(); + final PersistedClusterStateService.OnDiskState onDiskState = persistedClusterStateService.loadOnDiskState(false); return clusterStateFromMetadata(onDiskState.lastAcceptedVersion, onDiskState.metadata); } From 268fbd529c655dd3c73a4e87a59862b86d3ff3e6 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 28 May 2021 07:08:50 +0100 Subject: [PATCH 2/5] Mention that the cause is an external force --- .../elasticsearch/gateway/PersistedClusterStateService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java index 908a3d41301ae..9607dd286a6cc 100644 --- a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java @@ -322,7 +322,8 @@ OnDiskState loadOnDiskState(boolean checkClean) throws IOException { if (logger.isErrorEnabled()) { outputStream.bytes().utf8ToString().lines().forEach(l -> logger.error("checkIndex: {}", l)); } - throw new IllegalStateException("metadata index at [" + indexPath + "] is unreadable"); + throw new IllegalStateException("metadata index at [" + dataPath + + "] has been changed by an external force after it was last written by Elasticsearch and is now unreadable"); } } } From e619f051e58cdbce7b9ee2286a24d4407bd52a4e Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 28 May 2021 07:49:51 +0100 Subject: [PATCH 3/5] fix test --- .../gateway/PersistedClusterStateServiceTests.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java index 94d9b83a6d9e0..5a49684fd601c 100644 --- a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java @@ -665,8 +665,9 @@ public void testFailsIfCorrupt() throws IOException { .collect(Collectors.toList()))); } - assertThat(expectThrows(IllegalStateException.class, persistedClusterStateService::loadOnDiskState).getMessage(), - allOf(startsWith("metadata index at ["), endsWith("] is unreadable"))); + assertThat(expectThrows(IllegalStateException.class, persistedClusterStateService::loadOnDiskState).getMessage(), allOf( + startsWith("metadata index at ["), + endsWith("] has been changed by an external force after it was last written by Elasticsearch and is now unreadable"))); } } From 95378d23f4ea9c56c29d4fe3eecd45010871d336 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 7 Jun 2021 12:47:36 +0100 Subject: [PATCH 4/5] Expand 'metadata index' --- .../org/elasticsearch/gateway/PersistedClusterStateService.java | 2 +- .../gateway/PersistedClusterStateServiceTests.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java index 3b448668d69b5..dc13f746f089b 100644 --- a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java @@ -321,7 +321,7 @@ OnDiskState loadOnDiskState(boolean checkClean) throws IOException { if (logger.isErrorEnabled()) { outputStream.bytes().utf8ToString().lines().forEach(l -> logger.error("checkIndex: {}", l)); } - throw new IllegalStateException("metadata index at [" + dataPath + + throw new IllegalStateException("the index containing the cluster metadata under the data path [" + dataPath + "] has been changed by an external force after it was last written by Elasticsearch and is now unreadable"); } } diff --git a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java index 5a49684fd601c..f90ad2faf7ae8 100644 --- a/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/PersistedClusterStateServiceTests.java @@ -666,7 +666,7 @@ public void testFailsIfCorrupt() throws IOException { } assertThat(expectThrows(IllegalStateException.class, persistedClusterStateService::loadOnDiskState).getMessage(), allOf( - startsWith("metadata index at ["), + startsWith("the index containing the cluster metadata under the data path ["), endsWith("] has been changed by an external force after it was last written by Elasticsearch and is now unreadable"))); } } From 9e89fed6f2b2628d1ac09af6f631639ee05364ef Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 7 Jun 2021 12:50:20 +0100 Subject: [PATCH 5/5] Reword node ID mismatch too --- .../elasticsearch/gateway/PersistedClusterStateService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java index dc13f746f089b..b5a3c3201746a 100644 --- a/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java +++ b/server/src/main/java/org/elasticsearch/gateway/PersistedClusterStateService.java @@ -331,8 +331,8 @@ OnDiskState loadOnDiskState(boolean checkClean) throws IOException { onDiskState = loadOnDiskState(dataPath, directoryReader); if (nodeId.equals(onDiskState.nodeId) == false) { - throw new IllegalStateException("unexpected node ID in metadata, found [" + onDiskState.nodeId + - "] in [" + dataPath + "] but expected [" + nodeId + "]"); + throw new IllegalStateException("the index containing the cluster metadata under the data path [" + dataPath + + "] belongs to a node with ID [" + onDiskState.nodeId + "] but this node's ID is [" + nodeId + "]"); } } } catch (IndexNotFoundException e) {