From bc46fc0181af5a9ff6f9ed1066995930e0236d04 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Fri, 19 Feb 2021 14:43:49 +0000 Subject: [PATCH] [ML] Fix logic for moving .ml-state-write alias from legacy to new (#69039) When multiple jobs start up together on a node following an upgrade, each one of them will trigger a check that the .ml-state* indices are as expected and the .ml-state-write alias points to the correct index. There were a couple of flaws in the logic: 1. We were not considering the possibility that one or more existing .ml-state* indices might be hidden. 2. If multiple jobs tried to create a .ml-state-000001 index simultaneously all but the first would fail. We accounted for this, but then did not follow up with the correct alias update request for those index creation requests that failed. This could cause all but one of the jobs starting up on the node to spuriously fail. Both these problems are fixed by this PR. Fixes #68925 --- .../xpack/core/ml/utils/MlIndexAndAlias.java | 16 +++++++++++----- .../ml/integration/JobStorageDeletionTaskIT.java | 4 ++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java index 5c98904095740..82d5780ae9066 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MlIndexAndAlias.java @@ -118,7 +118,7 @@ public static void createIndexAndAliasIfNecessary(Client client, // The initial index name must be suitable for rollover functionality. String firstConcreteIndex = indexPatternPrefix + "-000001"; String[] concreteIndexNames = - resolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandOpen(), indexPattern); + resolver.concreteIndexNames(clusterState, IndicesOptions.lenientExpandHidden(), indexPattern); Optional indexPointedByCurrentWriteAlias = clusterState.getMetadata().hasAlias(alias) ? clusterState.getMetadata().getIndicesLookup().get(alias).getIndices().stream().findFirst() : Optional.empty(); @@ -241,11 +241,17 @@ private static void createFirstConcreteIndex(Client client, ActionListener.wrap( createIndexResponse -> listener.onResponse(true), createIndexFailure -> { - // If it was created between our last check, and this request being handled, we should add the alias - // Adding an alias that already exists is idempotent. So, no need to double check if the alias exists - // as well. if (ExceptionsHelper.unwrapCause(createIndexFailure) instanceof ResourceAlreadyExistsException) { - updateWriteAlias(client, alias, null, index, listener); + // If it was created between our last check and this request being handled, we should add the alias + // if we were asked to add it on creation. Adding an alias that already exists is idempotent. So + // no need to double check if the alias exists as well. But if we weren't asked to add the alias + // on creation then we should leave it up to the caller to decide what to do next (some call sites + // already have more advanced alias update logic in their success handlers). + if (addAlias) { + updateWriteAlias(client, alias, null, index, listener); + } else { + listener.onResponse(true); + } } else { listener.onFailure(createIndexFailure); } diff --git a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/JobStorageDeletionTaskIT.java b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/JobStorageDeletionTaskIT.java index 061023f04e457..3b3ea7696b9a7 100644 --- a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/JobStorageDeletionTaskIT.java +++ b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/JobStorageDeletionTaskIT.java @@ -54,7 +54,7 @@ */ public class JobStorageDeletionTaskIT extends BaseMlIntegTestCase { - private static long bucketSpan = AnalysisConfig.Builder.DEFAULT_BUCKET_SPAN.getMillis(); + private static final long bucketSpan = AnalysisConfig.Builder.DEFAULT_BUCKET_SPAN.getMillis(); private static final String UNRELATED_INDEX = "unrelated-data"; private JobResultsProvider jobResultsProvider; @@ -179,7 +179,7 @@ public void testDeleteDedicatedJobWithDataInShared() throws Exception { .indices() .prepareGetIndex() .setIndices(dedicatedIndex) - .setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN) + .setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_CLOSED_HIDDEN) .get() .indices().length, equalTo(0));