From dc726e60a9a247cecee5566497cdbd6f3c274b60 Mon Sep 17 00:00:00 2001 From: Andrei Dan Date: Mon, 20 Mar 2023 11:50:55 -0400 Subject: [PATCH] Allow ILM to transition to implicit cached steps (#91779) ILM tries to honour the cached phase however, some steps are implicit (e.g. injected actions or the terminal policy/phase step) Currently, ILM would throw an exception if the currently cached phase was removed: ``` step [{"phase":"warm","action":"complete","name":"complete"}] for index [index] with policy [my-policy] does not exist ``` ILM would currently also throw an exception if the next step was an implicit one and the phase was removed from the underlying policy e.g. if the index is on the `migrate` step and looking to transition to the `check-migration` step, whilt the `warm` phase doesn't exist in the policy anymore ``` step [{"phase":"warm","action":"migrate","name":"check-migration"}] for index [index] with policy [my-policy] does not exist ``` This fixes these scenarios by enhancing the `PolicyStepsRegistry#parseStepKeysFromPhase` method to compute all the steps in the phase (including all implicit steps) --- docs/changelog/91779.yaml | 6 ++ .../xpack/core/ilm/PhaseCacheManagement.java | 2 +- .../xpack/ilm/IndexLifecycleTransition.java | 5 +- .../xpack/ilm/PolicyStepsRegistry.java | 52 ++++++++++++++-- .../ilm/IndexLifecycleTransitionTests.java | 60 +++++++++++++++++++ .../ilm/MoveToNextStepUpdateTaskTests.java | 56 +++++++++++++++-- 6 files changed, 168 insertions(+), 13 deletions(-) create mode 100644 docs/changelog/91779.yaml diff --git a/docs/changelog/91779.yaml b/docs/changelog/91779.yaml new file mode 100644 index 0000000000000..504951b5c5075 --- /dev/null +++ b/docs/changelog/91779.yaml @@ -0,0 +1,6 @@ +pr: 91779 +summary: Allow ILM to transition to implicit cached steps +area: ILM+SLM +type: bug +issues: + - 91749 diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java index a18e84f92cb0b..9be37e9193b64 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ilm/PhaseCacheManagement.java @@ -263,7 +263,7 @@ public static boolean isIndexPhaseDefinitionUpdatable( * information, returns null. */ @Nullable - public static Set readStepKeys( + static Set readStepKeys( final NamedXContentRegistry xContentRegistry, final Client client, final String phaseDef, diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java index de277282b4af1..a87f2d4d2151e 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransition.java @@ -85,8 +85,9 @@ public static void validateTransition( } final Set cachedStepKeys = stepRegistry.parseStepKeysFromPhase( - lifecycleState.phaseDefinition(), - lifecycleState.phase() + policyName, + lifecycleState.phase(), + lifecycleState.phaseDefinition() ); boolean isNewStepCached = cachedStepKeys != null && cachedStepKeys.contains(newStepKey); diff --git a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java index 1ef108c74d195..a8a6f211c232f 100644 --- a/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java +++ b/x-pack/plugin/ilm/src/main/java/org/elasticsearch/xpack/ilm/PolicyStepsRegistry.java @@ -33,7 +33,6 @@ import org.elasticsearch.xpack.core.ilm.LifecyclePolicy; import org.elasticsearch.xpack.core.ilm.LifecyclePolicyMetadata; import org.elasticsearch.xpack.core.ilm.Phase; -import org.elasticsearch.xpack.core.ilm.PhaseCacheManagement; import org.elasticsearch.xpack.core.ilm.PhaseExecutionInfo; import org.elasticsearch.xpack.core.ilm.Step; import org.elasticsearch.xpack.core.ilm.TerminalPolicyStep; @@ -42,12 +41,14 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; -import java.util.Optional; +import java.util.Objects; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; public class PolicyStepsRegistry { private static final Logger logger = LogManager.getLogger(PolicyStepsRegistry.class); @@ -245,13 +246,50 @@ public Step.StepKey getFirstStepForPhaseAndAction(ClusterState state, Index inde /* * Parses the step keys from the {@code phaseDef} for the given phase. + * ILM makes use of some implicit steps that belong to actions that we automatically inject + * (eg. unfollow and migrate) or special purpose steps like the phase `complete` step. + * + * The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However, + * we have a few of exceptional cases: + * - null is treated as the `new` phase (see {@code InitializePolicyContextStep}) + * - the `new` phase is not stored as json but ... "new" + * - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed" + * (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching + * the end of the phase) + * + * This method returns **all** the steps that are part of the phase definition including the implicit steps. + * * Returns null if there's a parsing error. */ @Nullable - public Set parseStepKeysFromPhase(String phaseDef, String currentPhase) { - return PhaseCacheManagement.readStepKeys(xContentRegistry, client, phaseDef, currentPhase, licenseState); + public Set parseStepKeysFromPhase(String policy, String currentPhase, String phaseDef) { + try { + String phaseDefNonNull = Objects.requireNonNullElse(phaseDef, InitializePolicyContextStep.INITIALIZATION_PHASE); + return parseStepsFromPhase(policy, currentPhase, phaseDefNonNull).stream().map(Step::getKey).collect(Collectors.toSet()); + } catch (IOException e) { + logger.trace( + () -> String.format( + Locale.ROOT, + "unable to parse steps for policy [{}], phase [{}], and phase definition [{}]", + policy, + currentPhase, + phaseDef + ), + e + ); + return null; + } } + /** + * The {@code phaseDef} is *mostly* a valid json we store in the lifecycle execution state. However, + * we have a few of exceptional cases: + * - null is treated as the `new` phase (see {@code InitializePolicyContextStep}) + * - the `new` phase is not stored as json but ... "new" + * - there's a legacy step, the {@code TerminalPolicyStep} which is also not stored as json but as "completed" + * (note: this step exists only for BWC reasons as these days we move to the {@code PhaseCompleteStep} when reaching + * the end of the phase) + */ private List parseStepsFromPhase(String policy, String currentPhase, String phaseDef) throws IOException { final PhaseExecutionInfo phaseExecutionInfo; LifecyclePolicyMetadata policyMetadata = lifecyclePolicyMap.get(policy); @@ -341,8 +379,10 @@ public Step getStep(final IndexMetadata indexMetadata, final Step.StepKey stepKe } // parse phase steps from the phase definition in the index settings - final String phaseJson = Optional.ofNullable(indexMetadata.getLifecycleExecutionState().phaseDefinition()) - .orElse(InitializePolicyContextStep.INITIALIZATION_PHASE); + final String phaseJson = Objects.requireNonNullElse( + indexMetadata.getLifecycleExecutionState().phaseDefinition(), + InitializePolicyContextStep.INITIALIZATION_PHASE + ); final List phaseSteps; try { diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java index db5a7c5369347..57c5b9fbb2605 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/IndexLifecycleTransitionTests.java @@ -684,6 +684,66 @@ public void testValidateTransitionToCachedStepWhenMissingPhaseFromPolicy() { } } + public void testValidateTransitionToInjectedMissingStep() { + // we'll test the case when the warm phase was deleted and the next step is an injected one + + LifecycleExecutionState.Builder executionState = LifecycleExecutionState.builder() + .setPhase("warm") + .setAction("migrate") + .setStep("migrate") + .setPhaseDefinition(""" + { + "policy" : "my-policy", + "phase_definition" : { + "min_age" : "20m", + "actions" : { + "set_priority" : { + "priority" : 150 + } + } + }, + "version" : 1, + "modified_date_in_millis" : 1578521007076 + }"""); + + IndexMetadata meta = buildIndexMetadata("my-policy", executionState); + + try (Client client = new NoOpClient(getTestName())) { + Step.StepKey currentStepKey = new Step.StepKey("warm", MigrateAction.NAME, MigrateAction.NAME); + Step.StepKey nextStepKey = new Step.StepKey("warm", MigrateAction.NAME, DataTierMigrationRoutedStep.NAME); + + Step.StepKey waitForRolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, WaitForRolloverReadyStep.NAME); + Step.StepKey rolloverStepKey = new Step.StepKey("hot", RolloverAction.NAME, RolloverStep.NAME); + Step waitForRolloverReadyStep = new WaitForRolloverReadyStep( + waitForRolloverStepKey, + rolloverStepKey, + client, + null, + null, + null, + 1L, + null, + null, + null, + null, + null, + null + ); + + try { + IndexLifecycleTransition.validateTransition( + meta, + currentStepKey, + nextStepKey, + createOneStepPolicyStepRegistry("my-policy", waitForRolloverReadyStep) + ); + } catch (Exception e) { + logger.error(e.getMessage(), e); + fail("validateTransition should not throw exception on valid transitions"); + } + } + } + public void testMoveClusterStateToFailedStep() { String indexName = "my_index"; String policyName = "my_policy"; diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java index bdf6229b3ac1c..32175ae2fdda7 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/ilm/MoveToNextStepUpdateTaskTests.java @@ -8,6 +8,7 @@ import org.apache.lucene.util.SetOnce; import org.elasticsearch.Version; +import org.elasticsearch.client.internal.Client; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -27,20 +28,34 @@ import org.elasticsearch.xpack.core.ilm.Step.StepKey; import org.junit.Before; +import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.cluster.metadata.LifecycleExecutionState.ILM_CUSTOM_METADATA_KEY; import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class MoveToNextStepUpdateTaskTests extends ESTestCase { + private static final NamedXContentRegistry REGISTRY; + String policy; ClusterState clusterState; Index index; LifecyclePolicy lifecyclePolicy; + static { + try (IndexLifecycle indexLifecycle = new IndexLifecycle(Settings.EMPTY)) { + List entries = new ArrayList<>(indexLifecycle.getNamedXContent()); + REGISTRY = new NamedXContentRegistry(entries); + } + } + @Before public void setupClusterState() { policy = randomAlphaOfLength(10); @@ -75,13 +90,22 @@ public void testExecuteSuccessfullyMoved() throws Exception { setStateToKey(currentStepKey, now); AtomicBoolean changed = new AtomicBoolean(false); + Client client = mock(Client.class); + when(client.settings()).thenReturn(Settings.EMPTY); + AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client); + stepRegistry.update( + new IndexLifecycleMetadata( + Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)), + OperationMode.RUNNING + ) + ); MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask( index, policy, currentStepKey, nextStepKey, () -> now, - new AlwaysExistingStepRegistry(), + stepRegistry, state -> changed.set(true) ); ClusterState newState = task.execute(clusterState); @@ -140,13 +164,22 @@ public void testExecuteSuccessfulMoveWithInvalidNextStep() throws Exception { setStateToKey(currentStepKey, now); SetOnce changed = new SetOnce<>(); + Client client = mock(Client.class); + when(client.settings()).thenReturn(Settings.EMPTY); + AlwaysExistingStepRegistry stepRegistry = new AlwaysExistingStepRegistry(client); + stepRegistry.update( + new IndexLifecycleMetadata( + Map.of(policy, new LifecyclePolicyMetadata(lifecyclePolicy, Collections.emptyMap(), 2L, 2L)), + OperationMode.RUNNING + ) + ); MoveToNextStepUpdateTask task = new MoveToNextStepUpdateTask( index, policy, currentStepKey, invalidNextStep, () -> now, - new AlwaysExistingStepRegistry(), + stepRegistry, s -> changed.set(true) ); ClusterState newState = task.execute(clusterState); @@ -186,7 +219,11 @@ public void testOnFailure() { private static class AlwaysExistingStepRegistry extends PolicyStepsRegistry { AlwaysExistingStepRegistry() { - super(new NamedXContentRegistry(Collections.emptyList()), null, null); + this(null); + } + + AlwaysExistingStepRegistry(Client client) { + super(REGISTRY, client, null); } @Override @@ -215,7 +252,18 @@ private void setStateToKey(StepKey stepKey, long now) { lifecycleState.setActionTime(now); lifecycleState.setStep(stepKey.name()); lifecycleState.setStepTime(now); - lifecycleState.setPhaseDefinition("{\"actions\":{\"TEST_ACTION\":{}}}"); + + lifecycleState.setPhaseDefinition(String.format(Locale.ROOT, """ + { + "policy" : "%s", + "phase_definition" : { + "min_age" : "20m", + "actions" : { + } + }, + "version" : 1, + "modified_date_in_millis" : 1578521007076 + }""", policy)); clusterState = ClusterState.builder(clusterState) .metadata( Metadata.builder(clusterState.getMetadata())