From 45acf93c3cbdcd6605e0c77b54ed278cd586f42b Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Fri, 18 Mar 2022 19:21:02 +0000 Subject: [PATCH 01/13] Updates shrink action to new interface Signed-off-by: Clay Downs --- spi/build.gradle | 1 + .../model/ActionProperties.kt | 23 +- .../model/ShrinkActionProperties.kt | 101 +++++ .../indexstatemanagement/model/StepContext.kt | 4 +- .../indexstatemanagement/ISMActionsParser.kt | 2 + .../ManagedIndexRunner.kt | 6 +- .../action/ShrinkAction.kt | 139 +++++++ .../action/ShrinkActionParser.kt | 78 ++++ .../step/shrink/AttemptMoveShardsStep.kt | 352 ++++++++++++++++ .../step/shrink/AttemptShrinkStep.kt | 100 +++++ .../step/shrink/WaitForMoveShardsStep.kt | 149 +++++++ .../step/shrink/WaitForShrinkStep.kt | 129 ++++++ .../indexstatemanagement/util/StepUtils.kt | 83 ++++ .../opensearchapi/OpenSearchExtensions.kt | 8 + .../mappings/opendistro-ism-config.json | 29 +- .../mappings/opendistro-ism-history.json | 6 +- .../IndexManagementRestTestCase.kt | 4 +- .../indexstatemanagement/TestHelpers.kt | 31 ++ .../action/ShrinkActionIT.kt | 389 ++++++++++++++++++ .../model/XContentTests.kt | 9 + .../step/AttemptCloseStepTests.kt | 14 +- .../step/AttemptDeleteStepTests.kt | 10 +- .../step/AttemptOpenStepTests.kt | 8 +- .../step/AttemptSetIndexPriorityStepTests.kt | 10 +- .../step/AttemptSetReplicaCountStepTests.kt | 8 +- .../step/AttemptSnapshotStepTests.kt | 16 +- .../step/AttemptTransitionStepTests.kt | 8 +- .../step/SetReadOnlyStepTests.kt | 8 +- .../step/SetReadWriteStepTests.kt | 8 +- .../step/WaitForRollupCompletionStepTests.kt | 4 +- .../step/WaitForSnapshotStepTests.kt | 22 +- .../cached-opendistro-ism-config.json | 29 +- .../cached-opendistro-ism-history.json | 6 +- 33 files changed, 1730 insertions(+), 64 deletions(-) create mode 100644 spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt create mode 100644 
src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt create mode 100644 src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt diff --git a/spi/build.gradle b/spi/build.gradle index a9806df4f..f8ba3f69b 100644 --- a/spi/build.gradle +++ b/spi/build.gradle @@ -53,6 +53,7 @@ configurations.all { dependencies { compileOnly "org.opensearch:opensearch:${opensearch_version}" + compileOnly "org.opensearch:opensearch-job-scheduler-spi:${job_scheduler_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib:${kotlin_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib-jdk8:${kotlin_version}" diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt index 175dc447d..19a050e49 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt +++ b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt @@ -12,7 +12,9 @@ import org.opensearch.common.xcontent.ToXContent import 
org.opensearch.common.xcontent.ToXContentFragment import org.opensearch.common.xcontent.XContentBuilder import org.opensearch.common.xcontent.XContentParser -import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.common.xcontent.XContentParser.Token +import org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.indexmanagement.spi.indexstatemanagement.addObject /** Properties that will persist across steps of a single Action. Will be stored in the [ActionMetaData]. */ // TODO: Create namespaces to group properties together @@ -20,7 +22,8 @@ data class ActionProperties( val maxNumSegments: Int? = null, val snapshotName: String? = null, val rollupId: String? = null, - val hasRollupFailed: Boolean? = null + val hasRollupFailed: Boolean? = null, + val shrinkActionProperties: ShrinkActionProperties? = null ) : Writeable, ToXContentFragment { override fun writeTo(out: StreamOutput) { @@ -28,6 +31,7 @@ data class ActionProperties( out.writeOptionalString(snapshotName) out.writeOptionalString(rollupId) out.writeOptionalBoolean(hasRollupFailed) + out.writeOptionalWriteable(shrinkActionProperties) } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -35,6 +39,7 @@ data class ActionProperties( if (snapshotName != null) builder.field(Properties.SNAPSHOT_NAME.key, snapshotName) if (rollupId != null) builder.field(Properties.ROLLUP_ID.key, rollupId) if (hasRollupFailed != null) builder.field(Properties.HAS_ROLLUP_FAILED.key, hasRollupFailed) + if (shrinkActionProperties != null) builder.addObject(ShrinkActionProperties.SHRINK_ACTION_PROPERTIES, shrinkActionProperties, params) return builder } @@ -46,8 +51,8 @@ data class ActionProperties( val snapshotName: String? = si.readOptionalString() val rollupId: String? = si.readOptionalString() val hasRollupFailed: Boolean? 
= si.readOptionalBoolean() - - return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed) + val shrinkActionProperties: ShrinkActionProperties? = si.readOptionalWriteable { ShrinkActionProperties.fromStreamInput(it) } + return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed, shrinkActionProperties) } fun parse(xcp: XContentParser): ActionProperties { @@ -55,9 +60,10 @@ data class ActionProperties( var snapshotName: String? = null var rollupId: String? = null var hasRollupFailed: Boolean? = null + var shrinkActionProperties: ShrinkActionProperties? = null - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) - while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(Token.START_OBJECT, xcp.currentToken(), xcp) + while (xcp.nextToken() != Token.END_OBJECT) { val fieldName = xcp.currentName() xcp.nextToken() @@ -66,10 +72,13 @@ data class ActionProperties( Properties.SNAPSHOT_NAME.key -> snapshotName = xcp.text() Properties.ROLLUP_ID.key -> rollupId = xcp.text() Properties.HAS_ROLLUP_FAILED.key -> hasRollupFailed = xcp.booleanValue() + ShrinkActionProperties.SHRINK_ACTION_PROPERTIES -> { + shrinkActionProperties = if (xcp.currentToken() == Token.VALUE_NULL) null else ShrinkActionProperties.parse(xcp) + } } } - return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed) + return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed, shrinkActionProperties) } } diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt new file mode 100644 index 000000000..f5d236c5c --- /dev/null +++ b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt @@ -0,0 +1,101 @@ +/* + * Copyright OpenSearch 
/**
 * State that the shrink action carries from one step to the next.
 *
 * @property nodeName name of the node the source index's shards are routed to before shrinking
 * @property targetIndexName name of the index the shrink will create
 * @property targetNumShards number of primary shards the target index will have
 * @property lockPrimaryTerm primary term of the lock held on [nodeName]
 * @property lockSeqNo sequence number of the lock held on [nodeName]
 * @property lockEpochSecond lock time of that lock, in epoch seconds
 */
data class ShrinkActionProperties(
    val nodeName: String,
    val targetIndexName: String,
    val targetNumShards: Int,
    val lockPrimaryTerm: Long,
    val lockSeqNo: Long,
    val lockEpochSecond: Long
) : Writeable, ToXContentFragment {

    /** Writes the properties in the exact order [fromStreamInput] reads them back. */
    override fun writeTo(out: StreamOutput) {
        out.writeString(nodeName)
        out.writeString(targetIndexName)
        out.writeInt(targetNumShards)
        out.writeLong(lockPrimaryTerm)
        out.writeLong(lockSeqNo)
        out.writeLong(lockEpochSecond)
    }

    /** Emits the properties as fields of the object the caller has already opened (fragment, no start/end object). */
    override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder {
        builder.field(ShrinkProperties.NODE_NAME.key, nodeName)
        builder.field(ShrinkProperties.TARGET_INDEX_NAME.key, targetIndexName)
        builder.field(ShrinkProperties.TARGET_NUM_SHARDS.key, targetNumShards)
        builder.field(ShrinkProperties.LOCK_SEQ_NO.key, lockSeqNo)
        builder.field(ShrinkProperties.LOCK_PRIMARY_TERM.key, lockPrimaryTerm)
        builder.field(ShrinkProperties.LOCK_EPOCH_SECOND.key, lockEpochSecond)
        return builder
    }

    companion object {
        const val SHRINK_ACTION_PROPERTIES = "shrink_action_properties"

        /** Reads the properties in the order [writeTo] wrote them. */
        fun fromStreamInput(si: StreamInput): ShrinkActionProperties {
            val nodeName: String = si.readString()
            val targetIndexName: String = si.readString()
            val targetNumShards: Int = si.readInt()
            val lockPrimaryTerm: Long = si.readLong()
            val lockSeqNo: Long = si.readLong()
            val lockEpochSecond: Long = si.readLong()

            return ShrinkActionProperties(nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond)
        }

        /**
         * Parses the object the parser is currently positioned on (current token must be START_OBJECT).
         *
         * Unknown fields are ignored, including any nested object or array they carry.
         *
         * @throws IllegalArgumentException if any of the six properties is absent
         */
        fun parse(xcp: XContentParser): ShrinkActionProperties {
            var nodeName: String? = null
            var targetIndexName: String? = null
            var targetNumShards: Int? = null
            var lockPrimaryTerm: Long? = null
            var lockSeqNo: Long? = null
            var lockEpochSecond: Long? = null

            XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp)
            while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {
                val fieldName = xcp.currentName()
                xcp.nextToken()

                when (fieldName) {
                    ShrinkProperties.NODE_NAME.key -> nodeName = xcp.text()
                    ShrinkProperties.TARGET_INDEX_NAME.key -> targetIndexName = xcp.text()
                    ShrinkProperties.TARGET_NUM_SHARDS.key -> targetNumShards = xcp.intValue()
                    ShrinkProperties.LOCK_PRIMARY_TERM.key -> lockPrimaryTerm = xcp.longValue()
                    ShrinkProperties.LOCK_SEQ_NO.key -> lockSeqNo = xcp.longValue()
                    ShrinkProperties.LOCK_EPOCH_SECOND.key -> lockEpochSecond = xcp.longValue()
                    // Without this, an unknown field holding an object/array would have its nested keys read as
                    // top-level properties and its END_OBJECT would end the loop early. No-op on scalar values.
                    else -> xcp.skipChildren()
                }
            }

            return ShrinkActionProperties(
                required(nodeName, ShrinkProperties.NODE_NAME),
                required(targetIndexName, ShrinkProperties.TARGET_INDEX_NAME),
                required(targetNumShards, ShrinkProperties.TARGET_NUM_SHARDS),
                required(lockPrimaryTerm, ShrinkProperties.LOCK_PRIMARY_TERM),
                required(lockSeqNo, ShrinkProperties.LOCK_SEQ_NO),
                required(lockEpochSecond, ShrinkProperties.LOCK_EPOCH_SECOND)
            )
        }

        /** Returns [value], or fails with a message that names the missing [property]. */
        private fun <T : Any> required(value: T?, property: ShrinkProperties): T =
            requireNotNull(value) { "$SHRINK_ACTION_PROPERTIES is missing required field [${property.key}]" }
    }

    /** XContent field names of the individual shrink properties. */
    enum class ShrinkProperties(val key: String) {
        NODE_NAME("node_name"),
        TARGET_INDEX_NAME("target_index_name"),
        TARGET_NUM_SHARDS("target_num_shards"),
        LOCK_SEQ_NO("lock_seq_no"),
        LOCK_PRIMARY_TERM("lock_primary_term"),
        LOCK_EPOCH_SECOND("lock_epoch_second")
    }
}
b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt @@ -10,6 +10,7 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.settings.Settings import org.opensearch.common.util.concurrent.ThreadContext import org.opensearch.commons.authuser.User +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService class StepContext( @@ -20,8 +21,9 @@ class StepContext( val user: User?, val scriptService: ScriptService, val settings: Settings, + val jobContext: JobExecutionContext ) { fun getUpdatedContext(metadata: ManagedIndexMetaData): StepContext { - return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings) + return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings, this.jobContext) } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt index 9b0160658..7783250e5 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt @@ -20,6 +20,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ReadWriteActio import org.opensearch.indexmanagement.indexstatemanagement.action.ReplicaCountActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.RolloverActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.RollupActionParser +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.SnapshotActionParser import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import 
org.opensearch.indexmanagement.spi.indexstatemanagement.ActionParser @@ -45,6 +46,7 @@ class ISMActionsParser private constructor() { ReplicaCountActionParser(), RollupActionParser(), RolloverActionParser(), + ShrinkActionParser(), SnapshotActionParser() ) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt index c4cdce159..be30fbda7 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt @@ -220,7 +220,7 @@ object ManagedIndexRunner : if (lock == null) { logger.debug("Could not acquire lock [${lock?.lockId}] for ${job.index}") } else { - runManagedIndexConfig(job) + runManagedIndexConfig(job, context) // Release lock val released: Boolean = context.lockService.suspendUntil { release(lock, it) } if (!released) { @@ -231,7 +231,7 @@ object ManagedIndexRunner : } @Suppress("ReturnCount", "ComplexMethod", "LongMethod", "ComplexCondition", "NestedBlockDepth") - private suspend fun runManagedIndexConfig(managedIndexConfig: ManagedIndexConfig) { + private suspend fun runManagedIndexConfig(managedIndexConfig: ManagedIndexConfig, jobContext: JobExecutionContext) { logger.debug("Run job for index ${managedIndexConfig.index}") // doing a check of local cluster health as we do not want to overload master node with potentially a lot of calls if (clusterIsRed()) { @@ -304,7 +304,7 @@ object ManagedIndexRunner : val state = policy.getStateToExecute(managedIndexMetaData) val action: Action? 
= state?.getActionToExecute(managedIndexMetaData, indexMetadataProvider) - val stepContext = StepContext(managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings) + val stepContext = StepContext(managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings, jobContext) val step: Step? = action?.getStepToExecute(stepContext) val currentActionMetaData = action?.getUpdatedActionMetadata(managedIndexMetaData, state.name) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt new file mode 100644 index 000000000..675b74f01 --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -0,0 +1,139 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.action + +import org.opensearch.action.admin.indices.alias.Alias +import org.opensearch.common.io.stream.StreamOutput +import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.common.xcontent.ToXContent +import org.opensearch.common.xcontent.XContentBuilder +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptShrinkStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForShrinkStep +import org.opensearch.indexmanagement.opensearchapi.aliasesField +import org.opensearch.indexmanagement.spi.indexstatemanagement.Action +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext + 
/**
 * ISM action that shrinks an index into one with fewer primary shards.
 *
 * Exactly one of [numNewShards], [maxShardSize] and [percentageDecrease] must be given; it determines how many
 * shards the target index gets.
 *
 * @param numNewShards explicit number of primary shards for the target index; must be positive
 * @param maxShardSize upper bound on the size of a target shard; must be larger than zero bytes
 * @param percentageDecrease fraction, strictly between 0.0 and 1.0, by which to reduce the shard count
 * @param targetIndexSuffix suffix appended to the source index name to form the target index name; the move-shards
 * step falls back to its own default when this is null
 * @param aliases aliases serialized with the action (presumably applied to the target index - confirm in the shrink step)
 * @param forceUnsafe when null or false, the move-shards step refuses to run on an index that has no replicas
 * @param index position of this action within its state
 * @throws IllegalArgumentException if the shard configuration is missing, ambiguous or out of range
 */
@Suppress("LongParameterList")
class ShrinkAction(
    val numNewShards: Int?,
    val maxShardSize: ByteSizeValue?,
    val percentageDecrease: Double?,
    val targetIndexSuffix: String?,
    val aliases: List<Alias>?,
    val forceUnsafe: Boolean?,
    index: Int
) : Action(name, index) {
    init {
        /* Each shard config contributes a marker that is congruent to 1 modulo 3 (1, 4 and 7).
         * The sum of the markers is therefore congruent to 1 modulo 3 only when exactly one config is set:
         * no config gives 0, any two give a remainder of 2 and all three give a remainder of 0.
         * Every invalid combination has a unique sum, which selects the error message below.
         */
        val maxShardSizeNotNull = if (maxShardSize != null) MAX_SHARD_NOT_NULL else 0
        val percentageDecreaseNotNull = if (percentageDecrease != null) PERCENTAGE_DECREASE_NOT_NULL else 0
        val numNewShardsNotNull = if (numNewShards != null) NUM_SHARDS_NOT_NULL else 0
        val numSet = maxShardSizeNotNull + percentageDecreaseNotNull + numNewShardsNotNull
        require(numSet % NUM_SHARD_CONFIGS == 1) {
            when (numSet) {
                MAX_SHARD_NOT_NULL + PERCENTAGE_DECREASE_NOT_NULL ->
                    "Cannot specify both maximum shard size and percentage decrease. Please pick one."
                MAX_SHARD_NOT_NULL + NUM_SHARDS_NOT_NULL ->
                    "Cannot specify both maximum shard size and number of new shards. Please pick one."
                PERCENTAGE_DECREASE_NOT_NULL + NUM_SHARDS_NOT_NULL ->
                    "Cannot specify both percentage decrease and number of new shards. Please pick one."
                MAX_SHARD_NOT_NULL + PERCENTAGE_DECREASE_NOT_NULL + NUM_SHARDS_NOT_NULL ->
                    "Cannot specify maximum shard size, percentage decrease, and number of new shards. Please pick one."
                // Reached when none of the three configs is set (numSet == 0).
                else -> "Must specify exactly one of maximum shard size, percentage decrease, or number of new shards."
            }
        }
        if (percentageDecrease != null) {
            // Plain comparisons instead of compareTo(..) == 1 / -1: compareTo only promises the sign of its result.
            require(percentageDecrease > 0.0 && percentageDecrease < 1.0) {
                "Percentage decrease must be between 0.0 and 1.0 exclusively"
            }
        }
        if (maxShardSize != null) {
            require(maxShardSize.bytes > 0) { "The max_shard_size must be greater than 0." }
        }
        if (numNewShards != null) {
            // An index cannot be shrunk to zero or a negative number of shards; reject the policy up front.
            require(numNewShards > 0) { "The num_new_shards must be greater than 0." }
        }
    }

    private val attemptMoveShardsStep = AttemptMoveShardsStep(this)
    private val waitForMoveShardsStep = WaitForMoveShardsStep(this)
    private val attemptShrinkStep = AttemptShrinkStep(this)
    private val waitForShrinkStep = WaitForShrinkStep(this)

    // Insertion order mirrors execution order.
    private val stepNameToStep: LinkedHashMap<String, Step> = linkedMapOf(
        AttemptMoveShardsStep.name to attemptMoveShardsStep,
        WaitForMoveShardsStep.name to waitForMoveShardsStep,
        AttemptShrinkStep.name to attemptShrinkStep,
        WaitForShrinkStep.name to waitForShrinkStep
    )

    /** Returns the four shrink steps in execution order. */
    override fun getSteps(): List<Step> = listOf(attemptMoveShardsStep, waitForMoveShardsStep, attemptShrinkStep, waitForShrinkStep)

    /**
     * Picks the step to run next from the step recorded in [context]'s metadata.
     *
     * Starts at the first step when no step is recorded or the recorded step belongs to another action. Advances to
     * the following step when the recorded one is COMPLETED (the last step stays on itself). Otherwise the recorded
     * step is returned so it runs again.
     */
    @SuppressWarnings("ReturnCount")
    override fun getStepToExecute(context: StepContext): Step {
        val stepMetaData = context.metadata.stepMetaData ?: return attemptMoveShardsStep
        // If the current step is not from this action, assume it is from another action.
        val currentStep = stepNameToStep[stepMetaData.name] ?: return attemptMoveShardsStep

        // step not completed
        if (stepMetaData.stepStatus != Step.StepStatus.COMPLETED) return currentStep

        return when (stepMetaData.name) {
            AttemptMoveShardsStep.name -> waitForMoveShardsStep
            WaitForMoveShardsStep.name -> attemptShrinkStep
            AttemptShrinkStep.name -> waitForShrinkStep
            else -> currentStep
        }
    }

    /** Writes the action as an object named after its type; null settings are omitted. */
    override fun populateAction(builder: XContentBuilder, params: ToXContent.Params) {
        builder.startObject(type)
        if (numNewShards != null) builder.field(NUM_NEW_SHARDS_FIELD, numNewShards)
        if (maxShardSize != null) builder.field(MAX_SHARD_SIZE_FIELD, maxShardSize.stringRep)
        if (percentageDecrease != null) builder.field(PERCENTAGE_DECREASE_FIELD, percentageDecrease)
        if (targetIndexSuffix != null) builder.field(TARGET_INDEX_SUFFIX_FIELD, targetIndexSuffix)
        if (aliases != null) { builder.aliasesField(aliases) }
        if (forceUnsafe != null) builder.field(FORCE_UNSAFE_FIELD, forceUnsafe)
        builder.endObject()
    }

    /** Writes the action to the stream; the field order must match ShrinkActionParser.fromStreamInput. */
    override fun populateAction(out: StreamOutput) {
        out.writeOptionalInt(numNewShards)
        out.writeOptionalWriteable(maxShardSize)
        out.writeOptionalDouble(percentageDecrease)
        out.writeOptionalString(targetIndexSuffix)
        // Presence flag followed by the list, i.e. a hand-rolled optional list.
        if (aliases != null) {
            out.writeBoolean(true)
            out.writeList(aliases)
        } else {
            out.writeBoolean(false)
        }
        out.writeOptionalBoolean(forceUnsafe)
        out.writeInt(actionIndex)
    }

    companion object {
        const val name = "shrink"
        const val NUM_NEW_SHARDS_FIELD = "num_new_shards"
        const val PERCENTAGE_DECREASE_FIELD = "percentage_decrease"
        const val MAX_SHARD_SIZE_FIELD = "max_shard_size"
        const val TARGET_INDEX_SUFFIX_FIELD = "target_index_suffix"
        const val ALIASES_FIELD = "aliases"
        const val FORCE_UNSAFE_FIELD = "force_unsafe"

        // Markers for the shard-config validation in init; each is congruent to 1 modulo NUM_SHARD_CONFIGS.
        const val MAX_SHARD_NOT_NULL = 1
        const val PERCENTAGE_DECREASE_NOT_NULL = 4
        const val NUM_SHARDS_NOT_NULL = 7
        const val NUM_SHARD_CONFIGS = 3
    }
}
/** Deserializes [ShrinkAction]s from the transport stream and from XContent. */
class ShrinkActionParser : ActionParser() {

    /** Reads the fields in the order ShrinkAction.populateAction(StreamOutput) writes them. */
    override fun fromStreamInput(sin: StreamInput): Action {
        val numNewShards = sin.readOptionalInt()
        val maxShardSize = sin.readOptionalWriteable(::ByteSizeValue)
        val percentageDecrease = sin.readOptionalDouble()
        val targetIndexSuffix = sin.readOptionalString()
        // Presence flag followed by the list, matching the writer.
        val aliases = if (sin.readBoolean()) sin.readList(::Alias) else null
        val forceUnsafe = sin.readOptionalBoolean()
        val index = sin.readInt()

        return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, index)
    }

    /**
     * Parses the shrink action object the parser is positioned on (current token must be START_OBJECT).
     *
     * @param index position of the action within its state
     * @throws IllegalArgumentException on an unknown field, or when [ShrinkAction] rejects the parsed settings
     */
    @Suppress("NestedBlockDepth")
    override fun fromXContent(xcp: XContentParser, index: Int): Action {
        var numNewShards: Int? = null
        var maxShardSize: ByteSizeValue? = null
        var percentageDecrease: Double? = null
        var targetIndexSuffix: String? = null
        var aliases: List<Alias>? = null
        var forceUnsafe: Boolean? = null

        ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp)
        while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {
            val fieldName = xcp.currentName()
            xcp.nextToken()

            when (fieldName) {
                NUM_NEW_SHARDS_FIELD -> numNewShards = xcp.intValue()
                MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.textOrNull(), MAX_SHARD_SIZE_FIELD)
                PERCENTAGE_DECREASE_FIELD -> percentageDecrease = xcp.doubleValue()
                TARGET_INDEX_SUFFIX_FIELD -> targetIndexSuffix = xcp.textOrNull()
                ALIASES_FIELD -> {
                    // An explicit null leaves aliases unset.
                    if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) {
                        aliases = parseAliases(xcp)
                    }
                }
                FORCE_UNSAFE_FIELD -> forceUnsafe = xcp.booleanValue()
                else -> throw IllegalArgumentException("Invalid field: [$fieldName] found in ShrinkAction.")
            }
        }

        return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, index)
    }

    /**
     * Reads the value of the aliases field and leaves the parser on the token that closes it.
     *
     * Accepts an object keyed by alias name, or an array of such objects. The array form was previously let
     * through the token check without being consumed, which derailed the enclosing field loop.
     */
    private fun parseAliases(xcp: XContentParser): List<Alias> {
        val parsed = mutableListOf<Alias>()
        when (xcp.currentToken()) {
            XContentParser.Token.START_OBJECT -> {
                // Alias.fromXContent starts on the alias-name field and stops on the END_OBJECT of that alias.
                while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {
                    parsed.add(Alias.fromXContent(xcp))
                }
            }
            XContentParser.Token.START_ARRAY -> {
                while (xcp.nextToken() != XContentParser.Token.END_ARRAY) {
                    ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp)
                    while (xcp.nextToken() != XContentParser.Token.END_OBJECT) {
                        parsed.add(Alias.fromXContent(xcp))
                    }
                }
            }
            // Any other token is rejected here.
            else -> ensureExpectedToken(XContentParser.Token.START_ARRAY, xcp.currentToken(), xcp)
        }
        return parsed
    }

    override fun getActionType(): String {
        return ShrinkAction.name
    }
}
org.opensearch.indexmanagement.indexstatemanagement.step.shrink + +import org.apache.logging.log4j.LogManager +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteRequest +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse +import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand +import org.opensearch.cluster.routing.allocation.decider.Decision +import org.opensearch.common.collect.Tuple +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime +import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel +import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest +import org.opensearch.indexmanagement.opensearchapi.suspendUntil +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import 
org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData
+import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting
+import org.opensearch.jobscheduler.spi.JobExecutionContext
+import org.opensearch.jobscheduler.spi.LockModel
+import java.lang.Exception
+import java.time.Duration
+import java.time.Instant
+import java.util.PriorityQueue
+import kotlin.collections.ArrayList
+import kotlin.collections.HashMap
+import kotlin.collections.HashSet
+import kotlin.math.ceil
+import kotlin.math.floor
+import kotlin.math.min
+import kotlin.math.sqrt
+
+/**
+ * Prepares an index for shrinking: validates the index, picks a node, locks it, then blocks writes on the
+ * index and requires its shards to be allocated to that node.
+ *
+ * The chosen node, target index name, target shard count and lock coordinates are recorded in
+ * [ShrinkActionProperties] and persisted through [getUpdatedManagedIndexMetadata].
+ */
+@SuppressWarnings("TooManyFunctions")
+class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) {
+    private val logger = LogManager.getLogger(javaClass)
+    private var stepStatus = StepStatus.STARTING
+    private var info: Map<String, Any>? = null
+    private var shrinkActionProperties: ShrinkActionProperties? = null
+
+    /**
+     * Runs the validation, node selection, locking and settings update.
+     *
+     * The step ends as FAILED when the action timed out, the target index already exists, the index has no
+     * replicas and `force_unsafe` is not true, the index has a single primary shard, store stats are missing,
+     * the settings update fails, or any exception is thrown. It ends as CONDITION_NOT_MET when the health
+     * request times out or no node could be locked, and as COMPLETED once the settings update is acknowledged.
+     */
+    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "LongMethod")
+    override suspend fun execute(): Step {
+        val context = this.context ?: return this
+        val client = context.client
+        val indexName = context.metadata.index
+        try {
+            // Stop immediately on timeout; otherwise the FAILED status would be overwritten further down.
+            if (hasTimedOut(context.metadata)) {
+                info = mapOf("message" to TIMEOUT_MESSAGE)
+                stepStatus = StepStatus.FAILED
+                return this
+            }
+            // check whether the target index name is available.
+            val indexNameSuffix = action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX
+            val shrinkTargetIndexName = indexName + indexNameSuffix
+            val indexExists = context.clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName)
+            if (indexExists) {
+                info = mapOf("message" to getIndexExistsMessage(shrinkTargetIndexName))
+                stepStatus = StepStatus.FAILED
+                return this
+            }
+
+            // get cluster health
+            val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus()
+            val response: ClusterHealthResponse = client.admin().cluster().suspendUntil { health(healthReq, it) }
+            // check status of cluster health
+            if (response.isTimedOut) {
+                info = mapOf("message" to FAILURE_MESSAGE)
+                stepStatus = StepStatus.CONDITION_NOT_MET
+                return this
+            }
+
+            // force_unsafe check: an index without replicas is only shrunk when force_unsafe is explicitly true
+            val numReplicas = context.clusterService.state().metadata.indices[indexName].numberOfReplicas
+            val shouldFailForceUnsafeCheck = numReplicas == 0 && action.forceUnsafe != true
+            if (shouldFailForceUnsafeCheck) {
+                info = mapOf("message" to UNSAFE_FAILURE_MESSAGE)
+                stepStatus = StepStatus.FAILED
+                return this
+            }
+            // Get the number of primary shards in the index -- all will be active because index health is green
+            val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards
+            if (numOriginalShards == 1) {
+                info = mapOf("message" to ONE_PRIMARY_SHARD_FAILURE_MESSAGE)
+                stepStatus = StepStatus.FAILED
+                return this
+            }
+            // Get the size of the index
+            val statsRequest = IndicesStatsRequest().indices(indexName)
+            val statsResponse: IndicesStatsResponse = client.admin().indices().suspendUntil {
+                stats(statsRequest, it)
+            }
+            val statsStore = statsResponse.total.store
+            if (statsStore == null) {
+                info = mapOf("message" to FAILURE_MESSAGE)
+                stepStatus = StepStatus.FAILED
+                return this
+            }
+            val indexSize = statsStore.sizeInBytes
+
+            // get the number of shards that the target index will have
+            val numTargetShards = getNumTargetShards(numOriginalShards, indexSize)
+            // get the nodes with enough memory
+            val suitableNodes = findSuitableNodes(
+                context,
+                statsResponse,
+                indexSize,
+                NODE_MEMORY_BUFFER_FRACTION,
+                numOriginalShards
+            )
+            // iterate through the nodes and try to acquire a lock on those nodes
+            val lock = acquireLockOnNode(context.jobContext, suitableNodes)
+            if (lock == null) {
+                logger.info("$indexName could not find available node to shrink onto.")
+                info = mapOf("message" to NO_AVAILABLE_NODES_MESSAGE)
+                stepStatus = StepStatus.CONDITION_NOT_MET
+                return this
+            }
+            // move the shards
+            val nodeName = lock.resource[RESOURCE_NAME] as String
+            shrinkActionProperties = ShrinkActionProperties(
+                nodeName,
+                shrinkTargetIndexName,
+                numTargetShards,
+                lock.primaryTerm,
+                lock.seqNo,
+                lock.lockTime.epochSecond
+            )
+            if (!setToReadOnlyAndMoveIndexToNode(context, nodeName)) {
+                // The helper already recorded the failure and released the lock; do not report success.
+                return this
+            }
+            info = mapOf("message" to getSuccessMessage(nodeName))
+            stepStatus = StepStatus.COMPLETED
+            return this
+        } catch (e: Exception) {
+            info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+    }
+
+    /**
+     * Blocks writes on the index and requires its shards to be allocated to [node].
+     *
+     * @return true when the settings update was acknowledged, false when it failed
+     */
+    private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String): Boolean {
+        val updateSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_BLOCKS_WRITE, true)
+            .put(ROUTING_SETTING, node)
+            .build()
+        return issueUpdateAndUnlockIfFail(stepContext, updateSettings, UPDATE_FAILED_MESSAGE)
+    }
+
+    /**
+     * Applies [settings] to the managed index. When the request throws or is not acknowledged, the step is
+     * marked FAILED with [failureMessage] and the node lock taken by this step is released.
+     *
+     * @return true when the update was acknowledged, false otherwise
+     */
+    @Suppress("TooGenericExceptionCaught")
+    private suspend fun issueUpdateAndUnlockIfFail(
+        stepContext: StepContext,
+        settings: Settings,
+        failureMessage: String
+    ): Boolean {
+        try {
+            val response: AcknowledgedResponse = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata, settings)
+            if (response.isAcknowledged) return true
+            stepStatus = StepStatus.FAILED
+            info = mapOf("message" to failureMessage)
+        } catch (e: Exception) {
+            handleException(e, failureMessage)
+        }
+        releaseLockAndClearProperties(stepContext.jobContext)
+        return false
+    }
+
+    /** Releases the lock described by [shrinkActionProperties], if any, and forgets those properties. */
+    private suspend fun releaseLockAndClearProperties(jobContext: JobExecutionContext) {
+        val copyProperties = shrinkActionProperties ?: return
+        val lock = getShrinkLockModel(
+            copyProperties.nodeName,
+            jobContext.jobIndexName,
+            jobContext.jobId,
+            copyProperties.lockEpochSecond,
+            copyProperties.lockPrimaryTerm,
+            copyProperties.lockSeqNo
+        )
+        val released: Boolean = jobContext.lockService.suspendUntil { release(lock, it) }
+        if (!released) {
+            logger.warn("Lock not released on failure")
+        }
+        // The lock coordinates are no longer valid, so they must not be persisted with the failed step.
+        shrinkActionProperties = null
+    }
+
+    /**
+     * Tries the [suitableNodes] in order and returns the first lock that could be acquired, or null when
+     * none of the nodes could be locked.
+     */
+    private suspend fun acquireLockOnNode(jobContext: JobExecutionContext, suitableNodes: List<String>): LockModel? {
+        val lockTime = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS
+        for (node in suitableNodes) {
+            val nodeResourceObject: HashMap<String, String> = HashMap()
+            nodeResourceObject[RESOURCE_NAME] = node
+            val lock: LockModel? = jobContext.lockService.suspendUntil {
+                acquireLockOnResource(jobContext, lockTime, RESOURCE_TYPE, nodeResourceObject as Map<String, Any>?, it)
+            }
+            if (lock != null) {
+                return lock
+            }
+        }
+        return null
+    }
+
+    /**
+     * Returns the names of the nodes that could hold the whole index.
+     *
+     * A node qualifies when its free OS memory exceeds twice [indexSize] plus [bufferFraction] of its total
+     * memory, and every shard either already sits on it or a dry-run reroute onto it is explained with a YES
+     * decision. Candidates are visited in ascending order of the memory that would remain after the move.
+     */
+    @VisibleForTesting
+    @SuppressWarnings("NestedBlockDepth", "ComplexMethod")
+    private suspend fun findSuitableNodes(
+        stepContext: StepContext,
+        indicesStatsResponse: IndicesStatsResponse,
+        indexSize: Long,
+        bufferFraction: Double,
+        numOriginalShards: Int
+    ): List<String> {
+        val nodesStatsReq = NodesStatsRequest().addMetric(OS_METRIC)
+        val nodeStatsResponse: NodesStatsResponse = stepContext.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) }
+        val nodesList = nodeStatsResponse.nodes
+        val comparator = kotlin.Comparator { o1: Tuple<Long, String>, o2: Tuple<Long, String> -> o1.v1().compareTo(o2.v1()) }
+        val nodesWithSpace = PriorityQueue(comparator)
+        for (node in nodesList) {
+            val osStats = node.os
+            if (osStats != null) {
+                val memLeftInNode = osStats.mem.free.bytes
+                val totalNodeMem = osStats.mem.total.bytes
+                val bufferSize = ByteSizeValue((bufferFraction * totalNodeMem).toLong())
+                val requiredBytes = (2 * indexSize) + bufferSize.bytes
+                if (memLeftInNode > requiredBytes) {
+                    val memLeftAfterTransfer: Long = memLeftInNode - requiredBytes
+                    nodesWithSpace.add(Tuple(memLeftAfterTransfer, node.node.name))
+                }
+            }
+        }
+        val suitableNodes: ArrayList<String> = ArrayList()
+        // poll() honours the comparator; the PriorityQueue iterator gives no ordering guarantee.
+        while (nodesWithSpace.isNotEmpty()) {
+            val nodeName = nodesWithSpace.poll().v2()
+            val movableShardIds = HashSet<Int>()
+            for (shard in indicesStatsResponse.shards) {
+                val shardId = shard.shardRouting.shardId()
+                val currentShardNode = stepContext.clusterService.state().nodes[shard.shardRouting.currentNodeId()]
+                if (currentShardNode.name.equals(nodeName)) {
+                    movableShardIds.add(shardId.id)
+                } else {
+                    val indexName = stepContext.metadata.index
+                    val allocationCommand = MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, nodeName)
+                    val rerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true).add(allocationCommand)
+
+                    val clusterRerouteResponse: ClusterRerouteResponse =
+                        stepContext.client.admin().cluster().suspendUntil { reroute(rerouteRequest, it) }
+                    val filteredExplanations = clusterRerouteResponse.explanations.explanations().filter {
+                        it.decisions().type().equals(Decision.Type.YES)
+                    }
+                    if (filteredExplanations.isNotEmpty()) {
+                        movableShardIds.add(shardId.id)
+                    }
+                }
+            }
+            if (movableShardIds.size >= numOriginalShards) {
+                suitableNodes.add(nodeName)
+            }
+        }
+        return suitableNodes
+    }
+
+    /**
+     * Computes the shard count of the target index; the result is always a factor of [numOriginalShards].
+     *
+     * The first configured option wins: `numNewShards`, then `percentageDecrease`, then `maxShardSize`.
+     */
+    @SuppressWarnings("ReturnCount")
+    private fun getNumTargetShards(numOriginalShards: Int, indexSize: Long): Int {
+        // case where user specifies a certain number of shards in the target index
+        if (action.numNewShards != null) return getGreatestFactorLessThan(numOriginalShards, action.numNewShards)
+
+        // case where user specifies a percentage decrease in the number of shards in the target index
+        // NOTE(review): the value is multiplied with the shard count directly, i.e. it acts as the fraction of
+        // shards to keep -- confirm this matches the documented meaning of percentage_decrease.
+        if (action.percentageDecrease != null) {
+            val numTargetShards = floor((action.percentageDecrease) * numOriginalShards).toInt()
+            return getGreatestFactorLessThan(numOriginalShards, numTargetShards)
+        }
+        // case where the user specifies a max shard size in the target index
+        val maxShardSize = requireNotNull(action.maxShardSize) {
+            "Shrink action needs num_new_shards, percentage_decrease or max_shard_size"
+        }
+        val maxShardSizeInBytes = maxShardSize.bytes
+        // ensures that numTargetShards is never less than 1, even for an empty index
+        val minNumTargetShards = ceil(indexSize / maxShardSizeInBytes.toDouble()).toInt().coerceAtLeast(1)
+        return getMinFactorGreaterThan(numOriginalShards, minNumTargetShards)
+    }
+
+    /**
+     * Returns the greatest factor of [n] that is less than or equal to [k], or [n] itself when [k] >= [n].
+     * Returns 1 when no larger factor fits.
+     */
+    @SuppressWarnings("ReturnCount")
+    private fun getGreatestFactorLessThan(n: Int, k: Int): Int {
+        if (k >= n) return n
+        // Only candidates up to min(floor(sqrt(n)), k) are tried, so a remembered factor never exceeds k
+        val bound: Int = min(floor(sqrt(n.toDouble())).toInt(), k)
+        var greatestFactor = 1
+        for (i in 2..bound) {
+            if (n % i == 0) {
+                val complement: Int = n / i
+                if (complement <= k) {
+                    // complements shrink as i grows, so the first one that fits is the greatest
+                    return complement
+                } else {
+                    greatestFactor = i
+                }
+            }
+        }
+        return greatestFactor
+    }
+
+    /** Returns the smallest factor of [n] that is greater than or equal to [k], or [n] when [k] >= [n]. */
+    @SuppressWarnings("ReturnCount")
+    private fun getMinFactorGreaterThan(n: Int, k: Int): Int {
+        if (k >= n) {
+            return n
+        }
+        // Start at 1 at the lowest: a candidate of 0 would divide by zero
+        for (i in maxOf(k, 1)..n) {
+            if (n % i == 0) return i
+        }
+        return n
+    }
+
+    /** Logs [e] and marks the step FAILED with [message], adding the exception message as the cause if present. */
+    private fun handleException(e: Exception, message: String) {
+        logger.error(message, e)
+        stepStatus = StepStatus.FAILED
+        val mutableInfo = mutableMapOf("message" to message)
+        val errorMessage = e.message
+        if (errorMessage != null) mutableInfo["cause"] = errorMessage
+        info = mutableInfo.toMap()
+    }
+
+    /** Returns true when more time has passed since the action started than the action timeout allows. */
+    private fun hasTimedOut(managedIndexMetadata: ManagedIndexMetaData): Boolean {
+        val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now())
+        // Get ActionTimeout if given, otherwise use default timeout of 12 hours
+        val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS
+        return timeFromActionStarted.toSeconds() > timeOutInSeconds
+    }
+
+    /** Copies [currentMetadata] with this step's status, info and the shrink properties gathered by [execute]. */
+    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
+        val currentActionMetaData = currentMetadata.actionMetaData
+        return currentMetadata.copy(
+            actionMetaData = currentActionMetaData?.copy(
+                actionProperties = ActionProperties(
+                    shrinkActionProperties = shrinkActionProperties
+                )
+            ),
+            stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus),
+            transitionTo = null,
+            info = info
+        )
+    }
+
+    override fun isIdempotent() = true
+
+    companion object {
+        const val OS_METRIC = "os"
+        const val ROUTING_SETTING = "index.routing.allocation.require._name"
+        const val RESOURCE_NAME = "node_name"
+        const val DEFAULT_TARGET_SUFFIX = "_shrunken"
+        // Evaluates to 0 because of the Long conversion; kept only so existing references still compile.
+        const val bufferPercentage = 0.05.toLong()
+        // Fraction of a node's total memory that must stay free in addition to the space the index needs
+        const val NODE_MEMORY_BUFFER_FRACTION = 0.05
+        const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds
+        const val name = "attempt_move_shards_step"
+        const val RESOURCE_TYPE = "shrink"
+        const val TIMEOUT_MESSAGE = "Timed out waiting for finding node."
+        const val UPDATE_FAILED_MESSAGE = "Shrink failed because settings could not be updated.."
+        const val NO_AVAILABLE_NODES_MESSAGE =
+            "There are no available nodes for to move to to execute a shrink. Delaying until node becomes available."
+        const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true."
+        const val ONE_PRIMARY_SHARD_FAILURE_MESSAGE = "Shrink failed because index only has one primary shard."
+        const val FAILURE_MESSAGE = "Shrink failed to start moving shards."
+        fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node."
+        fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists."
+    }
+}
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt
new file mode 100644
index 000000000..a2ec3e5f4
--- /dev/null
+++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt
@@ -0,0 +1,116 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.indexmanagement.indexstatemanagement.step.shrink
+
+import org.apache.logging.log4j.LogManager
+import org.opensearch.ExceptionsHelper
+import org.opensearch.action.admin.cluster.health.ClusterHealthRequest
+import org.opensearch.action.admin.cluster.health.ClusterHealthResponse
+import org.opensearch.action.admin.indices.shrink.ResizeRequest
+import org.opensearch.action.admin.indices.shrink.ResizeResponse
+import org.opensearch.common.settings.Settings
+import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction
+import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_NUMBER_OF_SHARDS
+import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock
+import org.opensearch.indexmanagement.opensearchapi.suspendUntil
+import org.opensearch.indexmanagement.spi.indexstatemanagement.Step
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData
+import org.opensearch.transport.RemoteTransportException
+import java.lang.Exception
+
+/**
+ * Issues the resize request that creates the shrunken target index from the managed index.
+ *
+ * The target index name, target shard count and node name are read from the shrink action properties
+ * stored in the managed index metadata; the aliases come from the [ShrinkAction] configuration.
+ */
+class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) {
+    private val logger = LogManager.getLogger(javaClass)
+    private var stepStatus = StepStatus.STARTING
+    private var info: Map<String, Any>? = null
+
+    /**
+     * Sends the resize request once the health request for the source index returns without timing out.
+     *
+     * The step ends as FAILED when the shrink properties are missing, the resize request is not acknowledged
+     * or an exception is thrown; in the latter two cases the shrink lock described by the properties is
+     * released. It ends as CONDITION_NOT_MET when the health request times out.
+     */
+    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount")
+    override suspend fun execute(): AttemptShrinkStep {
+        val context = this.context ?: return this
+        val indexName = context.metadata.index
+        val actionMetadata = context.metadata.actionMetaData
+        val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties
+        if (shrinkActionProperties == null) {
+            info = mapOf("message" to "Metadata not properly populated")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+        try {
+            val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus()
+            val response: ClusterHealthResponse = context.client.admin().cluster().suspendUntil { health(healthReq, it) }
+            // check status of cluster health
+            if (response.isTimedOut) {
+                stepStatus = StepStatus.CONDITION_NOT_MET
+                info = mapOf("message" to INDEX_HEALTH_NOT_GREEN_MESSAGE)
+                return this
+            }
+            val targetIndexName = shrinkActionProperties.targetIndexName
+            val aliases = action.aliases
+            val req = ResizeRequest(targetIndexName, indexName)
+            req.targetIndexRequest.settings(
+                Settings.builder()
+                    .put(AttemptMoveShardsStep.ROUTING_SETTING, shrinkActionProperties.nodeName)
+                    .put(INDEX_NUMBER_OF_SHARDS, shrinkActionProperties.targetNumShards)
+                    .build()
+            )
+            aliases?.forEach { req.targetIndexRequest.alias(it) }
+            val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) }
+            if (!resizeResponse.isAcknowledged) {
+                info = mapOf("message" to FAILURE_MESSAGE)
+                stepStatus = StepStatus.FAILED
+                releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+                return this
+            }
+            info = mapOf("message" to getSuccessMessage(targetIndexName))
+            stepStatus = StepStatus.COMPLETED
+            return this
+        } catch (e: RemoteTransportException) {
+            // Report the underlying remote failure instead of dropping it
+            val cause = ExceptionsHelper.unwrapCause(e)
+            info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${cause.message}}")
+            stepStatus = StepStatus.FAILED
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            return this
+        } catch (e: Exception) {
+            info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}")
+            stepStatus = StepStatus.FAILED
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            return this
+        }
+    }
+
+    /** Copies [currentMetadata] with this step's status and info; the action metadata is carried over as is. */
+    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
+        val currentActionMetaData = currentMetadata.actionMetaData
+        return currentMetadata.copy(
+            actionMetaData = currentActionMetaData?.copy(),
+            stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus),
+            transitionTo = null,
+            info = info
+        )
+    }
+
+    override fun isIdempotent() = false
+
+    companion object {
+        const val name = "attempt_shrink_step"
+        const val FAILURE_MESSAGE = "Shrink failed when sending shrink request."
+        const val INDEX_HEALTH_NOT_GREEN_MESSAGE = "Shrink delayed because index health is not green."
+        fun getSuccessMessage(newIndex: String) = "Shrink started. $newIndex currently being populated."
+    }
+}
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt
new file mode 100644
index 000000000..d2ae8d203
--- /dev/null
+++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt
@@ -0,0 +1,149 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.indexmanagement.indexstatemanagement.step.shrink
+
+import org.apache.logging.log4j.LogManager
+import org.opensearch.action.admin.indices.stats.IndicesStatsRequest
+import org.opensearch.action.admin.indices.stats.IndicesStatsResponse
+import org.opensearch.action.admin.indices.stats.ShardStats
+import org.opensearch.index.shard.ShardId
+import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction
+import
org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime
+import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock
+import org.opensearch.indexmanagement.opensearchapi.suspendUntil
+import org.opensearch.indexmanagement.spi.indexstatemanagement.Step
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData
+import org.opensearch.transport.RemoteTransportException
+import java.lang.Exception
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * Waits until the shards of the managed index have started on the node named in the shrink action
+ * properties, failing once the action timeout has elapsed.
+ */
+class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) {
+    private val logger = LogManager.getLogger(javaClass)
+    private var stepStatus = StepStatus.STARTING
+    private var info: Map<String, Any>? = null
+
+    /**
+     * Counts the started shard copies that sit on the target node.
+     *
+     * The step ends as COMPLETED when that count reaches the number of primary shards, as CONDITION_NOT_MET
+     * while shards are still missing and the timeout has not elapsed, and as FAILED on timeout, on missing
+     * shrink properties or on any exception. Every FAILED outcome except missing properties releases the lock.
+     */
+    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "NestedBlockDepth")
+    override suspend fun execute(): WaitForMoveShardsStep {
+        val context = this.context ?: return this
+        val indexName = context.metadata.index
+        val actionMetadata = context.metadata.actionMetaData
+        val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties
+        if (shrinkActionProperties == null) {
+            info = mapOf("message" to "Metadata not properly populated")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+        try {
+            val indexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(indexName)
+            val response: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(indexStatsRequests, it) }
+            val numPrimaryShards = context.clusterService.state().metadata.indices[indexName].numberOfShards
+            val nodeToMoveOnto = shrinkActionProperties.nodeName
+            var numShardsOnNode = 0
+            val shardToCheckpointSetMap: MutableMap<ShardId, MutableSet<Long>> = mutableMapOf()
+            for (shard: ShardStats in response.shards) {
+                val seqNoStats = shard.seqNoStats
+                val routingInfo = shard.shardRouting
+                if (seqNoStats != null) {
+                    shardToCheckpointSetMap
+                        .getOrPut(routingInfo.shardId()) { mutableSetOf() }
+                        .add(seqNoStats.localCheckpoint)
+                }
+                val nodeIdShardIsOn = routingInfo.currentNodeId()
+                // A shard whose node is not in the cluster state is simply not counted
+                val nodeShardIsOn = context.clusterService.state().nodes()[nodeIdShardIsOn]?.name
+                if (nodeShardIsOn == nodeToMoveOnto && routingInfo.started()) {
+                    numShardsOnNode++
+                }
+            }
+            // TODO: Test if we can make this appear / if we can, fail the action.
+            // Checked once after all shards were collected rather than on every loop iteration
+            if (shardToCheckpointSetMap.values.any { checkpointSet -> checkpointSet.size > 1 }) {
+                logger.warn("There are shards with varying local checkpoints")
+            }
+            if (numShardsOnNode >= numPrimaryShards) {
+                info = mapOf("message" to getSuccessMessage(nodeToMoveOnto))
+                stepStatus = StepStatus.COMPLETED
+                return this
+            }
+            val numShardsLeft = numPrimaryShards - numShardsOnNode
+            checkTimeOut(context, shrinkActionProperties, numShardsLeft, nodeToMoveOnto)
+            return this
+        } catch (e: RemoteTransportException) {
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            info = mapOf("message" to FAILURE_MESSAGE)
+            stepStatus = StepStatus.FAILED
+            return this
+        } catch (e: Exception) {
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+    }
+
+    /** Copies [currentMetadata] with this step's status and info; the action metadata is carried over as is. */
+    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
+        val currentActionMetaData = currentMetadata.actionMetaData
+        return currentMetadata.copy(
+            actionMetaData = currentActionMetaData?.copy(),
+            stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus),
+            transitionTo = null,
+            info = info
+        )
+    }
+
+    /**
+     * Sets the step to FAILED and releases the shrink lock when the action timeout has elapsed; otherwise
+     * sets it to CONDITION_NOT_MET so that the step is evaluated again.
+     */
+    private suspend fun checkTimeOut(
+        stepContext: StepContext,
+        shrinkActionProperties: ShrinkActionProperties,
+        numShardsLeft: Int,
+        nodeToMoveOnto: String
+    ) {
+        val managedIndexMetadata = stepContext.metadata
+        val indexName = managedIndexMetadata.index
+        val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now())
+        // Get ActionTimeout if given, otherwise use default timeout of 12 hours
+        val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS
+        stepStatus = if (timeFromActionStarted.toSeconds() > timeOutInSeconds) {
+            logger.debug(
+                "Move shards failing on [$indexName] because" +
+                    " [$numShardsLeft] shards still needing to be moved"
+            )
+            releaseShrinkLock(shrinkActionProperties, stepContext.jobContext, logger)
+            info = mapOf("message" to getTimeoutFailure(nodeToMoveOnto))
+            StepStatus.FAILED
+        } else {
+            logger.debug(
+                "Move shards still running on [$indexName] with" +
+                    " [$numShardsLeft] shards still needing to be moved"
+            )
+            info = mapOf("message" to getTimeoutDelay(nodeToMoveOnto))
+            StepStatus.CONDITION_NOT_MET
+        }
+    }
+
+    override fun isIdempotent() = true
+
+    companion object {
+        const val name = "wait_for_move_shards_step"
+        fun getSuccessMessage(node: String) = "The shards successfully moved to $node."
+        fun getTimeoutFailure(node: String) = "Shrink failed because it took to long to move shards to $node"
+        fun getTimeoutDelay(node: String) = "Shrink delayed because it took to long to move shards to $node"
+        const val FAILURE_MESSAGE = "Shrink failed when waiting for shards to move."
+        const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds
+        const val RESOURCE_NAME = "node_name"
+        const val RESOURCE_TYPE = "shrink"
+    }
+}
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt
new file mode 100644
index 000000000..ec979a47d
--- /dev/null
+++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt
@@ -0,0 +1,129 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.indexmanagement.indexstatemanagement.step.shrink
+
+import org.apache.logging.log4j.LogManager
+import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest
+import org.opensearch.action.admin.indices.stats.IndicesStatsRequest
+import org.opensearch.action.admin.indices.stats.IndicesStatsResponse
+import org.opensearch.action.admin.indices.stats.ShardStats
+import org.opensearch.action.support.master.AcknowledgedResponse
+import org.opensearch.common.settings.Settings
+import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction
+import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime
+import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest
+import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock
+import org.opensearch.indexmanagement.opensearchapi.suspendUntil
+import org.opensearch.indexmanagement.spi.indexstatemanagement.Step
+import
org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData
+import org.opensearch.transport.RemoteTransportException
+import java.time.Duration
+import java.time.Instant
+
+/**
+ * Waits until the shards of the shrunken target index have started, then removes the node allocation
+ * requirement from the target and source indices and releases the shrink lock.
+ */
+class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) {
+    private val logger = LogManager.getLogger(javaClass)
+    private var stepStatus = StepStatus.STARTING
+    private var info: Map<String, Any>? = null
+
+    /**
+     * Checks the target index shards and finishes the shrink once enough of them have started.
+     *
+     * The step ends as COMPLETED after the allocation requirement was cleared on the target index, as
+     * CONDITION_NOT_MET while shards are still starting within the timeout, and as FAILED on timeout, on
+     * missing shrink properties, on an unacknowledged settings update or on any exception.
+     */
+    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "LongMethod")
+    override suspend fun execute(): WaitForShrinkStep {
+        val context = this.context ?: return this
+        val actionMetadata = context.metadata.actionMetaData
+        val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties
+        if (shrinkActionProperties == null) {
+            info = mapOf("message" to "Metadata not properly populated")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+        try {
+            val targetIndex = shrinkActionProperties.targetIndexName
+            val targetIndexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(targetIndex)
+            val targetStatsResponse: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(targetIndexStatsRequests, it) }
+            val numShardsStarted = targetStatsResponse.shards.count { shard: ShardStats -> shard.shardRouting.started() }
+            if (numShardsStarted < shrinkActionProperties.targetNumShards) {
+                checkTimeOut(context, shrinkActionProperties, targetIndex)
+                return this
+            }
+            // Clear the allocation requirement on the target index first; failing here fails the step
+            val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build()
+            val response: AcknowledgedResponse = context.client.admin().indices().suspendUntil {
+                updateSettings(UpdateSettingsRequest(allocationSettings, targetIndex), it)
+            }
+            if (!response.isAcknowledged) {
+                releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+                stepStatus = StepStatus.FAILED
+                info = mapOf("message" to getFailureMessage(targetIndex))
+                return this
+            }
+            // Then clear it on the source index; an unacknowledged update is logged but does not fail the step
+            val sourceResponse = issueUpdateSettingsRequest(context.client, context.metadata, allocationSettings)
+            if (!sourceResponse.isAcknowledged) {
+                logger.warn("Clearing the allocation requirement on [${context.metadata.index}] was not acknowledged")
+            }
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            stepStatus = StepStatus.COMPLETED
+            info = mapOf("message" to SUCCESS_MESSAGE)
+            return this
+        } catch (e: RemoteTransportException) {
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            info = mapOf("message" to getFailureMessage(shrinkActionProperties.targetIndexName))
+            stepStatus = StepStatus.FAILED
+            return this
+        } catch (e: Exception) {
+            releaseShrinkLock(shrinkActionProperties, context.jobContext, logger)
+            info = mapOf("message" to GENERIC_FAILURE_MESSAGE, "cause" to "{${e.message}}")
+            stepStatus = StepStatus.FAILED
+            return this
+        }
+    }
+
+    /**
+     * Sets the step to FAILED and releases the shrink lock when the action timeout has elapsed; otherwise
+     * sets it to CONDITION_NOT_MET so that the step is evaluated again.
+     */
+    private suspend fun checkTimeOut(stepContext: StepContext, shrinkActionProperties: ShrinkActionProperties, targetIndex: String) {
+        val managedIndexMetadata = stepContext.metadata
+        val indexName = managedIndexMetadata.index
+        val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now())
+        // Get ActionTimeout if given, otherwise use default timeout of 12 hours
+        val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS
+        stepStatus = if (timeFromActionStarted.toSeconds() > timeOutInSeconds) {
+            logger.error(
+                "Shards of $indexName have still not started."
+            )
+            releaseShrinkLock(shrinkActionProperties, stepContext.jobContext, logger)
+            info = mapOf("message" to getFailureMessage(targetIndex))
+            StepStatus.FAILED
+        } else {
+            logger.debug(
+                "Shards of $indexName have still not started."
+            )
+            info = mapOf("message" to getDelayedMessage(targetIndex))
+            StepStatus.CONDITION_NOT_MET
+        }
+    }
+
+    /** Copies [currentMetadata] with this step's status and info; the action metadata is carried over as is. */
+    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
+        val currentActionMetaData = currentMetadata.actionMetaData
+        return currentMetadata.copy(
+            actionMetaData = currentActionMetaData?.copy(),
+            stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus),
+            transitionTo = null,
+            info = info
+        )
+    }
+
+    override fun isIdempotent() = true
+
+    companion object {
+        const val name = "wait_for_shrink_step"
+        const val SUCCESS_MESSAGE = "Shrink finished successfully."
+        const val GENERIC_FAILURE_MESSAGE = "Shrink failed while waiting for shards to start."
+        fun getDelayedMessage(newIndex: String) = "Shrink delayed because $newIndex shards not in started state."
+        fun getFailureMessage(newIndex: String) = "Shrink failed while waiting for $newIndex shards to start."
+    }
+}
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt
new file mode 100644
index 000000000..09ad517c4
--- /dev/null
+++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt
@@ -0,0 +1,91 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.indexmanagement.indexstatemanagement.util
+
+import org.apache.logging.log4j.Logger
+import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest
+import org.opensearch.action.support.master.AcknowledgedResponse
+import org.opensearch.client.Client
+import org.opensearch.common.settings.Settings
+import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep
+import org.opensearch.indexmanagement.opensearchapi.suspendUntil
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData
+import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties
+import org.opensearch.jobscheduler.spi.JobExecutionContext
+import org.opensearch.jobscheduler.spi.LockModel
+import java.time.Instant
+
+/** Applies [settings] to the index named in [managedIndexMetaData] and returns the update response. */
+suspend fun issueUpdateSettingsRequest(client: Client, managedIndexMetaData: ManagedIndexMetaData, settings: Settings): AcknowledgedResponse {
+    return client.admin()
+        .indices()
+        .suspendUntil { updateSettings(UpdateSettingsRequest(settings, managedIndexMetaData.index), it) }
+}
+
+/**
+ * Releases the shrink lock rebuilt from [shrinkActionProperties], logging a warning through [logger] when
+ * the lock service reports that the lock was not released.
+ */
+suspend fun releaseShrinkLock(
+    shrinkActionProperties: ShrinkActionProperties,
+    jobExecutionContext: JobExecutionContext,
+    logger: Logger
+) {
+    val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext)
+    val released: Boolean = jobExecutionContext.lockService.suspendUntil { release(lock, it) }
+    if (!released) {
+        logger.warn("Lock not released on failure")
+    }
+}
+
+/** Rebuilds the lock model from the node name and lock coordinates stored in [shrinkActionProperties]. */
+fun getShrinkLockModel(
+    shrinkActionProperties: ShrinkActionProperties,
+    jobExecutionContext: JobExecutionContext
+): LockModel {
+    return getShrinkLockModel(
+        shrinkActionProperties.nodeName,
+        jobExecutionContext.jobIndexName,
+        jobExecutionContext.jobId,
+        shrinkActionProperties.lockEpochSecond,
+        shrinkActionProperties.lockPrimaryTerm,
+        shrinkActionProperties.lockSeqNo
+    )
+}
+
+/**
+ * Builds a [LockModel] of type [WaitForMoveShardsStep.RESOURCE_TYPE] whose resource map holds [nodeName].
+ *
+ * The model always carries [WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS], whatever timeout was used
+ * when the lock was acquired.
+ */
+@SuppressWarnings("LongParameterList")
+fun getShrinkLockModel(
+    nodeName: String,
+    jobIndexName: String,
+    jobId: String,
+    lockEpochSecond: Long,
+    lockPrimaryTerm: Long,
+    lockSeqNo: Long
+): LockModel {
+    val resource: HashMap<String, String> = HashMap()
+    resource[WaitForMoveShardsStep.RESOURCE_NAME] = nodeName
+    val lockCreationInstant: Instant = Instant.ofEpochSecond(lockEpochSecond)
+    return LockModel(
+        jobIndexName,
+        jobId,
+        WaitForMoveShardsStep.RESOURCE_TYPE,
+        resource as Map<String, Any>?,
+        lockCreationInstant,
+        WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS,
+        false,
+        lockSeqNo,
+        lockPrimaryTerm
+    )
+}
+
+/** Returns the action start time recorded in [managedIndexMetaData], or the current time when none is set. */
+fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant =
+    managedIndexMetaData.actionMetaData?.startTime?.let { Instant.ofEpochMilli(it) } ?: Instant.now()
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt
index 80e293ba6..8d6c23a6e 100644
--- a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt
+++ b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt
@@ -16,6 +16,7 @@ import org.apache.logging.log4j.Logger
 import org.opensearch.ExceptionsHelper
 import org.opensearch.OpenSearchException
 import org.opensearch.action.ActionListener
+import org.opensearch.action.admin.indices.alias.Alias
 import org.opensearch.action.bulk.BackoffPolicy
 import
org.opensearch.action.get.GetResponse import org.opensearch.action.search.SearchResponse @@ -39,6 +40,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.InjectSecurity import org.opensearch.commons.authuser.User import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.model.ISMTemplate import org.opensearch.indexmanagement.indexstatemanagement.model.Policy import org.opensearch.indexmanagement.util.NO_ID @@ -80,6 +82,12 @@ fun XContentParser.instant(): Instant? { } } +fun XContentBuilder.aliasesField(aliases: List): XContentBuilder { + val builder = this.startObject(ShrinkAction.ALIASES_FIELD) + aliases.forEach { it.toXContent(builder, ToXContent.EMPTY_PARAMS) } + return builder.endObject() +} + fun XContentBuilder.optionalTimeField(name: String, instant: Instant?): XContentBuilder { if (instant == null) { return nullField(name) diff --git a/src/main/resources/mappings/opendistro-ism-config.json b/src/main/resources/mappings/opendistro-ism-config.json index 074257bf8..66fee073b 100644 --- a/src/main/resources/mappings/opendistro-ism-config.json +++ b/src/main/resources/mappings/opendistro-ism-config.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 13 + "schema_version": 14 }, "dynamic": "strict", "properties": { @@ -430,6 +430,29 @@ } } }, + "shrink": { + "properties": { + "num_new_shards": { + "type": "integer" + }, + "max_shard_size": { + "type": "keyword" + }, + "percentage_decrease": { + "type": "double" + }, + "target_index_suffix": { + "type": "text" + }, + "aliases": { + "type": "object", + "enabled": false + }, + "force_unsafe": { + "type": "boolean" + } + } + }, "custom": { "enabled": false, "type": "object" @@ -733,6 +756,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git 
a/src/main/resources/mappings/opendistro-ism-history.json b/src/main/resources/mappings/opendistro-ism-history.json index 44c7ab896..ca5a8d8de 100644 --- a/src/main/resources/mappings/opendistro-ism-history.json +++ b/src/main/resources/mappings/opendistro-ism-history.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 4 + "schema_version": 5 }, "dynamic": "strict", "properties": { @@ -108,6 +108,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git a/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt b/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt index 3cc3a1bba..fbe64cf8a 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt @@ -27,8 +27,8 @@ import javax.management.remote.JMXServiceURL abstract class IndexManagementRestTestCase : ODFERestTestCase() { - val configSchemaVersion = 13 - val historySchemaVersion = 4 + val configSchemaVersion = 14 + val historySchemaVersion = 5 // Having issues with tests leaking into other tests and mappings being incorrect and they are not caught by any pending task wait check as // they do not go through the pending task queue. 
Ideally this should probably be written in a way to wait for the diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt index 842ded4a5..99927b6ed 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt @@ -5,10 +5,12 @@ package org.opensearch.indexmanagement.indexstatemanagement +import org.opensearch.action.admin.indices.alias.Alias import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.index.RandomCreateIndexGenerator.randomAlias import org.opensearch.index.seqno.SequenceNumbers import org.opensearch.indexmanagement.indexstatemanagement.action.AllocationAction import org.opensearch.indexmanagement.indexstatemanagement.action.CloseAction @@ -22,6 +24,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ReadWriteActio import org.opensearch.indexmanagement.indexstatemanagement.action.ReplicaCountAction import org.opensearch.indexmanagement.indexstatemanagement.action.RolloverAction import org.opensearch.indexmanagement.indexstatemanagement.action.RollupAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.SnapshotAction import org.opensearch.indexmanagement.indexstatemanagement.model.ChangePolicy import org.opensearch.indexmanagement.indexstatemanagement.model.Conditions @@ -48,6 +51,11 @@ import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule import org.opensearch.jobscheduler.spi.schedule.Schedule import org.opensearch.script.Script import org.opensearch.script.ScriptType +import 
org.opensearch.test.OpenSearchTestCase.randomAlphaOfLength +import org.opensearch.test.OpenSearchTestCase.randomBoolean +import org.opensearch.test.OpenSearchTestCase.randomDoubleBetween +import org.opensearch.test.OpenSearchTestCase.randomInt +import org.opensearch.test.OpenSearchTestCase.randomList import org.opensearch.test.rest.OpenSearchRestTestCase import java.time.Instant import java.time.ZoneId @@ -131,6 +139,24 @@ fun randomRolloverActionConfig( ) } +fun randomShrinkAction( + numNewShards: Int? = null, + maxShardSize: ByteSizeValue? = null, + percentageDecrease: Double? = null, + targetIndexSuffix: String? = if (randomBoolean()) randomAlphaOfLength(10) else null, + aliases: List? = if (randomBoolean()) randomList(10) { randomAlias() } else null, + forceUnsafe: Boolean? = if (randomBoolean()) randomBoolean() else null +): ShrinkAction { + if (numNewShards == null && maxShardSize == null && percentageDecrease == null) { + when (randomInt(2)) { + 0 -> return ShrinkAction(randomInt(), null, null, targetIndexSuffix, aliases, forceUnsafe, 0) + 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexSuffix, aliases, forceUnsafe, 0) + 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexSuffix, aliases, forceUnsafe, 0) + } + } + return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, 0) +} + fun randomReadOnlyActionConfig(): ReadOnlyAction { return ReadOnlyAction(index = 0) } @@ -378,6 +404,11 @@ fun ReadWriteAction.toJsonString(): String { return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() } +fun ShrinkAction.toJsonString(): String { + val builder = XContentFactory.jsonBuilder() + return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() +} + fun ReplicaCountAction.toJsonString(): String { val builder = XContentFactory.jsonBuilder() return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt new file mode 100644 index 000000000..e0b4e8d3e --- /dev/null +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -0,0 +1,389 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.action + +import org.apache.logging.log4j.LogManager +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.indexmanagement.indexstatemanagement.IndexStateManagementRestTestCase +import org.opensearch.indexmanagement.indexstatemanagement.model.Policy +import org.opensearch.indexmanagement.indexstatemanagement.model.State +import org.opensearch.indexmanagement.indexstatemanagement.randomErrorNotification +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptShrinkStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForShrinkStep +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.waitFor +import java.time.Instant +import java.time.temporal.ChronoUnit +import java.util.Locale + +class ShrinkActionIT : IndexStateManagementRestTestCase() { + private val testIndexName = javaClass.simpleName.toLowerCase(Locale.ROOT) + fun `test basic workflow number of shards`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + + // Create a Policy with one State that only performs a shrink Action + val
shrinkAction = ShrinkAction( + numNewShards = 1, + maxShardSize = null, + percentageDecrease = null, + targetIndexSuffix = "_shrink_test", + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + shrinkAction.targetIndexSuffix + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + 
AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + val instant: Instant = Instant.ofEpochSecond(50) + waitFor(instant) { + // assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals(Step.StepStatus.COMPLETED, getExplainManagedIndexMetaData(indexName).stepMetaData?.stepStatus) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + } + + fun `test basic workflow max shard size`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + val testMaxShardSize: ByteSizeValue = ByteSizeValue.parseBytesSizeValue("1GB", "test") + // Create a Policy with one State that only performs a shrink Action + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = testMaxShardSize, + percentageDecrease = null, + targetIndexSuffix = "_shrink_test", + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState",
listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + shrinkAction.targetIndexSuffix + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // 
starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + } + + fun `test basic workflow percentage decrease`() { + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + // Create a Policy with one State that only performs a shrink Action + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageDecrease = 0.5, + targetIndexSuffix = "_shrink_test", + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val
managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + shrinkAction.targetIndexSuffix + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. 
Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + } + + fun `test allocation block picks correct node`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val nodes = getNodes() + if (nodes.size > 1) { + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + // Create a Policy with one State that only performs a shrink Action + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageDecrease = 0.5, + targetIndexSuffix = "_shrink_test", + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + val excludedNode = nodes.iterator().next() + logger.info("Excluded node: $excludedNode") + updateIndexSettings( + indexName, + Settings.builder().put("index.routing.allocation.exclude._name", excludedNode) + ) + insertSampleData(indexName, 3) + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is
initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + logger.info("index settings: \n ${getFlatSettings(indexName)}") + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + val targetIndexName = indexName + shrinkAction.targetIndexSuffix + waitFor { + assertEquals( + targetIndexName, + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName + ) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + val nodeName = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertNotNull("Couldn't find node to shrink onto.", nodeName) + assertNotEquals(nodeName, excludedNode) + val settings = getFlatSettings(indexName) + val nodeToShrink = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + val nodeToShrink = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. 
Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + } + } +} diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt index 7397d4d40..1fe960408 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt @@ -28,6 +28,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.randomReadWriteAction import org.opensearch.indexmanagement.indexstatemanagement.randomReplicaCountActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRolloverActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRollupActionConfig +import org.opensearch.indexmanagement.indexstatemanagement.randomShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.randomSnapshotActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomState import org.opensearch.indexmanagement.indexstatemanagement.randomTransition @@ -223,6 +224,14 @@ class XContentTests : OpenSearchTestCase() { assertEquals("Round tripping OpenAction doesn't work", openAction.convertToMap(), parsedOpenAction.convertToMap()) } + fun `test shrink action parsing`() { + val 
shrinkAction = randomShrinkAction() + val shrinkActionString = shrinkAction.toJsonString() + val parsedShrinkAction = ISMActionsParser.instance.parse(parser(shrinkActionString), 0) + + assertEquals("Round tripping ShrinkAction doesn't work", shrinkAction.convertToMap(), parsedShrinkAction.convertToMap()) + } + fun `test managed index metadata parsing`() { val metadata = ManagedIndexMetaData( index = randomAlphaOfLength(10), diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt index 11e01c6a5..88a3ea9e5 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.close.AttemptClo import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -33,6 +34,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test close step sets step status to completed when successful`() { val closeIndexResponse = CloseIndexResponse(true, true, listOf()) @@ -41,7 +43,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", 
"policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +57,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +71,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val 
updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -83,7 +85,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -97,7 +99,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -111,7 +113,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", 
"policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt index 7b6963dba..dbdd72f72 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.delete.AttemptDe import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -31,6 +32,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test delete step sets step status to completed when successful`() { val 
acknowledgedResponse = AcknowledgedResponse(true) @@ -39,7 +41,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, 
null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -82,7 +84,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt index 2ed8669e2..8ff70e89e 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.open.AttemptOpen import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import 
org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test open step sets step status to failed when not acknowledged`() { val openIndexResponse = OpenIndexResponse(false, false) @@ -39,7 +41,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = 
attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt index 796baf8ab..cef20a075 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt @@ -23,6 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.indexpriority.At import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import 
org.opensearch.transport.RemoteTransportException @@ -32,6 +33,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test set priority step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) @@ -41,7 +43,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -56,7 +58,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptSetPriorityStep.preExecute(logger, 
context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -71,7 +73,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -87,7 +89,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt index c5e589dd9..387d20c95 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt @@ -23,6 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.replicacount.Att import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,6 +33,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test replica step sets step status to failed when not acknowledged`() { val replicaCountResponse = AcknowledgedResponse(false) @@ -41,7 +43,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) replicaCountStep.preExecute(logger, context).execute() val 
updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -56,7 +58,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -71,7 +73,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt index bea3c41ae..11d4faa00 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt @@ -30,6 +30,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionPrope import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.ingest.TestTemplateService.MockTemplateScript +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript @@ -44,6 +45,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { private val settings: Settings = Settings.EMPTY private val snapshotAction = randomSnapshotActionConfig("repo", "snapshot-name") private val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(AttemptSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) + private val jobContext: JobExecutionContext = mock() @Before fun settings() { @@ -58,7 +60,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.ACCEPTED) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = 
step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.OK) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -76,7 +78,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.INTERNAL_SERVER_ERROR) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -88,7 +90,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = 
step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -101,7 +103,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -114,7 +116,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -127,7 +129,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, 
context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt index e30ce2e33..041c72348 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt @@ -37,6 +37,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase @@ -60,6 +61,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock { on { state() } doReturn clusterState } private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() private val docsStats: DocsStats = mock() private val primaries: CommonStats = mock { on { getDocs() } doReturn docsStats } @@ -83,7 +85,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), 
indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -101,7 +103,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -119,7 +121,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = 
StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt index cfcc73142..625b72c36 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readonly.SetRead import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test read only step sets step status to failed when not acknowledged`() { val setReadOnlyResponse = AcknowledgedResponse(false) @@ -39,7 +41,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val 
managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) 
setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt index c332a0c90..c5a8ca75a 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readwrite.SetRea import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() fun `test read write step sets step status to failed when not acknowledged`() { val setReadWriteResponse = AcknowledgedResponse(false) @@ -39,7 +41,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = 
StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", 
Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt index 83fd61145..7d0293cfa 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt @@ -18,6 +18,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import java.time.Instant @@ -41,11 +42,12 @@ class WaitForRollupCompletionStepTests : OpenSearchTestCase() { ) private val client: Client = mock() private val step = WaitForRollupCompletionStep() + private val jobContext: JobExecutionContext = mock() fun `test wait for rollup when missing rollup id`() { val actionMetadata = metadata.actionMetaData!!.copy(actionProperties = ActionProperties()) val metadata = metadata.copy(actionMetaData = actionMetadata) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) val step = WaitForRollupCompletionStep() runBlocking { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt index 8af94805f..437772b14 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt @@ -27,6 +27,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.script.ScriptService import org.opensearch.snapshots.Snapshot import org.opensearch.snapshots.SnapshotId @@ -38,6 +39,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val jobContext: JobExecutionContext = mock() val snapshot = "snapshot-name" fun `test snapshot missing snapshot name in action properties`() { @@ -48,7 +50,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, emptyActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) 
assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -60,7 +62,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, nullActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -80,7 +82,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -92,7 +94,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val 
metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -104,7 +106,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -116,7 +118,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = 
"snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -128,7 +130,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -147,7 +149,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, 
null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -162,7 +164,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -177,7 +179,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, 
updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/resources/mappings/cached-opendistro-ism-config.json b/src/test/resources/mappings/cached-opendistro-ism-config.json index 074257bf8..66fee073b 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-config.json +++ b/src/test/resources/mappings/cached-opendistro-ism-config.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 13 + "schema_version": 14 }, "dynamic": "strict", "properties": { @@ -430,6 +430,29 @@ } } }, + "shrink": { + "properties": { + "num_new_shards": { + "type": "integer" + }, + "max_shard_size": { + "type": "keyword" + }, + "percentage_decrease": { + "type": "double" + }, + "target_index_suffix": { + "type": "text" + }, + "aliases": { + "type": "object", + "enabled": false + }, + "force_unsafe": { + "type": "boolean" + } + } + }, "custom": { "enabled": false, "type": "object" @@ -733,6 +756,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git a/src/test/resources/mappings/cached-opendistro-ism-history.json b/src/test/resources/mappings/cached-opendistro-ism-history.json index 44c7ab896..ca5a8d8de 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-history.json +++ b/src/test/resources/mappings/cached-opendistro-ism-history.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 4 + "schema_version": 5 }, "dynamic": "strict", "properties": { @@ -108,6 +108,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } From db570dd72e3beb7e421102a93abbd6c85be49590 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Wed, 30 Mar 2022 00:04:16 +0000 Subject: [PATCH 02/13] Refactors shrink action steps and fixes bugs Signed-off-by: Clay Downs --- .../action/ShrinkAction.kt | 43 ++--- .../step/shrink/AttemptMoveShardsStep.kt | 153 ++++++++++-------- .../step/shrink/AttemptShrinkStep.kt | 53 +++--- 
.../step/shrink/WaitForMoveShardsStep.kt | 62 ++++--- .../step/shrink/WaitForShrinkStep.kt | 51 +++--- .../indexstatemanagement/util/StepUtils.kt | 14 +- .../indexstatemanagement/TestHelpers.kt | 3 +- 7 files changed, 191 insertions(+), 188 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index 675b74f01..1d35e4901 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -30,36 +30,15 @@ class ShrinkAction( index: Int ) : Action(name, index) { init { - /* The numbers associated with each shard config are all k % mod 3 == 1. - * Because of the % 3 == 1 property, we can check if more than one shard configs are specified by - * modding the sum by 3. Any sum % 3 != 1 is a sum of more than one of the configs and thus invalid. - * We can then check the error message by checking the sum against each unique sum combination. - */ - val maxShardSizeNotNull = if (maxShardSize != null) MAX_SHARD_NOT_NULL else 0 - val percentageDecreaseNotNull = if (percentageDecrease != null) PERCENTAGE_DECREASE_NOT_NULL else 0 - val numNewShardsNotNull = if (numNewShards != null) NUM_SHARDS_NOT_NULL else 0 - val numSet = maxShardSizeNotNull + percentageDecreaseNotNull + numNewShardsNotNull - require(numSet % NUM_SHARD_CONFIGS == 1) { - when (numSet) { - MAX_SHARD_NOT_NULL + PERCENTAGE_DECREASE_NOT_NULL -> - "Cannot specify both maximum shard size and percentage decrease. Please pick one." - MAX_SHARD_NOT_NULL + NUM_SHARDS_NOT_NULL -> - "Cannot specify both maximum shard size and number of new shards. Please pick one." - PERCENTAGE_DECREASE_NOT_NULL + NUM_SHARDS_NOT_NULL -> - "Cannot specify both percentage decrease and number of new shards. Please pick one." 
- MAX_SHARD_NOT_NULL + PERCENTAGE_DECREASE_NOT_NULL + NUM_SHARDS_NOT_NULL -> - "Cannot specify maximum shard size, percentage decrease, and number of new shards. Please pick one." - // Never executes this code block. - else -> "" - } - } - if (percentageDecreaseNotNull != 0) { - require(percentageDecrease!!.compareTo(0.0) == 1 && percentageDecrease.compareTo(1.0) == -1) { - "Percentage decrease must be between 0.0 and 1.0 exclusively" - } - } - if (maxShardSizeNotNull != 0) { - require(maxShardSize!!.bytes > 0) { "The max_shard_size must be greater than 0." } + val numSet = arrayOf(maxShardSize != null, percentageDecrease != null, numNewShards != null).count { it } + require(numSet == 1) { "Exactly one option specifying the number of shards to shrink to must be used." } + + if (maxShardSize != null) { + require(maxShardSize.bytes > 0) { "Shrink action maxShardSize must be greater than 0." } + } else if (percentageDecrease != null) { + require(percentageDecrease > 0.0 && percentageDecrease < 1.0) { "Percentage decrease must be between 0.0 and 1.0 exclusively" } + } else if (numNewShards != null) { + require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." 
} } } @@ -131,9 +110,5 @@ class ShrinkAction( const val TARGET_INDEX_SUFFIX_FIELD = "target_index_suffix" const val ALIASES_FIELD = "aliases" const val FORCE_UNSAFE_FIELD = "force_unsafe" - const val MAX_SHARD_NOT_NULL = 1 - const val PERCENTAGE_DECREASE_NOT_NULL = 4 - const val NUM_SHARDS_NOT_NULL = 7 - const val NUM_SHARD_CONFIGS = 3 } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index f28363bfc..2d849be93 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -6,8 +6,6 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager -import org.opensearch.action.admin.cluster.health.ClusterHealthRequest -import org.opensearch.action.admin.cluster.health.ClusterHealthResponse import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse import org.opensearch.action.admin.cluster.reroute.ClusterRerouteRequest @@ -17,13 +15,13 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand -import org.opensearch.cluster.routing.allocation.decider.Decision +import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple import org.opensearch.common.settings.Settings import org.opensearch.common.unit.ByteSizeValue import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import 
org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime -import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel +import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step @@ -40,7 +38,6 @@ import java.time.Duration import java.time.Instant import java.util.PriorityQueue import kotlin.collections.ArrayList -import kotlin.collections.HashMap import kotlin.collections.HashSet import kotlin.math.ceil import kotlin.math.floor @@ -60,42 +57,27 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val client = context.client val indexName = context.metadata.index try { - checkTimeOut(context.metadata) - // check whether the target index name is available. - val indexNameSuffix = action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX - val shrinkTargetIndexName = indexName + indexNameSuffix - val indexExists = context.clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName) - if (indexExists) { - info = mapOf("message" to getIndexExistsMessage(shrinkTargetIndexName)) - stepStatus = StepStatus.FAILED - return this - } + if (actionTimedOut(context.metadata)) return this + + val shrinkTargetIndexName = indexName + (action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX) + if (targetIndexNameExists(context.clusterService, shrinkTargetIndexName)) return this - // get cluster health - val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus() - val response: ClusterHealthResponse = client.admin().cluster().suspendUntil { health(healthReq, it) } - // check status of cluster health - if (response.isTimedOut) { + if (!isIndexGreen(client, indexName)) { info = mapOf("message" to FAILURE_MESSAGE) stepStatus = StepStatus.CONDITION_NOT_MET return 
this } - // force_unsafe check - val numReplicas = context.clusterService.state().metadata.indices[indexName].numberOfReplicas - val shouldFailForceUnsafeCheck = numReplicas == 0 && ((action.forceUnsafe != null && !action.forceUnsafe) || (action.forceUnsafe == null)) - if (shouldFailForceUnsafeCheck) { - info = mapOf("message" to UNSAFE_FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED - return this - } - // Get the number of primary shards in the index -- all will be active because index health is green + if (shouldFailUnsafe(context.clusterService, indexName)) return this + + // Fail if there is only one primary shard, as that cannot be shrunk val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards if (numOriginalShards == 1) { info = mapOf("message" to ONE_PRIMARY_SHARD_FAILURE_MESSAGE) stepStatus = StepStatus.FAILED return this } + // Get the size of the index val statsRequest = IndicesStatsRequest().indices(indexName) val statsResponse: IndicesStatsResponse = client.admin().indices().suspendUntil { @@ -109,11 +91,10 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } val indexSize = statsStore.sizeInBytes - // get the number of shards that the target index will have val numTargetShards = getNumTargetShards(numOriginalShards, indexSize) - // get the nodes with enough memory + // get the nodes with enough memory in increasing order of free space val suitableNodes = findSuitableNodes(context, statsResponse, indexSize, bufferPercentage, numOriginalShards) - // iterate through the nodes and try to acquire a lock on those nodes + // iterate through the nodes and try to acquire a lock on one val lock = acquireLockOnNode(context.jobContext, suitableNodes) if (lock == null) { logger.info("$indexName could not find available node to shrink onto.") @@ -121,7 +102,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.CONDITION_NOT_MET return this } - 
// move the shards val nodeName = lock.resource[RESOURCE_NAME] as String shrinkActionProperties = ShrinkActionProperties( nodeName, @@ -131,7 +111,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { lock.seqNo, lock.lockTime.epochSecond ) - setToReadOnlyAndMoveIndexToNode(context, nodeName) + setToReadOnlyAndMoveIndexToNode(context, nodeName, lock) info = mapOf("message" to getSuccessMessage(nodeName)) stepStatus = StepStatus.COMPLETED return this @@ -142,67 +122,88 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } } - private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String) { + /* + * Returns whether the action should fail due to being unsafe. The action is unsafe if there are no replicas. If forceUnsafe + * is set, then this always returns false. + */ + private fun shouldFailUnsafe(clusterService: ClusterService, indexName: String): Boolean { + // If forceUnsafe is set and is true, then we don't even need to check the number of replicas + if (action.forceUnsafe == true) return false + val numReplicas = clusterService.state().metadata.indices[indexName].numberOfReplicas + val shouldFailForceUnsafeCheck = numReplicas == 0 + if (shouldFailForceUnsafeCheck) { + info = mapOf("message" to UNSAFE_FAILURE_MESSAGE) + stepStatus = StepStatus.FAILED + return true + } + return false + } + + private fun targetIndexNameExists(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean { + val indexExists = clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName) + if (indexExists) { + info = mapOf("message" to getIndexExistsMessage(shrinkTargetIndexName)) + stepStatus = StepStatus.FAILED + return true + } + return false + } + + private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String, lock: LockModel) { val updateSettings = Settings.builder() .put(IndexMetadata.SETTING_BLOCKS_WRITE, true) .put(ROUTING_SETTING, node) 
.build() - issueUpdateAndUnlockIfFail(stepContext, updateSettings, UPDATE_FAILED_MESSAGE) - } - - private suspend fun issueUpdateAndUnlockIfFail(stepContext: StepContext, settings: Settings, failureMessage: String) { val jobContext = stepContext.jobContext try { - val response: AcknowledgedResponse = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata, settings) + val response: AcknowledgedResponse = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata.index, updateSettings) if (!response.isAcknowledged) { stepStatus = StepStatus.FAILED - info = mapOf("message" to failureMessage) - } - } catch (e: Exception) { - handleException(e, failureMessage) - val copyProperties = shrinkActionProperties - if (copyProperties != null) { - val lock = getShrinkLockModel( - copyProperties.nodeName, - jobContext.jobIndexName, - jobContext.jobId, - copyProperties.lockEpochSecond, - copyProperties.lockPrimaryTerm, - copyProperties.lockSeqNo - ) + info = mapOf("message" to UPDATE_FAILED_MESSAGE) jobContext.lockService.suspendUntil { release(lock, it) } } + } catch (e: Exception) { + stepStatus = StepStatus.FAILED + handleException(e, UPDATE_FAILED_MESSAGE) + jobContext.lockService.suspendUntil { release(lock, it) } } } + /* + * Iterates through each suitable node in order, attempting to acquire a resource lock. Returns the first lock which + * is successfully acquired. + */ private suspend fun acquireLockOnNode(jobContext: JobExecutionContext, suitableNodes: List): LockModel? { - var lock: LockModel? = null for (node in suitableNodes) { - val nodeResourceObject: HashMap = HashMap() - nodeResourceObject[RESOURCE_NAME] = node + val nodeResourceObject = mapOf(RESOURCE_NAME to node) + // TODO CLAY, the lock should be the timeout for all steps, not just one?? 
val lockTime = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS - lock = jobContext.lockService.suspendUntil { - acquireLockOnResource(jobContext, lockTime, RESOURCE_TYPE, nodeResourceObject as Map?, it) + val lock: LockModel? = jobContext.lockService.suspendUntil { + acquireLockOnResource(jobContext, lockTime, RESOURCE_TYPE, nodeResourceObject, it) } if (lock != null) { return lock } } - return lock + return null } + /* + * Returns the list of node names for nodes with enough space to shrink to, in increasing order of space available + */ @VisibleForTesting @SuppressWarnings("NestedBlockDepth", "ComplexMethod") private suspend fun findSuitableNodes( stepContext: StepContext, indicesStatsResponse: IndicesStatsResponse, - indexSize: Long, + indexSizeInBytes: Long, buffer: Long, numOriginalShards: Int ): List { val nodesStatsReq = NodesStatsRequest().addMetric(OS_METRIC) val nodeStatsResponse: NodesStatsResponse = stepContext.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) } val nodesList = nodeStatsResponse.nodes + // Sort in increasing order of keys, in our case this is memory left val comparator = kotlin.Comparator { o1: Tuple, o2: Tuple -> o1.v1().compareTo(o2.v1()) } val nodesWithSpace = PriorityQueue(comparator) for (node in nodesList) { @@ -211,7 +212,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val memLeftInNode = osStats.mem.free.bytes val totalNodeMem = osStats.mem.total.bytes val bufferSize = ByteSizeValue(buffer * totalNodeMem) - val requiredBytes = (2 * indexSize) + bufferSize.bytes + val requiredBytes = (2 * indexSizeInBytes) + bufferSize.bytes if (memLeftInNode > requiredBytes) { val memLeftAfterTransfer: Long = memLeftInNode - requiredBytes nodesWithSpace.add(Tuple(memLeftAfterTransfer, node.node.name)) @@ -230,14 +231,13 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } else { val indexName = stepContext.metadata.index val 
allocationCommand = MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, nodeName) + // Do a dry run to make sure the shard can successfully move to the target node val rerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true).add(allocationCommand) val clusterRerouteResponse: ClusterRerouteResponse = stepContext.client.admin().cluster().suspendUntil { reroute(rerouteRequest, it) } - val filteredExplanations = clusterRerouteResponse.explanations.explanations().filter { - it.decisions().type().equals(Decision.Type.YES) - } - if (filteredExplanations.isNotEmpty()) { + // As there is only a single shard, there should be a single decision. If it is yes, note the shard as moveable + if (clusterRerouteResponse.explanations.yesDecisionMessages.isNotEmpty()) { movableShardIds.add(shardId.id) } } @@ -261,17 +261,24 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } // case where the user specifies a max shard size in the target index val maxShardSizeInBytes = action.maxShardSize!!.bytes - // ensures that numTargetShards is never less than 1 + // ceiling ensures that numTargetShards is never less than 1 val minNumTargetShards = ceil(indexSize / maxShardSizeInBytes.toDouble()).toInt() + // In order to not violate the max shard size condition, this value must be >= minNumTargetShards. + // If that value doesn't exist, numOriginalShards will be returned return getMinFactorGreaterThan(numOriginalShards, minNumTargetShards) } + /* + * Returns the greatest number which is <= k and is a factor of n. In the context of the shrink action, + * n is the original number of shards, k is the attempted number of shards to shrink to. If k is 0, 1 is returned. 
+ */ @SuppressWarnings("ReturnCount") private fun getGreatestFactorLessThan(n: Int, k: Int): Int { if (k >= n) return n + // The bound is set to the floor of the square root of n, or just k, whichever is lower val bound: Int = min(floor(sqrt(n.toDouble())).toInt(), k) var greatestFactor = 1 - for (i in 2..bound + 1) { + for (i in 2..bound) { if (n % i == 0) { val complement: Int = n / i if (complement <= k) { @@ -284,12 +291,16 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { return greatestFactor } + /* + * Returns the smallest number which is >= k and is a factor of n. In the context of the shrink action, + * n is the original number of shards, k is the attempted number of shards to shrink to in a case + */ @SuppressWarnings("ReturnCount") private fun getMinFactorGreaterThan(n: Int, k: Int): Int { if (k >= n) { return n } - for (i in k..n + 1) { + for (i in k..n) { if (n % i == 0) return i } return n @@ -304,14 +315,16 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { info = mutableInfo.toMap() } - private fun checkTimeOut(managedIndexMetadata: ManagedIndexMetaData) { + private fun actionTimedOut(managedIndexMetadata: ManagedIndexMetaData): Boolean { val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS // Get ActionTimeout if given, otherwise use default timeout of 12 hours if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { info = mapOf("message" to TIMEOUT_MESSAGE) stepStatus = StepStatus.FAILED + return true } + return false } override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index a2ec3e5f4..ca8962b07 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -6,17 +6,18 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager -import org.opensearch.action.admin.cluster.health.ClusterHealthRequest -import org.opensearch.action.admin.cluster.health.ClusterHealthResponse import org.opensearch.action.admin.indices.shrink.ResizeRequest import org.opensearch.action.admin.indices.shrink.ResizeResponse import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_NUMBER_OF_SHARDS +import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.transport.RemoteTransportException import java.lang.Exception @@ -33,37 +34,19 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to 
"Metadata not properly populated") + info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") stepStatus = StepStatus.FAILED return this } try { - val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus() - val response: ClusterHealthResponse = context.client.admin().cluster().suspendUntil { health(healthReq, it) } - // check status of cluster health - if (response.isTimedOut) { + if (!isIndexGreen(context.client, indexName)) { stepStatus = StepStatus.CONDITION_NOT_MET info = mapOf("message" to INDEX_HEALTH_NOT_GREEN_MESSAGE) return this } - val targetIndexName = shrinkActionProperties.targetIndexName - val aliases = action.aliases - val req = ResizeRequest(targetIndexName, indexName) - req.targetIndexRequest.settings( - Settings.builder() - .put(AttemptMoveShardsStep.ROUTING_SETTING, shrinkActionProperties.nodeName) - .put(INDEX_NUMBER_OF_SHARDS, shrinkActionProperties.targetNumShards) - .build() - ) - aliases?.forEach { req.targetIndexRequest.alias(it) } - val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) } - if (!resizeResponse.isAcknowledged) { - info = mapOf("message" to FAILURE_MESSAGE) - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - stepStatus = StepStatus.FAILED - return this - } - info = mapOf("message" to getSuccessMessage(targetIndexName)) + // If the resize index api fails, the step will be set to failed and resizeIndex will return false + if (!resizeIndex(indexName, shrinkActionProperties, context)) return this + info = mapOf("message" to getSuccessMessage(shrinkActionProperties.targetIndexName)) stepStatus = StepStatus.COMPLETED return this } catch (e: RemoteTransportException) { @@ -79,6 +62,26 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { } } + private suspend fun resizeIndex(sourceIndex: String, shrinkActionProperties: ShrinkActionProperties, context: StepContext): Boolean { 
+ val targetIndex = shrinkActionProperties.targetIndexName + val req = ResizeRequest(targetIndex, sourceIndex) + req.targetIndexRequest.settings( + Settings.builder() + .put(AttemptMoveShardsStep.ROUTING_SETTING, shrinkActionProperties.nodeName) + .put(INDEX_NUMBER_OF_SHARDS, shrinkActionProperties.targetNumShards) + .build() + ) + action.aliases?.forEach { req.targetIndexRequest.alias(it) } + val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) } + if (!resizeResponse.isAcknowledged) { + info = mapOf("message" to FAILURE_MESSAGE) + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + stepStatus = StepStatus.FAILED + return false + } + return true + } + override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { val currentActionMetaData = currentMetadata.actionMetaData return currentMetadata.copy( diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index d2ae8d203..02e4c8d0f 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -9,7 +9,7 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.admin.indices.stats.ShardStats -import org.opensearch.index.shard.ShardId +import org.opensearch.common.collect.ImmutableOpenIntMap import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import 
org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock @@ -36,7 +36,7 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to "Metadata not properly populated") + info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") stepStatus = StepStatus.FAILED return this } @@ -45,38 +45,31 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { val response: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(indexStatsRequests, it) } val numPrimaryShards = context.clusterService.state().metadata.indices[indexName].numberOfShards val nodeToMoveOnto = shrinkActionProperties.nodeName + val inSyncAllocations = context.clusterService.state().metadata.indices[indexName].inSyncAllocationIds + val numReplicas = context.clusterService.state().metadata.indices[indexName].numberOfReplicas var numShardsOnNode = 0 - val shardToCheckpointSetMap: MutableMap> = mutableMapOf() + var numShardsInSync = 0 for (shard: ShardStats in response.shards) { - val seqNoStats = shard.seqNoStats val routingInfo = shard.shardRouting - if (seqNoStats != null) { - val checkpoint = seqNoStats.localCheckpoint - val shardId = shard.shardRouting.shardId() - val checkpointsOfShard = shardToCheckpointSetMap.getOrDefault(shardId, mutableSetOf()) - checkpointsOfShard.add(checkpoint) - shardToCheckpointSetMap[shardId] = checkpointsOfShard - } - // TODO: Test if we can make this appear / if we can, fail the action. 
- shardToCheckpointSetMap.entries.forEach { - (_, checkpointSet) -> - if (checkpointSet.size > 1) { - logger.warn("There are shards with varying local checkpoints") - } - } val nodeIdShardIsOn = routingInfo.currentNodeId() - val nodeShardIsOn = context.clusterService.state().nodes()[nodeIdShardIsOn].name - if (nodeShardIsOn.equals(nodeToMoveOnto) && routingInfo.started()) { - numShardsOnNode++ + val nodeNameShardIsOn = context.clusterService.state().nodes()[nodeIdShardIsOn].name + if (routingInfo.primary()) { + if (nodeNameShardIsOn.equals(nodeToMoveOnto) && routingInfo.started()) { + numShardsOnNode++ + } + if (numReplicas == 0 || inSyncReplicaExists(routingInfo.id, inSyncAllocations)) { + numShardsInSync++ + } } } - if (numShardsOnNode >= numPrimaryShards) { + if (numShardsOnNode >= numPrimaryShards && numShardsInSync >= numPrimaryShards) { info = mapOf("message" to getSuccessMessage(nodeToMoveOnto)) stepStatus = StepStatus.COMPLETED - return this + } else { + val numShardsNotOnNode = numPrimaryShards - numShardsOnNode + val numShardsNotInSync = numPrimaryShards - numShardsInSync + checkTimeOut(context, shrinkActionProperties, numShardsNotOnNode, numShardsNotInSync, nodeToMoveOnto) } - val numShardsLeft = numPrimaryShards - numShardsOnNode - checkTimeOut(context, shrinkActionProperties, numShardsLeft, nodeToMoveOnto) return this } catch (e: RemoteTransportException) { releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) @@ -91,6 +84,8 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { } } + private fun inSyncReplicaExists(shardId: Int, inSyncAllocations: ImmutableOpenIntMap>): Boolean = inSyncAllocations[shardId].size > 1 + override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { // Saving maxNumSegments in ActionProperties after the force merge operation has begun so that if a ChangePolicy occurred // in between this step and WaitForForceMergeStep, a cached segment 
count expected from the operation is available @@ -106,18 +101,19 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { private suspend fun checkTimeOut( stepContext: StepContext, shrinkActionProperties: ShrinkActionProperties, - numShardsLeft: Int, + numShardsNotOnNode: Int, + numShardsNotInSync: Int, nodeToMoveOnto: String ) { val managedIndexMetadata = stepContext.metadata val indexName = managedIndexMetadata.index - val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) + val timeSinceActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS // Get ActionTimeout if given, otherwise use default timeout of 12 hours - stepStatus = if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { - logger.debug( - "Move shards failing on [$indexName] because" + - " [$numShardsLeft] shards still needing to be moved" + stepStatus = if (timeSinceActionStarted.toSeconds() > timeOutInSeconds) { + logger.error( + "Shrink Action move shards failed on [$indexName], the action timed out with [$numShardsNotOnNode] shards not yet " + + "moved and [$numShardsNotInSync] shards without an in sync replica." ) if (managedIndexMetadata.actionMetaData?.actionProperties?.shrinkActionProperties != null) { releaseShrinkLock(shrinkActionProperties, stepContext.jobContext, logger) @@ -126,8 +122,8 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { StepStatus.FAILED } else { logger.debug( - "Move shards still running on [$indexName] with" + - " [$numShardsLeft] shards still needing to be moved" + "Shrink action move shards step running on [$indexName], [$numShardsNotOnNode] shards need to be moved, " + + "[$numShardsNotInSync] shards need an in sync replica." 
) info = mapOf("message" to getTimeoutDelay(nodeToMoveOnto)) StepStatus.CONDITION_NOT_MET diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index ec979a47d..554c76be2 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -6,11 +6,10 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager -import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse -import org.opensearch.action.admin.indices.stats.ShardStats import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.client.Client import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime @@ -37,35 +36,23 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to "Metadata not properly populated") + info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") stepStatus = StepStatus.FAILED return this } try { val targetIndex = shrinkActionProperties.targetIndexName - val targetIndexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(targetIndex) - val targetStatsResponse: IndicesStatsResponse = 
context.client.admin().indices().suspendUntil { stats(targetIndexStatsRequests, it) } - var numShardsStarted = 0 - for (shard: ShardStats in targetStatsResponse.shards) { - if (shard.shardRouting.started()) { - numShardsStarted++ - } - } - if (numShardsStarted < shrinkActionProperties.targetNumShards) { + val numPrimaryShardsStarted = getNumPrimaryShardsStarted(context.client, targetIndex) + val numPrimaryShards = context.clusterService.state().metadata.indices[targetIndex].numberOfShards + if (numPrimaryShards != shrinkActionProperties.targetNumShards || numPrimaryShardsStarted != shrinkActionProperties.targetNumShards) { checkTimeOut(context, shrinkActionProperties, targetIndex) return this } - val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build() - val response: AcknowledgedResponse = context.client.admin().indices().suspendUntil { - updateSettings(UpdateSettingsRequest(allocationSettings, targetIndex), it) - } - if (!response.isAcknowledged) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - stepStatus = StepStatus.FAILED - info = mapOf("message" to getFailureMessage(targetIndex)) - return this - } - issueUpdateSettingsRequest(context.client, context.metadata, allocationSettings) + + // Clear source and target allocation, if either fails the step will be set to failed and the function will return false + if (!clearAllocationSettings(context, targetIndex, shrinkActionProperties)) return this + if (!clearAllocationSettings(context, context.metadata.index, shrinkActionProperties)) return this + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) stepStatus = StepStatus.COMPLETED info = mapOf("message" to SUCCESS_MESSAGE) @@ -83,6 +70,24 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { } } + private suspend fun clearAllocationSettings(context: StepContext, index: String, shrinkActionProperties: ShrinkActionProperties): Boolean { + val 
allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build() + val response: AcknowledgedResponse = issueUpdateSettingsRequest(context.client, index, allocationSettings) + if (!response.isAcknowledged) { + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + stepStatus = StepStatus.FAILED + info = mapOf("message" to getFailureMessage(index)) + return false + } + return true + } + + private suspend fun getNumPrimaryShardsStarted(client: Client, targetIndex: String): Int { + val targetIndexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(targetIndex) + val targetStatsResponse: IndicesStatsResponse = client.admin().indices().suspendUntil { stats(targetIndexStatsRequests, it) } + return targetStatsResponse.shards.filter { it.shardRouting.started() && it.shardRouting.primary() }.size + } + private suspend fun checkTimeOut(stepContext: StepContext, shrinkActionProperties: ShrinkActionProperties, targetIndex: String) { val managedIndexMetadata = stepContext.metadata val indexName = managedIndexMetadata.index diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 09ad517c4..2c9c506b2 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -6,6 +6,8 @@ package org.opensearch.indexmanagement.indexstatemanagement.util import org.apache.logging.log4j.Logger +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client @@ -18,10 +20,10 @@ import 
org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel import java.time.Instant -suspend fun issueUpdateSettingsRequest(client: Client, managedIndexMetaData: ManagedIndexMetaData, settings: Settings): AcknowledgedResponse { +suspend fun issueUpdateSettingsRequest(client: Client, indexName: String, settings: Settings): AcknowledgedResponse { return client.admin() .indices() - .suspendUntil { updateSettings(UpdateSettingsRequest(settings, managedIndexMetaData.index), it) } + .suspendUntil { updateSettings(UpdateSettingsRequest(settings, indexName), it) } } suspend fun releaseShrinkLock( @@ -81,3 +83,11 @@ fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant { actionMetadata?.startTime?.let { return Instant.ofEpochMilli(it) } return Instant.now() } + +suspend fun isIndexGreen(client: Client, indexName: String): Boolean { + // get index health, waiting for a green status + val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus() + val response: ClusterHealthResponse = client.admin().cluster().suspendUntil { health(healthReq, it) } + // The request was set to wait for green index, if the request timed out, the index never was green + return !response.isTimedOut +} diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt index 99927b6ed..13d777b15 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt @@ -60,6 +60,7 @@ import org.opensearch.test.rest.OpenSearchRestTestCase import java.time.Instant import java.time.ZoneId import java.time.temporal.ChronoUnit +import kotlin.math.abs fun randomPolicy( id: String = OpenSearchRestTestCase.randomAlphaOfLength(10), @@ -149,7 +150,7 @@ fun randomShrinkAction( ): ShrinkAction { if 
(numNewShards == null && maxShardSize == null && percentageDecrease == null) { when (randomInt(2)) { - 0 -> return ShrinkAction(randomInt(), null, null, targetIndexSuffix, aliases, forceUnsafe, 0) + 0 -> return ShrinkAction(abs(randomInt()) + 1, null, null, targetIndexSuffix, aliases, forceUnsafe, 0) 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexSuffix, aliases, forceUnsafe, 0) 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexSuffix, aliases, forceUnsafe, 0) } From 4c7ef20075f20425a8e3485e61a159bf30b39c0d Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Wed, 30 Mar 2022 23:16:38 +0000 Subject: [PATCH 03/13] Adds additional validation and no-op on single shard Signed-off-by: Clay Downs --- .../action/ShrinkAction.kt | 14 ++-- .../action/ShrinkActionParser.kt | 12 +-- .../step/shrink/AttemptMoveShardsStep.kt | 84 +++++++++++-------- .../indexstatemanagement/util/StepUtils.kt | 19 +++++ .../mappings/opendistro-ism-config.json | 2 +- .../indexstatemanagement/TestHelpers.kt | 6 +- .../action/ShrinkActionIT.kt | 68 +++++++++++++-- .../indexstatemanagement/model/ActionTests.kt | 32 +++++++ .../cached-opendistro-ism-config.json | 2 +- 9 files changed, 181 insertions(+), 58 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index 1d35e4901..4af08c345 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -23,20 +23,20 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext class ShrinkAction( val numNewShards: Int?, val maxShardSize: ByteSizeValue?, - val percentageDecrease: Double?, + val percentageOfSourceShards: Double?, val targetIndexSuffix: String?, val aliases: List?, val 
forceUnsafe: Boolean?, index: Int ) : Action(name, index) { init { - val numSet = arrayOf(maxShardSize != null, percentageDecrease != null, numNewShards != null).count { it } + val numSet = arrayOf(maxShardSize != null, percentageOfSourceShards != null, numNewShards != null).count { it } require(numSet == 1) { "Exactly one option specifying the number of shards to shrink to must be used." } if (maxShardSize != null) { require(maxShardSize.bytes > 0) { "Shrink action maxShardSize must be greater than 0." } - } else if (percentageDecrease != null) { - require(percentageDecrease > 0.0 && percentageDecrease < 1.0) { "Percentage decrease must be between 0.0 and 1.0 exclusively" } + } else if (percentageOfSourceShards != null) { + require(percentageOfSourceShards > 0.0 && percentageOfSourceShards < 1.0) { "Percentage of source shards must be between 0.0 and 1.0 exclusively" } } else if (numNewShards != null) { require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." } } @@ -80,7 +80,7 @@ class ShrinkAction( builder.startObject(type) if (numNewShards != null) builder.field(NUM_NEW_SHARDS_FIELD, numNewShards) if (maxShardSize != null) builder.field(MAX_SHARD_SIZE_FIELD, maxShardSize.stringRep) - if (percentageDecrease != null) builder.field(PERCENTAGE_DECREASE_FIELD, percentageDecrease) + if (percentageOfSourceShards != null) builder.field(PERCENTAGE_OF_SOURCE_SHARDS_FIELD, percentageOfSourceShards) if (targetIndexSuffix != null) builder.field(TARGET_INDEX_SUFFIX_FIELD, targetIndexSuffix) if (aliases != null) { builder.aliasesField(aliases) } if (forceUnsafe != null) builder.field(FORCE_UNSAFE_FIELD, forceUnsafe) @@ -90,7 +90,7 @@ class ShrinkAction( override fun populateAction(out: StreamOutput) { out.writeOptionalInt(numNewShards) out.writeOptionalWriteable(maxShardSize) - out.writeOptionalDouble(percentageDecrease) + out.writeOptionalDouble(percentageOfSourceShards) out.writeOptionalString(targetIndexSuffix) if (aliases != null) { 
out.writeBoolean(true) @@ -105,7 +105,7 @@ class ShrinkAction( companion object { const val name = "shrink" const val NUM_NEW_SHARDS_FIELD = "num_new_shards" - const val PERCENTAGE_DECREASE_FIELD = "percentage_decrease" + const val PERCENTAGE_OF_SOURCE_SHARDS_FIELD = "percentage_of_source_shards" const val MAX_SHARD_SIZE_FIELD = "max_shard_size" const val TARGET_INDEX_SUFFIX_FIELD = "target_index_suffix" const val ALIASES_FIELD = "aliases" diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt index 1e99b4c01..f5b7d9d99 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt @@ -14,7 +14,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.C import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.FORCE_UNSAFE_FIELD import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.MAX_SHARD_SIZE_FIELD import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.NUM_NEW_SHARDS_FIELD -import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.PERCENTAGE_DECREASE_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.PERCENTAGE_OF_SOURCE_SHARDS_FIELD import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.TARGET_INDEX_SUFFIX_FIELD import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import org.opensearch.indexmanagement.spi.indexstatemanagement.ActionParser @@ -23,20 +23,20 @@ class ShrinkActionParser : ActionParser() { override fun fromStreamInput(sin: StreamInput): Action { val numNewShards = sin.readOptionalInt() val maxShardSize 
= sin.readOptionalWriteable(::ByteSizeValue) - val percentageDecrease = sin.readOptionalDouble() + val percentageOfSourceShards = sin.readOptionalDouble() val targetIndexSuffix = sin.readOptionalString() val aliases = if (sin.readBoolean()) sin.readList(::Alias) else null val forceUnsafe = sin.readOptionalBoolean() val index = sin.readInt() - return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, index) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, index) } @Suppress("NestedBlockDepth") override fun fromXContent(xcp: XContentParser, index: Int): Action { var numNewShards: Int? = null var maxShardSize: ByteSizeValue? = null - var percentageDecrease: Double? = null + var percentageOfSourceShards: Double? = null var targetIndexSuffix: String? = null var aliases: List? = null var forceUnsafe: Boolean? = null @@ -49,7 +49,7 @@ class ShrinkActionParser : ActionParser() { when (fieldName) { NUM_NEW_SHARDS_FIELD -> numNewShards = xcp.intValue() MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.textOrNull(), MAX_SHARD_SIZE_FIELD) - PERCENTAGE_DECREASE_FIELD -> percentageDecrease = xcp.doubleValue() + PERCENTAGE_OF_SOURCE_SHARDS_FIELD -> percentageOfSourceShards = xcp.doubleValue() TARGET_INDEX_SUFFIX_FIELD -> targetIndexSuffix = xcp.textOrNull() ALIASES_FIELD -> { if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { @@ -69,7 +69,7 @@ class ShrinkActionParser : ActionParser() { } } - return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, index) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, index) } override fun getActionType(): String { diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 2d849be93..9f6ba0ec7 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -18,9 +18,10 @@ import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple import org.opensearch.common.settings.Settings -import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.index.shard.DocsStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime +import org.opensearch.indexmanagement.indexstatemanagement.util.getFreeBytesThresholdHigh import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.opensearchapi.suspendUntil @@ -38,7 +39,6 @@ import java.time.Duration import java.time.Instant import java.util.PriorityQueue import kotlin.collections.ArrayList -import kotlin.collections.HashSet import kotlin.math.ceil import kotlin.math.floor import kotlin.math.min @@ -72,9 +72,10 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // Fail if there is only one primary shard, as that cannot be shrunk val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards + // if (numOriginalShards == 1) { - info = mapOf("message" to ONE_PRIMARY_SHARD_FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED + info = mapOf("message" to ONE_PRIMARY_SHARD_MESSAGE) + stepStatus = StepStatus.COMPLETED return this } @@ -84,16 +85,19 @@ class AttemptMoveShardsStep(private val action: 
ShrinkAction) : Step(name) { stats(statsRequest, it) } val statsStore = statsResponse.total.store - if (statsStore == null) { + val statsDocs = statsResponse.total.docs + if (statsStore == null || statsDocs == null) { info = mapOf("message" to FAILURE_MESSAGE) stepStatus = StepStatus.FAILED return this } val indexSize = statsStore.sizeInBytes - val numTargetShards = getNumTargetShards(numOriginalShards, indexSize) + + if (shouldFailTooManyDocuments(statsDocs, numTargetShards)) return this + // get the nodes with enough memory in increasing order of free space - val suitableNodes = findSuitableNodes(context, statsResponse, indexSize, bufferPercentage, numOriginalShards) + val suitableNodes = findSuitableNodes(context, statsResponse, indexSize) // iterate through the nodes and try to acquire a lock on one val lock = acquireLockOnNode(context.jobContext, suitableNodes) if (lock == null) { @@ -122,6 +126,19 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } } + private fun shouldFailTooManyDocuments(docsStats: DocsStats, numTargetShards: Int): Boolean { + val totalDocs: Long = docsStats.count + val docsPerTargetShard: Long = totalDocs / numTargetShards + // The maximum number of documents per shard is 2^31 + val maximumDocsPerShard = 0x80000000 + if (docsPerTargetShard > maximumDocsPerShard) { + info = mapOf("message" to TOO_MANY_DOCS_FAILURE_MESSAGE) + stepStatus = StepStatus.FAILED + return true + } + return false + } + /* * Returns whether the action should fail due to being unsafe. The action is unsafe if there are no replicas. If forceUnsafe * is set, then this always returns false. 
@@ -196,9 +213,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { private suspend fun findSuitableNodes( stepContext: StepContext, indicesStatsResponse: IndicesStatsResponse, - indexSizeInBytes: Long, - buffer: Long, - numOriginalShards: Int + indexSizeInBytes: Long ): List { val nodesStatsReq = NodesStatsRequest().addMetric(OS_METRIC) val nodeStatsResponse: NodesStatsResponse = stepContext.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) } @@ -211,8 +226,9 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { if (osStats != null) { val memLeftInNode = osStats.mem.free.bytes val totalNodeMem = osStats.mem.total.bytes - val bufferSize = ByteSizeValue(buffer * totalNodeMem) - val requiredBytes = (2 * indexSizeInBytes) + bufferSize.bytes + val freeBytesThresholdHigh = getFreeBytesThresholdHigh(stepContext.settings, stepContext.clusterService.clusterSettings, totalNodeMem) + // We require that a node has enough space to be below the high watermark disk level with an additional 2 * the index size free + val requiredBytes = (2 * indexSizeInBytes) + freeBytesThresholdHigh if (memLeftInNode > requiredBytes) { val memLeftAfterTransfer: Long = memLeftInNode - requiredBytes nodesWithSpace.add(Tuple(memLeftAfterTransfer, node.node.name)) @@ -220,29 +236,21 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } } val suitableNodes: ArrayList = ArrayList() + // For each node, do a dry run of moving all shards to the node to make sure there is enough space. 
+ // This should be rejected if allocation puts it above the low disk watermark setting for (sizeNodeTuple in nodesWithSpace) { val nodeName = sizeNodeTuple.v2() - val movableShardIds = HashSet() + val indexName = stepContext.metadata.index + val clusterRerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true) for (shard in indicesStatsResponse.shards) { val shardId = shard.shardRouting.shardId() val currentShardNode = stepContext.clusterService.state().nodes[shard.shardRouting.currentNodeId()] - if (currentShardNode.name.equals(nodeName)) { - movableShardIds.add(shardId.id) - } else { - val indexName = stepContext.metadata.index - val allocationCommand = MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, nodeName) - // Do a dry run to make sure the shard can successfully move to the target node - val rerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true).add(allocationCommand) - - val clusterRerouteResponse: ClusterRerouteResponse = - stepContext.client.admin().cluster().suspendUntil { reroute(rerouteRequest, it) } - // As there is only a single shard, there should be a single decision. 
If it is yes, note the shard as moveable - if (clusterRerouteResponse.explanations.yesDecisionMessages.isNotEmpty()) { - movableShardIds.add(shardId.id) - } - } + clusterRerouteRequest.add(MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, nodeName)) } - if (movableShardIds.size >= numOriginalShards) { + val clusterRerouteResponse: ClusterRerouteResponse = + stepContext.client.admin().cluster().suspendUntil { reroute(clusterRerouteRequest, it) } + // Should be the same number of yes decisions as the number of primary shards + if (clusterRerouteResponse.explanations.yesDecisionMessages.size == indicesStatsResponse.shards.size) { suitableNodes.add(sizeNodeTuple.v2()) } } @@ -254,9 +262,9 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // case where user specifies a certain number of shards in the target index if (action.numNewShards != null) return getGreatestFactorLessThan(numOriginalShards, action.numNewShards) - // case where user specifies a percentage decrease in the number of shards in the target index - if (action.percentageDecrease != null) { - val numTargetShards = floor((action.percentageDecrease) * numOriginalShards).toInt() + // case where user specifies a percentage of source shards to shrink to in the number of shards in the target index + if (action.percentageOfSourceShards != null) { + val numTargetShards = floor((action.percentageOfSourceShards) * numOriginalShards).toInt() return getGreatestFactorLessThan(numOriginalShards, numTargetShards) } // case where the user specifies a max shard size in the target index @@ -329,13 +337,19 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { val currentActionMetaData = currentMetadata.actionMetaData + // If we succeeded because there was only one source primary shard, we no-op by skipping to the last step + val stepMetaData = if 
(info?.get("message") == ONE_PRIMARY_SHARD_MESSAGE) { + StepMetaData(WaitForShrinkStep.name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus) + } else { + StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus) + } return currentMetadata.copy( actionMetaData = currentActionMetaData?.copy( actionProperties = ActionProperties( shrinkActionProperties = shrinkActionProperties ) ), - stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), + stepMetaData = stepMetaData, transitionTo = null, info = info ) @@ -348,7 +362,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { const val ROUTING_SETTING = "index.routing.allocation.require._name" const val RESOURCE_NAME = "node_name" const val DEFAULT_TARGET_SUFFIX = "_shrunken" - const val bufferPercentage = 0.05.toLong() const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds const val name = "attempt_move_shards_step" const val RESOURCE_TYPE = "shrink" @@ -357,7 +370,8 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { const val NO_AVAILABLE_NODES_MESSAGE = "There are no available nodes for to move to to execute a shrink. Delaying until node becomes available." const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true." - const val ONE_PRIMARY_SHARD_FAILURE_MESSAGE = "Shrink failed because index only has one primary shard." + const val ONE_PRIMARY_SHARD_MESSAGE = "Shrink action did not do anything because source index only has one primary shard." + const val TOO_MANY_DOCS_FAILURE_MESSAGE = "Shrink failed because there would be too many documents on each target shard following the shrink." const val FAILURE_MESSAGE = "Shrink failed to start moving shards." fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node." 
fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists." diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 2c9c506b2..6b9609add 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -11,6 +11,8 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthResponse import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client +import org.opensearch.cluster.routing.allocation.DiskThresholdSettings +import org.opensearch.common.settings.ClusterSettings import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep import org.opensearch.indexmanagement.opensearchapi.suspendUntil @@ -84,6 +86,23 @@ fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant { return Instant.now() } +/* + * For disk threshold, if the values are set as a percentage, the percent parameter will return a value and the bytes + * parameter will return 0, and vice versa for when the values are set as bytes. This method provides a single place to + * parse either and get the byte value back. 
+ */ +fun getFreeBytesThresholdHigh(settings: Settings, clusterSettings: ClusterSettings?, totalNodeBytes: Long): Long { + val diskThresholdSettings = DiskThresholdSettings(settings, clusterSettings) + // Depending on how a user provided input, this setting may be a percentage or byte value + val diskThresholdPercent = diskThresholdSettings.freeDiskThresholdHigh + val diskThresholdBytes = diskThresholdSettings.freeBytesThresholdHigh + // If the disk threshold is set as a percentage, use it and convert it to bytes. If + return if (diskThresholdPercent > 0.001) { + // If the user set value is 95%, diskThresholdPercent will be returned as 5% from the DiskThresholdSettings object + ((diskThresholdPercent / 100) * totalNodeBytes).toLong() + } else diskThresholdBytes.bytes +} + suspend fun isIndexGreen(client: Client, indexName: String): Boolean { // get index health, waiting for a green status val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus() diff --git a/src/main/resources/mappings/opendistro-ism-config.json b/src/main/resources/mappings/opendistro-ism-config.json index 66fee073b..5bc299417 100644 --- a/src/main/resources/mappings/opendistro-ism-config.json +++ b/src/main/resources/mappings/opendistro-ism-config.json @@ -438,7 +438,7 @@ "max_shard_size": { "type": "keyword" }, - "percentage_decrease": { + "percentage_of_source_shards": { "type": "double" }, "target_index_suffix": { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt index 13d777b15..3dd516863 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt @@ -143,19 +143,19 @@ fun randomRolloverActionConfig( fun randomShrinkAction( numNewShards: Int? = null, maxShardSize: ByteSizeValue? = null, - percentageDecrease: Double? 
= null, + percentageOfSourceShards: Double? = null, targetIndexSuffix: String? = if (randomBoolean()) randomAlphaOfLength(10) else null, aliases: List? = if (randomBoolean()) randomList(10) { randomAlias() } else null, forceUnsafe: Boolean? = if (randomBoolean()) randomBoolean() else null ): ShrinkAction { - if (numNewShards == null && maxShardSize == null && percentageDecrease == null) { + if (numNewShards == null && maxShardSize == null && percentageOfSourceShards == null) { when (randomInt(2)) { 0 -> return ShrinkAction(abs(randomInt()) + 1, null, null, targetIndexSuffix, aliases, forceUnsafe, 0) 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexSuffix, aliases, forceUnsafe, 0) 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexSuffix, aliases, forceUnsafe, 0) } } - return ShrinkAction(numNewShards, maxShardSize, percentageDecrease, targetIndexSuffix, aliases, forceUnsafe, 0) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, 0) } fun randomReadOnlyActionConfig(): ReadOnlyAction { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index e0b4e8d3e..084609ada 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -33,7 +33,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val shrinkAction = ShrinkAction( numNewShards = 1, maxShardSize = null, - percentageDecrease = null, + percentageOfSourceShards = null, targetIndexSuffix = "_shrink_test", aliases = null, forceUnsafe = true, @@ -122,7 +122,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = 
testMaxShardSize, - percentageDecrease = null, + percentageOfSourceShards = null, targetIndexSuffix = "_shrink_test", aliases = null, forceUnsafe = true, @@ -200,14 +200,14 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { } } - fun `test basic workflow percentage decrease`() { + fun `test basic workflow percentage to decrease to`() { val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" // Create a Policy with one State that only preforms a force_merge Action val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = null, - percentageDecrease = 0.5, + percentageOfSourceShards = 0.5, targetIndexSuffix = "_shrink_test", aliases = null, forceUnsafe = true, @@ -296,7 +296,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = null, - percentageDecrease = 0.5, + percentageOfSourceShards = 0.5, targetIndexSuffix = "_shrink_test", aliases = null, forceUnsafe = true, @@ -386,4 +386,62 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { } } } + + fun `test no-op with single source index primary shard`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1_shard_noop" + val policyID = "${testIndexName}_testPolicyName_1_shard_noop" + + // Create a Policy with one State that only preforms a force_merge Action + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageOfSourceShards = 0.5, + targetIndexSuffix = "_shrink_test", + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + 
createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "1", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + // The action should be done after the no-op + waitFor { + val metadata = getExplainManagedIndexMetaData(indexName) + assertEquals( + "Did not get the no-op due to single primary shard message", + AttemptMoveShardsStep.ONE_PRIMARY_SHARD_MESSAGE, + metadata.info?.get("message") + ) + assertEquals( + "Was not on the last step after no-op due to single primary shard", + WaitForShrinkStep.name, + metadata.stepMetaData?.name + ) + } + } } diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt index e73e92345..f7829f525 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt @@ -5,8 +5,11 @@ package org.opensearch.indexmanagement.indexstatemanagement.model +import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.io.stream.InputStreamStreamInput import org.opensearch.common.io.stream.OutputStreamStreamOutput +import org.opensearch.common.settings.ClusterSettings +import org.opensearch.common.settings.Settings import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.LoggingDeprecationHandler @@ 
-15,6 +18,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.indexmanagement.indexstatemanagement.ISMActionsParser import org.opensearch.indexmanagement.indexstatemanagement.action.DeleteAction import org.opensearch.indexmanagement.indexstatemanagement.randomAllocationActionConfig +import org.opensearch.indexmanagement.indexstatemanagement.randomByteSizeValue import org.opensearch.indexmanagement.indexstatemanagement.randomCloseActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomDeleteActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomForceMergeActionConfig @@ -28,6 +32,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.randomRolloverActionC import org.opensearch.indexmanagement.indexstatemanagement.randomRollupActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomSnapshotActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomTimeValueObject +import org.opensearch.indexmanagement.indexstatemanagement.util.getFreeBytesThresholdHigh import org.opensearch.indexmanagement.opensearchapi.convertToMap import org.opensearch.indexmanagement.opensearchapi.string import org.opensearch.indexmanagement.spi.indexstatemanagement.Action @@ -155,6 +160,33 @@ class ActionTests : OpenSearchTestCase() { roundTripAction(action) } + fun `test shrink disk threshold percentage settings`() { + val rawPercentage = randomIntBetween(0, 100) + val percentage = "$rawPercentage%" + val settings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.key, percentage) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.key, percentage) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.key, percentage).build() + val clusterSettings = ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS.map { it }.toSet()) + val totalNodeBytes = 
randomByteSizeValue().bytes + val thresholdBytes = getFreeBytesThresholdHigh(settings, clusterSettings, totalNodeBytes) + val expectedThreshold: Long = ((1 - (rawPercentage.toDouble() / 100.0)) * totalNodeBytes).toLong() + assertEquals("Free bytes threshold not being calculated correctly for percentage setting.", thresholdBytes, expectedThreshold) + } + + fun `test shrink disk threshold byte settings`() { + val byteValue = randomByteSizeValue() + val settings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.key, byteValue) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.key, byteValue) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.key, byteValue).build() + val clusterSettings = ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS.map { it }.toSet()) + val thresholdBytes = getFreeBytesThresholdHigh(settings, clusterSettings, randomByteSizeValue().bytes) + assertEquals("Free bytes threshold not being calculated correctly for byte setting.", thresholdBytes, byteValue.bytes) + } + + fun `test for fun`() { + println(0x80000000) + } + private fun roundTripAction(expectedAction: Action) { val baos = ByteArrayOutputStream() val osso = OutputStreamStreamOutput(baos) diff --git a/src/test/resources/mappings/cached-opendistro-ism-config.json b/src/test/resources/mappings/cached-opendistro-ism-config.json index 66fee073b..5bc299417 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-config.json +++ b/src/test/resources/mappings/cached-opendistro-ism-config.json @@ -438,7 +438,7 @@ "max_shard_size": { "type": "keyword" }, - "percentage_decrease": { + "percentage_of_source_shards": { "type": "double" }, "target_index_suffix": { From 8d8e58908bb645968932b57cf3e498440af35beb Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Fri, 1 Apr 2022 21:31:12 +0000 Subject: [PATCH 04/13] Testing job lock Signed-off-by: Clay Downs --- 
.../model/ShrinkActionProperties.kt | 16 +- .../action/ShrinkAction.kt | 6 + .../step/shrink/AttemptMoveShardsStep.kt | 166 ++++++++++++------ .../step/shrink/AttemptShrinkStep.kt | 77 ++++++-- .../step/shrink/WaitForMoveShardsStep.kt | 59 ++++--- .../step/shrink/WaitForShrinkStep.kt | 79 ++++++--- .../util/ManagedIndexUtils.kt | 39 +++- .../indexstatemanagement/util/StepUtils.kt | 68 ++++++- .../action/ShrinkActionIT.kt | 67 +++++++ .../indexstatemanagement/model/ActionTests.kt | 4 - 10 files changed, 447 insertions(+), 134 deletions(-) diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt index f5d236c5c..07ded7a10 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt +++ b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt @@ -20,7 +20,8 @@ data class ShrinkActionProperties( val targetNumShards: Int, val lockPrimaryTerm: Long, val lockSeqNo: Long, - val lockEpochSecond: Long + val lockEpochSecond: Long, + val lockDurationSecond: Long ) : Writeable, ToXContentFragment { override fun writeTo(out: StreamOutput) { @@ -30,6 +31,7 @@ data class ShrinkActionProperties( out.writeLong(lockPrimaryTerm) out.writeLong(lockSeqNo) out.writeLong(lockEpochSecond) + out.writeLong(lockDurationSecond) } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -39,6 +41,7 @@ data class ShrinkActionProperties( builder.field(ShrinkProperties.LOCK_SEQ_NO.key, lockSeqNo) builder.field(ShrinkProperties.LOCK_PRIMARY_TERM.key, lockPrimaryTerm) builder.field(ShrinkProperties.LOCK_EPOCH_SECOND.key, lockEpochSecond) + builder.field(ShrinkProperties.LOCK_DURATION_SECOND.key, lockDurationSecond) return builder } @@ -52,8 +55,9 @@ data class 
ShrinkActionProperties( val lockPrimaryTerm: Long = si.readLong() val lockSeqNo: Long = si.readLong() val lockEpochSecond: Long = si.readLong() + val lockDurationSecond: Long = si.readLong() - return ShrinkActionProperties(nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond) + return ShrinkActionProperties(nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond, lockDurationSecond) } fun parse(xcp: XContentParser): ShrinkActionProperties { @@ -63,6 +67,7 @@ data class ShrinkActionProperties( var lockPrimaryTerm: Long? = null var lockSeqNo: Long? = null var lockEpochSecond: Long? = null + var lockDurationSecond: Long? = null XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { @@ -76,6 +81,7 @@ data class ShrinkActionProperties( ShrinkProperties.LOCK_PRIMARY_TERM.key -> lockPrimaryTerm = xcp.longValue() ShrinkProperties.LOCK_SEQ_NO.key -> lockSeqNo = xcp.longValue() ShrinkProperties.LOCK_EPOCH_SECOND.key -> lockEpochSecond = xcp.longValue() + ShrinkProperties.LOCK_DURATION_SECOND.key -> lockDurationSecond = xcp.longValue() } } @@ -85,7 +91,8 @@ data class ShrinkActionProperties( requireNotNull(targetNumShards), requireNotNull(lockPrimaryTerm), requireNotNull(lockSeqNo), - requireNotNull(lockEpochSecond) + requireNotNull(lockEpochSecond), + requireNotNull(lockDurationSecond) ) } } @@ -96,6 +103,7 @@ data class ShrinkActionProperties( TARGET_NUM_SHARDS("target_num_shards"), LOCK_SEQ_NO("lock_seq_no"), LOCK_PRIMARY_TERM("lock_primary_term"), - LOCK_EPOCH_SECOND("lock_epoch_second") + LOCK_EPOCH_SECOND("lock_epoch_second"), + LOCK_DURATION_SECOND("lock_duration_second") } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index 4af08c345..cfde34f03 
100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -40,6 +40,9 @@ class ShrinkAction( } else if (numNewShards != null) { require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." } } + if (targetIndexSuffix != null) { + require(!targetIndexSuffix.contains('*') && !targetIndexSuffix.contains('?')) { "Target index suffix must not contain wildcards." } + } } private val attemptMoveShardsStep = AttemptMoveShardsStep(this) @@ -71,6 +74,9 @@ class ShrinkAction( AttemptShrinkStep.name -> waitForShrinkStep else -> stepNameToStep[currentStep]!! } + } else if (currentStepStatus == Step.StepStatus.FAILED) { + // If we failed at any point, retries should start from the beginning + return attemptMoveShardsStep } // step not completed return stepNameToStep[currentStep]!! diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 9f6ba0ec7..059caa8b4 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -6,6 +6,7 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchSecurityException import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse import org.opensearch.action.admin.cluster.reroute.ClusterRerouteRequest @@ -13,15 +14,21 @@ import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import 
org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.client.Client import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple +import org.opensearch.common.hash.MurmurHash3 +import org.opensearch.common.hash.MurmurHash3.Hash128 import org.opensearch.common.settings.Settings import org.opensearch.index.shard.DocsStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction -import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime -import org.opensearch.indexmanagement.indexstatemanagement.util.getFreeBytesThresholdHigh +import org.opensearch.indexmanagement.indexstatemanagement.model.ManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getIntervalFromManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink +import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.opensearchapi.suspendUntil @@ -34,11 +41,12 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaDat import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel -import java.lang.Exception -import java.time.Duration -import java.time.Instant +import java.io.ByteArrayOutputStream +import java.io.ObjectOutputStream +import java.io.Serializable 
+import java.nio.ByteBuffer +import java.util.Base64 import java.util.PriorityQueue -import kotlin.collections.ArrayList import kotlin.math.ceil import kotlin.math.floor import kotlin.math.min @@ -56,14 +64,13 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val context = this.context ?: return this val client = context.client val indexName = context.metadata.index - try { - if (actionTimedOut(context.metadata)) return this + try { val shrinkTargetIndexName = indexName + (action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX) if (targetIndexNameExists(context.clusterService, shrinkTargetIndexName)) return this if (!isIndexGreen(client, indexName)) { - info = mapOf("message" to FAILURE_MESSAGE) + info = mapOf("message" to INDEX_NOT_GREEN_MESSAGE) stepStatus = StepStatus.CONDITION_NOT_MET return this } @@ -72,7 +79,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // Fail if there is only one primary shard, as that cannot be shrunk val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards - // if (numOriginalShards == 1) { info = mapOf("message" to ONE_PRIMARY_SHARD_MESSAGE) stepStatus = StepStatus.COMPLETED @@ -87,8 +93,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val statsStore = statsResponse.total.store val statsDocs = statsResponse.total.docs if (statsStore == null || statsDocs == null) { - info = mapOf("message" to FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED + fail(FAILURE_MESSAGE) return this } val indexSize = statsStore.sizeInBytes @@ -98,8 +103,12 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // get the nodes with enough memory in increasing order of free space val suitableNodes = findSuitableNodes(context, statsResponse, indexSize) + + // Get the job interval to use in determining the lock length + val interval = getJobIntervalSeconds(context.metadata.indexUuid, client) + // iterate 
through the nodes and try to acquire a lock on one - val lock = acquireLockOnNode(context.jobContext, suitableNodes) + val lock = acquireLockFromNodeList(context.jobContext, suitableNodes, interval) if (lock == null) { logger.info("$indexName could not find available node to shrink onto.") info = mapOf("message" to NO_AVAILABLE_NODES_MESSAGE) @@ -113,27 +122,83 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { numTargetShards, lock.primaryTerm, lock.seqNo, - lock.lockTime.epochSecond + lock.lockTime.epochSecond, + lock.lockDurationSeconds + ) + println(lock) + var lockToReacquire = getShrinkLockModel(shrinkActionProperties!!, context.jobContext) + println(lockToReacquire) + // lock ids are not the same! + println(lock.lockId == lockToReacquire.lockId) + println(lock.lockId) + + val out = ByteArrayOutputStream() + val os = ObjectOutputStream(out) + os.writeObject(lock.resource as Map) + val resourceAsBytes = out.toByteArray() + val hash = MurmurHash3.hash128( + resourceAsBytes, 0, resourceAsBytes.size, 0, + Hash128() + ) + val resourceHashBytes = ByteBuffer.allocate(16).putLong(hash.h1).putLong(hash.h2).array() + val resourceAsHashString = Base64.getUrlEncoder().withoutPadding().encodeToString(resourceHashBytes) + println(resourceAsHashString) + + val out2 = ByteArrayOutputStream() + val os2 = ObjectOutputStream(out2) + os2.writeObject(lockToReacquire.resource as Map) + val resourceAsBytes2 = out2.toByteArray() + val hash2 = MurmurHash3.hash128( + resourceAsBytes2, 0, resourceAsBytes2.size, 0, + Hash128() ) + val resourceHashBytes2 = ByteBuffer.allocate(16).putLong(hash2.h1).putLong(hash2.h2).array() + val resourceAsHashString2 = Base64.getUrlEncoder().withoutPadding().encodeToString(resourceHashBytes2) + println(resourceAsHashString2) + + println(lockToReacquire.lockId) + try { + lockToReacquire = context.jobContext.lockService.suspendUntil { renewLock(lockToReacquire, it) } + } catch (e: Exception) { + println(e) + } + 
println(lockToReacquire) + setToReadOnlyAndMoveIndexToNode(context, nodeName, lock) info = mapOf("message" to getSuccessMessage(nodeName)) stepStatus = StepStatus.COMPLETED return this + } catch (e: OpenSearchSecurityException) { + fail(getSecurityFailureMessage(e.localizedMessage), e.message) + return this } catch (e: Exception) { - info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}") - stepStatus = StepStatus.FAILED + fail(FAILURE_MESSAGE, e.message) return this } } + private fun fail(message: String, cause: String? = null) { + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + } + + private suspend fun getJobIntervalSeconds(indexUuid: String, client: Client): Long? { + val managedIndexConfig: ManagedIndexConfig? + try { + managedIndexConfig = getManagedIndexConfig(indexUuid, client) + } catch (e: Exception) { + // If we fail to get the managedIndexConfig, just return null and a default lock duration of 12 hours will be used later + return null + } + // Divide the interval by 1000 to convert from ms to seconds + return managedIndexConfig?.let { getIntervalFromManagedIndexConfig(it) / 1000L } + } + private fun shouldFailTooManyDocuments(docsStats: DocsStats, numTargetShards: Int): Boolean { val totalDocs: Long = docsStats.count val docsPerTargetShard: Long = totalDocs / numTargetShards - // The maximum number of documents per shard is 2^31 - val maximumDocsPerShard = 0x80000000 - if (docsPerTargetShard > maximumDocsPerShard) { - info = mapOf("message" to TOO_MANY_DOCS_FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED + if (docsPerTargetShard > MAXIMUM_DOCS_PER_SHARD) { + fail(TOO_MANY_DOCS_FAILURE_MESSAGE) return true } return false @@ -149,8 +214,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val numReplicas = clusterService.state().metadata.indices[indexName].numberOfReplicas val shouldFailForceUnsafeCheck = numReplicas 
== 0 if (shouldFailForceUnsafeCheck) { - info = mapOf("message" to UNSAFE_FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED + fail(UNSAFE_FAILURE_MESSAGE) return true } return false @@ -159,8 +223,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { private fun targetIndexNameExists(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean { val indexExists = clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName) if (indexExists) { - info = mapOf("message" to getIndexExistsMessage(shrinkTargetIndexName)) - stepStatus = StepStatus.FAILED + fail(getIndexExistsMessage(shrinkTargetIndexName)) return true } return false @@ -175,8 +238,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { try { val response: AcknowledgedResponse = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata.index, updateSettings) if (!response.isAcknowledged) { - stepStatus = StepStatus.FAILED - info = mapOf("message" to UPDATE_FAILED_MESSAGE) + fail(UPDATE_FAILED_MESSAGE) jobContext.lockService.suspendUntil { release(lock, it) } } } catch (e: Exception) { @@ -190,11 +252,13 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { * Iterates through each suitable node in order, attempting to acquire a resource lock. Returns the first lock which * is successfully acquired. */ - private suspend fun acquireLockOnNode(jobContext: JobExecutionContext, suitableNodes: List): LockModel? { + private suspend fun acquireLockFromNodeList(jobContext: JobExecutionContext, suitableNodes: List, jobIntervalSeconds: Long?): LockModel? { for (node in suitableNodes) { val nodeResourceObject = mapOf(RESOURCE_NAME to node) - // TODO CLAY, the lock should be the timeout for all steps, not just one?? - val lockTime = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS + // If we couldn't get the job interval for the lock, use the default of 12 hours. 
+ // Lock is 3x + 30 minutes the job interval to allow the next step's execution to extend the lock without losing it. + // If user sets maximum jitter, it could be 2x the job interval before the next step is executed. + val lockTime = jobIntervalSeconds?.let { (it * 3) + (30 * 60) } ?: DEFAULT_LOCK_INTERVAL val lock: LockModel? = jobContext.lockService.suspendUntil { acquireLockOnResource(jobContext, lockTime, RESOURCE_TYPE, nodeResourceObject, it) } @@ -222,35 +286,31 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val comparator = kotlin.Comparator { o1: Tuple, o2: Tuple -> o1.v1().compareTo(o2.v1()) } val nodesWithSpace = PriorityQueue(comparator) for (node in nodesList) { - val osStats = node.os - if (osStats != null) { - val memLeftInNode = osStats.mem.free.bytes - val totalNodeMem = osStats.mem.total.bytes - val freeBytesThresholdHigh = getFreeBytesThresholdHigh(stepContext.settings, stepContext.clusterService.clusterSettings, totalNodeMem) - // We require that a node has enough space to be below the high watermark disk level with an additional 2 * the index size free - val requiredBytes = (2 * indexSizeInBytes) + freeBytesThresholdHigh - if (memLeftInNode > requiredBytes) { - val memLeftAfterTransfer: Long = memLeftInNode - requiredBytes - nodesWithSpace.add(Tuple(memLeftAfterTransfer, node.node.name)) - } + val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, stepContext.settings, stepContext.clusterService.clusterSettings) + if (remainingMem > 0L) { + nodesWithSpace.add(Tuple(remainingMem, node.node.name)) } } val suitableNodes: ArrayList = ArrayList() // For each node, do a dry run of moving all shards to the node to make sure there is enough space. 
// This should be rejected if allocation puts it above the low disk watermark setting for (sizeNodeTuple in nodesWithSpace) { - val nodeName = sizeNodeTuple.v2() + val targetNodeName = sizeNodeTuple.v2() val indexName = stepContext.metadata.index val clusterRerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true) + var numberOfRerouteRequests = 0 for (shard in indicesStatsResponse.shards) { val shardId = shard.shardRouting.shardId() val currentShardNode = stepContext.clusterService.state().nodes[shard.shardRouting.currentNodeId()] - clusterRerouteRequest.add(MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, nodeName)) + // Don't attempt a dry run for shards which are already on that node + if (currentShardNode.name == targetNodeName) continue + clusterRerouteRequest.add(MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, targetNodeName)) + numberOfRerouteRequests++ } val clusterRerouteResponse: ClusterRerouteResponse = stepContext.client.admin().cluster().suspendUntil { reroute(clusterRerouteRequest, it) } // Should be the same number of yes decisions as the number of primary shards - if (clusterRerouteResponse.explanations.yesDecisionMessages.size == indicesStatsResponse.shards.size) { + if (clusterRerouteResponse.explanations.yesDecisionMessages.size == numberOfRerouteRequests) { suitableNodes.add(sizeNodeTuple.v2()) } } @@ -323,18 +383,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { info = mutableInfo.toMap() } - private fun actionTimedOut(managedIndexMetadata: ManagedIndexMetaData): Boolean { - val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) - val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS - // Get ActionTimeout if given, otherwise use default timeout of 12 hours - if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { - info = mapOf("message" to TIMEOUT_MESSAGE) - 
stepStatus = StepStatus.FAILED - return true - } - return false - } - override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { val currentActionMetaData = currentMetadata.actionMetaData // If we succeeded because there was only one source primary shard, we no-op by skipping to the last step @@ -362,18 +410,20 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { const val ROUTING_SETTING = "index.routing.allocation.require._name" const val RESOURCE_NAME = "node_name" const val DEFAULT_TARGET_SUFFIX = "_shrunken" - const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds const val name = "attempt_move_shards_step" const val RESOURCE_TYPE = "shrink" - const val TIMEOUT_MESSAGE = "Timed out waiting for finding node." const val UPDATE_FAILED_MESSAGE = "Shrink failed because settings could not be updated.." const val NO_AVAILABLE_NODES_MESSAGE = "There are no available nodes for to move to to execute a shrink. Delaying until node becomes available." + const val DEFAULT_LOCK_INTERVAL = 3L * 60L * 60L // Default lock interval is 3 hours in seconds const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true." const val ONE_PRIMARY_SHARD_MESSAGE = "Shrink action did not do anything because source index only has one primary shard." const val TOO_MANY_DOCS_FAILURE_MESSAGE = "Shrink failed because there would be too many documents on each target shard following the shrink." + const val INDEX_NOT_GREEN_MESSAGE = "Shrink action cannot start moving shards as the index is not green." const val FAILURE_MESSAGE = "Shrink failed to start moving shards." + private const val MAXIMUM_DOCS_PER_SHARD = 0x80000000 // The maximum number of documents per shard is 2^31 fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node." fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists." 
+ fun getSecurityFailureMessage(failure: String) = "Shrink action failed because of missing permissions: $failure" } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index ca8962b07..23e14115e 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -6,13 +6,20 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse import org.opensearch.action.admin.indices.shrink.ResizeRequest import org.opensearch.action.admin.indices.shrink.ResizeResponse +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_NUMBER_OF_SHARDS +import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData @@ 
-34,8 +41,12 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") - stepStatus = StepStatus.FAILED + cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) + return this + } + val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + if (lock == null) { + cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") return this } try { @@ -44,24 +55,69 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { info = mapOf("message" to INDEX_HEALTH_NOT_GREEN_MESSAGE) return this } + if (!isNodeStillSuitable(shrinkActionProperties.nodeName, indexName, context)) return this + // If the resize index api fails, the step will be set to failed and resizeIndex will return false if (!resizeIndex(indexName, shrinkActionProperties, context)) return this info = mapOf("message" to getSuccessMessage(shrinkActionProperties.targetIndexName)) stepStatus = StepStatus.COMPLETED return this } catch (e: RemoteTransportException) { - info = mapOf("message" to FAILURE_MESSAGE) - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - stepStatus = StepStatus.FAILED + cleanupAndFail(FAILURE_MESSAGE) return this } catch (e: Exception) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}") - stepStatus = StepStatus.FAILED + cleanupAndFail(FAILURE_MESSAGE, e.message) return this } } + // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock + private suspend fun cleanupAndFail(message: String, cause: String? 
= null) { + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + val context = this.context ?: return + try { + clearReadOnlyAndRouting(context.metadata.index, context.client) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") + } + try { + val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to release the node lock after a failure: $e") + } + } + + private suspend fun isNodeStillSuitable(nodeName: String, indexName: String, context: StepContext): Boolean { + // Get the size of the index + val statsRequest = IndicesStatsRequest().indices(indexName) + val statsResponse: IndicesStatsResponse = context.client.admin().indices().suspendUntil { + stats(statsRequest, it) + } + val statsStore = statsResponse.total.store + if (statsStore == null) { + cleanupAndFail(FAILURE_MESSAGE) + return false + } + val indexSizeInBytes = statsStore.sizeInBytes + // Get the remaining memory in the node + val nodesStatsReq = NodesStatsRequest().addMetric(AttemptMoveShardsStep.OS_METRIC) + val nodeStatsResponse: NodesStatsResponse = context.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) } + // If the node has been replaced, this will fail + val node = nodeStatsResponse.nodes.firstOrNull { it.node.name == nodeName } + if (node == null) { + cleanupAndFail(FAILURE_MESSAGE) + return false + } + val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, context.settings, context.clusterService.clusterSettings) + if (remainingMem < 1L) { + cleanupAndFail(NOT_ENOUGH_SPACE_FAILURE_MESSAGE) + return false + } + return true + } + private suspend fun resizeIndex(sourceIndex: String, 
shrinkActionProperties: ShrinkActionProperties, context: StepContext): Boolean { val targetIndex = shrinkActionProperties.targetIndexName val req = ResizeRequest(targetIndex, sourceIndex) @@ -74,9 +130,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { action.aliases?.forEach { req.targetIndexRequest.alias(it) } val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) } if (!resizeResponse.isAcknowledged) { - info = mapOf("message" to FAILURE_MESSAGE) - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - stepStatus = StepStatus.FAILED + cleanupAndFail(FAILURE_MESSAGE) return false } return true @@ -97,6 +151,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { companion object { const val name = "attempt_shrink_step" const val FAILURE_MESSAGE = "Shrink failed when sending shrink request." + const val NOT_ENOUGH_SPACE_FAILURE_MESSAGE = "Shrink failed as the selected node no longer had enough free space to shrink to." const val INDEX_HEALTH_NOT_GREEN_MESSAGE = "Shrink delayed because index health is not green." fun getSuccessMessage(newIndex: String) = "Shrink started. $newIndex currently being populated." 
} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index 02e4c8d0f..8c8f56449 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -9,14 +9,15 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.admin.indices.stats.ShardStats -import org.opensearch.common.collect.ImmutableOpenIntMap import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime +import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData -import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.transport.RemoteTransportException @@ -36,10 +37,16 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val 
shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") - stepStatus = StepStatus.FAILED + cleanupAndFail(METADATA_FAILURE_MESSAGE) return this } + println(getShrinkLockModel(shrinkActionProperties, context.jobContext)) + val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + if (lock == null) { + cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") + return this + } + println(lock) try { val indexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(indexName) val response: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(indexStatsRequests, it) } @@ -57,7 +64,9 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { if (nodeNameShardIsOn.equals(nodeToMoveOnto) && routingInfo.started()) { numShardsOnNode++ } - if (numReplicas == 0 || inSyncReplicaExists(routingInfo.id, inSyncAllocations)) { + // Either there must be no replicas (force unsafe must have been set) or all replicas must be in sync as + // it isn't known which shard (any replica or primary) will be moved to the target node and used in the shrink. 
+ if (numReplicas == 0 || inSyncAllocations[routingInfo.id].size == (numReplicas + 1)) { numShardsInSync++ } } @@ -68,23 +77,35 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { } else { val numShardsNotOnNode = numPrimaryShards - numShardsOnNode val numShardsNotInSync = numPrimaryShards - numShardsInSync - checkTimeOut(context, shrinkActionProperties, numShardsNotOnNode, numShardsNotInSync, nodeToMoveOnto) + checkTimeOut(context, numShardsNotOnNode, numShardsNotInSync, nodeToMoveOnto) } return this } catch (e: RemoteTransportException) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - info = mapOf("message" to FAILURE_MESSAGE) - stepStatus = StepStatus.FAILED + cleanupAndFail(FAILURE_MESSAGE) return this } catch (e: Exception) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - info = mapOf("message" to FAILURE_MESSAGE, "cause" to "{${e.message}}") - stepStatus = StepStatus.FAILED + cleanupAndFail(FAILURE_MESSAGE, cause = e.message) return this } } - private fun inSyncReplicaExists(shardId: Int, inSyncAllocations: ImmutableOpenIntMap>): Boolean = inSyncAllocations[shardId].size > 1 + // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock + private suspend fun cleanupAndFail(message: String, cause: String? 
= null) { + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + val context = this.context ?: return + try { + clearReadOnlyAndRouting(context.metadata.index, context.client) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") + } + try { + val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to release the node lock after a failure: $e") + } + } override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { // Saving maxNumSegments in ActionProperties after the force merge operation has begun so that if a ChangePolicy occurred @@ -100,7 +121,6 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { private suspend fun checkTimeOut( stepContext: StepContext, - shrinkActionProperties: ShrinkActionProperties, numShardsNotOnNode: Int, numShardsNotInSync: Int, nodeToMoveOnto: String @@ -110,23 +130,19 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { val timeSinceActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS // Get ActionTimeout if given, otherwise use default timeout of 12 hours - stepStatus = if (timeSinceActionStarted.toSeconds() > timeOutInSeconds) { + if (timeSinceActionStarted.toSeconds() > timeOutInSeconds) { logger.error( "Shrink Action move shards failed on [$indexName], the action timed out with [$numShardsNotOnNode] shards not yet " + "moved and [$numShardsNotInSync] shards without an in sync replica." 
) - if (managedIndexMetadata.actionMetaData?.actionProperties?.shrinkActionProperties != null) { - releaseShrinkLock(shrinkActionProperties, stepContext.jobContext, logger) - } - info = mapOf("message" to getTimeoutFailure(nodeToMoveOnto)) - StepStatus.FAILED + cleanupAndFail(getTimeoutFailure(nodeToMoveOnto)) } else { logger.debug( "Shrink action move shards step running on [$indexName], [$numShardsNotOnNode] shards need to be moved, " + "[$numShardsNotInSync] shards need an in sync replica." ) info = mapOf("message" to getTimeoutDelay(nodeToMoveOnto)) - StepStatus.CONDITION_NOT_MET + stepStatus = StepStatus.CONDITION_NOT_MET } } @@ -138,6 +154,7 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { fun getTimeoutFailure(node: String) = "Shrink failed because it took to long to move shards to $node" fun getTimeoutDelay(node: String) = "Shrink delayed because it took to long to move shards to $node" const val FAILURE_MESSAGE = "Shrink failed when waiting for shards to move." 
+ const val METADATA_FAILURE_MESSAGE = "Shrink action properties are null, metadata was not properly populated" const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds const val RESOURCE_NAME = "node_name" const val RESOURCE_TYPE = "shrink" diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index 554c76be2..1e3246e95 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -6,19 +6,21 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData -import 
org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.transport.RemoteTransportException @@ -36,8 +38,12 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val actionMetadata = context.metadata.actionMetaData val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties if (shrinkActionProperties == null) { - info = mapOf("message" to "Shrink action properties are null, metadata was not properly populated") - stepStatus = StepStatus.FAILED + cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) + return this + } + val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + if (lock == null) { + cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") return this } try { @@ -45,38 +51,62 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val numPrimaryShardsStarted = getNumPrimaryShardsStarted(context.client, targetIndex) val numPrimaryShards = context.clusterService.state().metadata.indices[targetIndex].numberOfShards if (numPrimaryShards != shrinkActionProperties.targetNumShards || numPrimaryShardsStarted != shrinkActionProperties.targetNumShards) { - checkTimeOut(context, shrinkActionProperties, targetIndex) + checkTimeOut(context, targetIndex) return this } // Clear source and target allocation, if either fails the step will be set to failed and the function will return false - if (!clearAllocationSettings(context, targetIndex, shrinkActionProperties)) return this - if (!clearAllocationSettings(context, context.metadata.index, shrinkActionProperties)) return this + if (!clearAllocationSettings(context, targetIndex)) return this + if (!clearAllocationSettings(context, context.metadata.index)) return this 
releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) stepStatus = StepStatus.COMPLETED info = mapOf("message" to SUCCESS_MESSAGE) return this } catch (e: RemoteTransportException) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - info = mapOf("message" to getFailureMessage(shrinkActionProperties.targetIndexName)) - stepStatus = StepStatus.FAILED + cleanupAndFail(getFailureMessage(shrinkActionProperties.targetIndexName)) return this } catch (e: Exception) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - info = mapOf("message" to GENERIC_FAILURE_MESSAGE, "cause" to "{${e.message}}") - stepStatus = StepStatus.FAILED + cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message) return this } } - private suspend fun clearAllocationSettings(context: StepContext, index: String, shrinkActionProperties: ShrinkActionProperties): Boolean { + // Sets the action to failed, clears the readonly and allocation settings on the source index, deletes the target index, and releases the shrink lock + private suspend fun cleanupAndFail(message: String, cause: String? 
= null) { + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + val context = this.context ?: return + // Using a try/catch for each cleanup action as we should clean up as much as possible despite any failures + try { + clearReadOnlyAndRouting(context.metadata.index, context.client) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") + } + val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return + try { + // TODO CLAY use plugin permissions when cleaning up + // Delete the target index + val deleteRequest = DeleteIndexRequest(shrinkActionProperties.targetIndexName) + val response: AcknowledgedResponse = context.client.admin().indices().suspendUntil { delete(deleteRequest, it) } + if (!response.isAcknowledged) { + logger.error("Shrink action failed to delete target index during cleanup after a failure") + } + } catch (e: Exception) { + logger.error("Shrink action failed while trying to delete the target index after a failure: $e") + } + try { + releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to release the node lock after a failure: $e") + } + } + + private suspend fun clearAllocationSettings(context: StepContext, index: String): Boolean { val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build() val response: AcknowledgedResponse = issueUpdateSettingsRequest(context.client, index, allocationSettings) if (!response.isAcknowledged) { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) - stepStatus = StepStatus.FAILED - info = mapOf("message" to getFailureMessage(index)) + cleanupAndFail(getFailureMessage(index)) return false } return true @@ -88,25 +118,16 @@ class 
WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { return targetStatsResponse.shards.filter { it.shardRouting.started() && it.shardRouting.primary() }.size } - private suspend fun checkTimeOut(stepContext: StepContext, shrinkActionProperties: ShrinkActionProperties, targetIndex: String) { + private suspend fun checkTimeOut(stepContext: StepContext, targetIndex: String) { val managedIndexMetadata = stepContext.metadata - val indexName = managedIndexMetadata.index val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS // Get ActionTimeout if given, otherwise use default timeout of 12 hours - stepStatus = if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { - logger.error( - "Shards of $indexName have still not started." - ) - releaseShrinkLock(shrinkActionProperties, stepContext.jobContext, logger) - info = mapOf("message" to getFailureMessage(targetIndex)) - StepStatus.FAILED + if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { + cleanupAndFail(getFailureMessage(targetIndex)) } else { - logger.debug( - "Shards of $indexName have still not started." 
- ) info = mapOf("message" to getDelayedMessage(targetIndex)) - StepStatus.CONDITION_NOT_MET + stepStatus = StepStatus.CONDITION_NOT_MET } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt index b9c9dc7ee..66254ffee 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt @@ -7,19 +7,28 @@ @file:JvmName("ManagedIndexUtils") package org.opensearch.indexmanagement.indexstatemanagement.util -// import inet.ipaddr.IPAddressString -// import org.apache.logging.log4j.LogManager +//import inet.ipaddr.IPAddressString +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext +//import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.Logger import org.opensearch.action.delete.DeleteRequest +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.search.SearchRequest import org.opensearch.action.support.WriteRequest import org.opensearch.action.update.UpdateRequest -// import org.opensearch.alerting.destination.message.BaseMessage +//import org.opensearch.alerting.destination.message.BaseMessage +import org.opensearch.client.Client import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.NamedXContentRegistry import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType import org.opensearch.index.query.BoolQueryBuilder import 
org.opensearch.index.query.QueryBuilders import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX @@ -37,6 +46,8 @@ import org.opensearch.indexmanagement.indexstatemanagement.model.coordinator.Swe import org.opensearch.indexmanagement.indexstatemanagement.settings.ManagedIndexSettings import org.opensearch.indexmanagement.opensearchapi.optionalISMTemplateField import org.opensearch.indexmanagement.opensearchapi.optionalTimeField +import org.opensearch.indexmanagement.opensearchapi.parseWithType +import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaData @@ -535,3 +546,25 @@ enum class MetadataCheck { // } // return false // } + +@Suppress("BlockingMethodInNonBlockingContext") +suspend fun getManagedIndexConfig(indexUuid: String, client: Client): ManagedIndexConfig? { + val request = GetRequest().routing(indexUuid).index(INDEX_MANAGEMENT_INDEX).id(indexUuid) + val response: GetResponse = client.suspendUntil { get(request, it) } + var managedIndexConfig: ManagedIndexConfig? 
= null + val configSource = response.sourceAsBytesRef + // Intellij complains about createParser/parseWithType blocking because it sees they throw IOExceptions + configSource?.let { + withContext(Dispatchers.IO) { + val xcp = XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, configSource, XContentType.JSON) + managedIndexConfig = xcp.parseWithType(response.id, response.seqNo, response.primaryTerm, ManagedIndexConfig.Companion::parse) + } + } + return managedIndexConfig +} + +// extracts the job scheduler interval from the managed index config and returns the millisecond value +fun getIntervalFromManagedIndexConfig(managedIndexConfig: ManagedIndexConfig): Long { + val periodTuple = managedIndexConfig.jobSchedule.getPeriodStartingAt(Instant.now()) + return periodTuple.v2().toEpochMilli() - periodTuple.v1().toEpochMilli() +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 6b9609add..96ac2bf8c 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -8,18 +8,22 @@ package org.opensearch.indexmanagement.indexstatemanagement.util import org.apache.logging.log4j.Logger import org.opensearch.action.admin.cluster.health.ClusterHealthRequest import org.opensearch.action.admin.cluster.health.ClusterHealthResponse +import org.opensearch.action.admin.cluster.node.stats.NodeStats import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client +import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.settings.ClusterSettings import 
org.opensearch.common.settings.Settings +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel +import java.lang.Exception import java.time.Instant suspend fun issueUpdateSettingsRequest(client: Client, indexName: String, settings: Settings): AcknowledgedResponse { @@ -36,7 +40,33 @@ suspend fun releaseShrinkLock( val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext) val released: Boolean = jobExecutionContext.lockService.suspendUntil { release(lock, it) } if (!released) { - logger.warn("Lock not released on failure") + logger.error("Failed to release Shrink action lock on node [${shrinkActionProperties.nodeName}]") + } +} + +suspend fun releaseShrinkLock( + lock: LockModel, + jobExecutionContext: JobExecutionContext, + logger: Logger +) { + val released: Boolean = jobExecutionContext.lockService.suspendUntil { release(lock, it) } + if (!released) { + logger.error("Failed to release Shrink action lock on node [${lock.resource[AttemptMoveShardsStep.RESOURCE_NAME] as String}]") + } +} + +suspend fun renewShrinkLock( + shrinkActionProperties: ShrinkActionProperties, + jobExecutionContext: JobExecutionContext, + logger: Logger +): LockModel? 
{ + val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext) + println(lock.lockDurationSeconds) + return try { + jobExecutionContext.lockService.suspendUntil { renewLock(lock, it) } + } catch (e: Exception) { + logger.error("Failed to renew Shrink action lock on node [${shrinkActionProperties.nodeName}]: $e") + null } } @@ -50,7 +80,8 @@ fun getShrinkLockModel( jobExecutionContext.jobId, shrinkActionProperties.lockEpochSecond, shrinkActionProperties.lockPrimaryTerm, - shrinkActionProperties.lockSeqNo + shrinkActionProperties.lockSeqNo, + shrinkActionProperties.lockDurationSecond ) } @@ -61,7 +92,8 @@ fun getShrinkLockModel( jobId: String, lockEpochSecond: Long, lockPrimaryTerm: Long, - lockSeqNo: Long + lockSeqNo: Long, + lockDurationSecond: Long ): LockModel { val resource: HashMap = HashMap() resource[WaitForMoveShardsStep.RESOURCE_NAME] = nodeName @@ -72,7 +104,7 @@ fun getShrinkLockModel( WaitForMoveShardsStep.RESOURCE_TYPE, resource as Map?, lockCreationInstant, - WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS, + lockDurationSecond, false, lockSeqNo, lockPrimaryTerm @@ -103,6 +135,25 @@ fun getFreeBytesThresholdHigh(settings: Settings, clusterSettings: ClusterSettin } else diskThresholdBytes.bytes } +/* + * Returns the amount of memory in the node which will be free below the high watermark level after adding 2*indexSizeInBytes, or -1 + * if adding 2*indexSizeInBytes goes over the high watermark threshold, or if nodeStats does not contain OsStats. 
+*/ +fun getNodeFreeMemoryAfterShrink(node: NodeStats, indexSizeInBytes: Long, settings: Settings, clusterSettings: ClusterSettings?): Long { + val osStats = node.os + if (osStats != null) { + val memLeftInNode = osStats.mem.free.bytes + val totalNodeMem = osStats.mem.total.bytes + val freeBytesThresholdHigh = getFreeBytesThresholdHigh(settings, clusterSettings, totalNodeMem) + // We require that a node has enough space to be below the high watermark disk level with an additional 2 * the index size free + val requiredBytes = (2 * indexSizeInBytes) + freeBytesThresholdHigh + if (memLeftInNode > requiredBytes) { + return memLeftInNode - requiredBytes + } + } + return -1L +} + suspend fun isIndexGreen(client: Client, indexName: String): Boolean { // get index health, waiting for a green status val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus() @@ -110,3 +161,12 @@ suspend fun isIndexGreen(client: Client, indexName: String): Boolean { // The request was set to wait for green index, if the request timed out, the index never was green return !response.isTimedOut } + +suspend fun clearReadOnlyAndRouting(index: String, client: Client): Boolean { + val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).putNull(IndexMetadata.SETTING_BLOCKS_WRITE).build() + val response: AcknowledgedResponse = issueUpdateSettingsRequest(client, index, allocationSettings) + if (!response.isAcknowledged) { + return false + } + return true +} diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index 084609ada..b3e4604cf 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -444,4 +444,71 @@ class ShrinkActionIT : 
IndexStateManagementRestTestCase() { ) } } + + fun `test retries from first step`() { + val testPolicy = """ + {"policy":{"description":"Default policy","default_state":"Shrink","states":[ + {"name":"Shrink","actions":[{"retry":{"count":2,"backoff":"constant","delay":"1s"},"shrink": + {"num_new_shards":1, "target_index_suffix":"_shrink_test", "force_unsafe": "true"}}],"transitions":[]}]}} + """.trimIndent() + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_retry" + val policyID = "${testIndexName}_testPolicyName_retry" + createPolicyJson(testPolicy, policyID) + + createIndex(indexName, policyID, null, "0", "3", "") + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + "_shrink_test" + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + 
AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Create an index with the target index name so the AttemptShrinkStep fails + createIndex(targetIndexName, null) + + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData + assertEquals("Did not fail due to target index existing step as expected", Step.StepStatus.FAILED, stepMetadata?.stepStatus) + assertEquals(AttemptShrinkStep.name, stepMetadata?.name) + } + // TODO add checks for successful cleanup + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor { + val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData + assertEquals("Shrink action should have started over after failing", stepMetadata?.name, AttemptMoveShardsStep.name) + assertEquals("Step status should have been starting", Step.StepStatus.STARTING, stepMetadata?.stepStatus) + } + } } diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt index f7829f525..7a71b3481 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt @@ -183,10 +183,6 @@ class ActionTests : OpenSearchTestCase() { assertEquals("Free bytes 
threshold not being calculated correctly for byte setting.", thresholdBytes, byteValue.bytes) } - fun `test for fun`() { - println(0x80000000) - } - private fun roundTripAction(expectedAction: Action) { val baos = ByteArrayOutputStream() val osso = OutputStreamStreamOutput(baos) From fe600180b32cc46c21ebcf4b6f695759c7590afc Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Thu, 7 Apr 2022 00:18:29 +0000 Subject: [PATCH 05/13] Fixes locking Signed-off-by: Clay Downs --- .../action/ShrinkAction.kt | 6 +- .../step/shrink/AttemptMoveShardsStep.kt | 121 ++++++------------ .../step/shrink/AttemptShrinkStep.kt | 35 +++-- .../step/shrink/WaitForMoveShardsStep.kt | 39 +++--- .../step/shrink/WaitForShrinkStep.kt | 58 +++++---- .../util/ManagedIndexUtils.kt | 6 +- .../indexstatemanagement/util/StepUtils.kt | 44 ++++--- .../action/ShrinkActionIT.kt | 40 ++++-- .../indexstatemanagement/model/ActionTests.kt | 11 ++ 9 files changed, 194 insertions(+), 166 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index cfde34f03..2c3b89b1d 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -36,7 +36,9 @@ class ShrinkAction( if (maxShardSize != null) { require(maxShardSize.bytes > 0) { "Shrink action maxShardSize must be greater than 0." 
} } else if (percentageOfSourceShards != null) { - require(percentageOfSourceShards > 0.0 && percentageOfSourceShards < 1.0) { "Percentage of source shards must be between 0.0 and 1.0 exclusively" } + require(percentageOfSourceShards > 0.0 && percentageOfSourceShards < 1.0) { + "Percentage of source shards must be between 0.0 and 1.0 exclusively" + } } else if (numNewShards != null) { require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." } } @@ -116,5 +118,7 @@ class ShrinkAction( const val TARGET_INDEX_SUFFIX_FIELD = "target_index_suffix" const val ALIASES_FIELD = "aliases" const val FORCE_UNSAFE_FIELD = "force_unsafe" + const val LOCK_RESOURCE_TYPE = "shrink" + const val LOCK_RESOURCE_NAME = "node_name" } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 059caa8b4..5adcdb959 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -19,8 +19,6 @@ import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple -import org.opensearch.common.hash.MurmurHash3 -import org.opensearch.common.hash.MurmurHash3.Hash128 import org.opensearch.common.settings.Settings import org.opensearch.index.shard.DocsStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction @@ -28,7 +26,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.model.ManagedIndexCon import org.opensearch.indexmanagement.indexstatemanagement.util.getIntervalFromManagedIndexConfig import 
org.opensearch.indexmanagement.indexstatemanagement.util.getManagedIndexConfig import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink -import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel +import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockID import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.opensearchapi.suspendUntil @@ -41,11 +39,6 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaDat import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel -import java.io.ByteArrayOutputStream -import java.io.ObjectOutputStream -import java.io.Serializable -import java.nio.ByteBuffer -import java.util.Base64 import java.util.PriorityQueue import kotlin.math.ceil import kotlin.math.floor @@ -106,16 +99,15 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // Get the job interval to use in determining the lock length val interval = getJobIntervalSeconds(context.metadata.indexUuid, client) - // iterate through the nodes and try to acquire a lock on one - val lock = acquireLockFromNodeList(context.jobContext, suitableNodes, interval) - if (lock == null) { + val lockToNodeName: Pair? 
= acquireLockFromNodeList(context.jobContext, suitableNodes, interval) + if (lockToNodeName == null) { logger.info("$indexName could not find available node to shrink onto.") info = mapOf("message" to NO_AVAILABLE_NODES_MESSAGE) stepStatus = StepStatus.CONDITION_NOT_MET return this } - val nodeName = lock.resource[RESOURCE_NAME] as String + val (lock, nodeName) = lockToNodeName shrinkActionProperties = ShrinkActionProperties( nodeName, shrinkTargetIndexName, @@ -125,44 +117,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { lock.lockTime.epochSecond, lock.lockDurationSeconds ) - println(lock) - var lockToReacquire = getShrinkLockModel(shrinkActionProperties!!, context.jobContext) - println(lockToReacquire) - // lock ids are not the same! - println(lock.lockId == lockToReacquire.lockId) - println(lock.lockId) - - val out = ByteArrayOutputStream() - val os = ObjectOutputStream(out) - os.writeObject(lock.resource as Map) - val resourceAsBytes = out.toByteArray() - val hash = MurmurHash3.hash128( - resourceAsBytes, 0, resourceAsBytes.size, 0, - Hash128() - ) - val resourceHashBytes = ByteBuffer.allocate(16).putLong(hash.h1).putLong(hash.h2).array() - val resourceAsHashString = Base64.getUrlEncoder().withoutPadding().encodeToString(resourceHashBytes) - println(resourceAsHashString) - - val out2 = ByteArrayOutputStream() - val os2 = ObjectOutputStream(out2) - os2.writeObject(lockToReacquire.resource as Map) - val resourceAsBytes2 = out2.toByteArray() - val hash2 = MurmurHash3.hash128( - resourceAsBytes2, 0, resourceAsBytes2.size, 0, - Hash128() - ) - val resourceHashBytes2 = ByteBuffer.allocate(16).putLong(hash2.h1).putLong(hash2.h2).array() - val resourceAsHashString2 = Base64.getUrlEncoder().withoutPadding().encodeToString(resourceHashBytes2) - println(resourceAsHashString2) - - println(lockToReacquire.lockId) - try { - lockToReacquire = context.jobContext.lockService.suspendUntil { renewLock(lockToReacquire, it) } - } catch (e: 
Exception) { - println(e) - } - println(lockToReacquire) setToReadOnlyAndMoveIndexToNode(context, nodeName, lock) info = mapOf("message" to getSuccessMessage(nodeName)) @@ -180,6 +134,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { private fun fail(message: String, cause: String? = null) { info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED + shrinkActionProperties = null } private suspend fun getJobIntervalSeconds(indexUuid: String, client: Client): Long? { @@ -191,7 +146,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { return null } // Divide the interval by 1000 to convert from ms to seconds - return managedIndexConfig?.let { getIntervalFromManagedIndexConfig(it) / 1000L } + return managedIndexConfig?.let { getIntervalFromManagedIndexConfig(it) / MILLISECONDS_IN_SECOND } } private fun shouldFailTooManyDocuments(docsStats: DocsStats, numTargetShards: Int): Boolean { @@ -208,6 +163,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { * Returns whether the action should fail due to being unsafe. The action is unsafe if there are no replicas. If forceUnsafe * is set, then this always returns false. 
*/ + @Suppress("ReturnCount") private fun shouldFailUnsafe(clusterService: ClusterService, indexName: String): Boolean { // If forceUnsafe is set and is true, then we don't even need to check the number of replicas if (action.forceUnsafe == true) return false @@ -229,41 +185,45 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { return false } - private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String, lock: LockModel) { + private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String, lock: LockModel): Boolean { val updateSettings = Settings.builder() .put(IndexMetadata.SETTING_BLOCKS_WRITE, true) .put(ROUTING_SETTING, node) .build() val jobContext = stepContext.jobContext + var response: AcknowledgedResponse? = null + val isUpdateAcknowledged: Boolean try { - val response: AcknowledgedResponse = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata.index, updateSettings) - if (!response.isAcknowledged) { + response = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata.index, updateSettings) + } finally { + isUpdateAcknowledged = response != null && response.isAcknowledged + if (!isUpdateAcknowledged) { fail(UPDATE_FAILED_MESSAGE) - jobContext.lockService.suspendUntil { release(lock, it) } + val released: Boolean = jobContext.lockService.suspendUntil { release(lock, it) } + if (!released) { + logger.error("Failed to release Shrink action lock on node [$node]") + } } - } catch (e: Exception) { - stepStatus = StepStatus.FAILED - handleException(e, UPDATE_FAILED_MESSAGE) - jobContext.lockService.suspendUntil { release(lock, it) } } + return isUpdateAcknowledged } /* * Iterates through each suitable node in order, attempting to acquire a resource lock. Returns the first lock which - * is successfully acquired. + * is successfully acquired and the name of the node it acquired the lock on in a pair. 
*/ - private suspend fun acquireLockFromNodeList(jobContext: JobExecutionContext, suitableNodes: List, jobIntervalSeconds: Long?): LockModel? { - for (node in suitableNodes) { - val nodeResourceObject = mapOf(RESOURCE_NAME to node) - // If we couldn't get the job interval for the lock, use the default of 12 hours. - // Lock is 3x + 30 minutes the job interval to allow the next step's execution to extend the lock without losing it. - // If user sets maximum jitter, it could be 2x the job interval before the next step is executed. - val lockTime = jobIntervalSeconds?.let { (it * 3) + (30 * 60) } ?: DEFAULT_LOCK_INTERVAL + private suspend fun acquireLockFromNodeList( + jobContext: JobExecutionContext, + suitableNodes: List, + jobIntervalSeconds: Long? + ): Pair? { + for (nodeName in suitableNodes) { + val lockID = getShrinkLockID(nodeName) val lock: LockModel? = jobContext.lockService.suspendUntil { - acquireLockOnResource(jobContext, lockTime, RESOURCE_TYPE, nodeResourceObject, it) + acquireLockWithId(jobContext.jobIndexName, getShrinkLockDuration(jobIntervalSeconds), lockID, it) } if (lock != null) { - return lock + return lock to nodeName } } return null @@ -374,15 +334,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { return n } - private fun handleException(e: Exception, message: String) { - logger.error(message, e) - stepStatus = StepStatus.FAILED - val mutableInfo = mutableMapOf("message" to message) - val errorMessage = e.message - if (errorMessage != null) mutableInfo["cause"] = errorMessage - info = mutableInfo.toMap() - } - override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { val currentActionMetaData = currentMetadata.actionMetaData // If we succeeded because there was only one source primary shard, we no-op by skipping to the last step @@ -408,22 +359,28 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { companion object { const val OS_METRIC 
= "os" const val ROUTING_SETTING = "index.routing.allocation.require._name" - const val RESOURCE_NAME = "node_name" const val DEFAULT_TARGET_SUFFIX = "_shrunken" const val name = "attempt_move_shards_step" - const val RESOURCE_TYPE = "shrink" - const val UPDATE_FAILED_MESSAGE = "Shrink failed because settings could not be updated.." + const val UPDATE_FAILED_MESSAGE = "Shrink failed because shard settings could not be updated." const val NO_AVAILABLE_NODES_MESSAGE = "There are no available nodes for to move to to execute a shrink. Delaying until node becomes available." - const val DEFAULT_LOCK_INTERVAL = 3L * 60L * 60L // Default lock interval is 3 hours in seconds const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true." const val ONE_PRIMARY_SHARD_MESSAGE = "Shrink action did not do anything because source index only has one primary shard." const val TOO_MANY_DOCS_FAILURE_MESSAGE = "Shrink failed because there would be too many documents on each target shard following the shrink." const val INDEX_NOT_GREEN_MESSAGE = "Shrink action cannot start moving shards as the index is not green." const val FAILURE_MESSAGE = "Shrink failed to start moving shards." + private const val DEFAULT_LOCK_INTERVAL = 3L * 60L * 60L // Default lock interval is 3 hours in seconds + private const val MILLISECONDS_IN_SECOND = 1000 + private const val JOB_INTERVAL_LOCK_MULTIPLIER = 3 + private const val LOCK_BUFFER_SECONDS = 1800 private const val MAXIMUM_DOCS_PER_SHARD = 0x80000000 // The maximum number of documents per shard is 2^31 fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node." fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists." fun getSecurityFailureMessage(failure: String) = "Shrink action failed because of missing permissions: $failure" + // If we couldn't get the job interval for the lock, use the default of 12 hours. 
+ // Lock is 3x + 30 minutes the job interval to allow the next step's execution to extend the lock without losing it. + // If user sets maximum jitter, it could be 2x the job interval before the next step is executed. + private fun getShrinkLockDuration(jobInterval: Long?) = jobInterval?.let { (it * JOB_INTERVAL_LOCK_MULTIPLIER) + LOCK_BUFFER_SECONDS } + ?: DEFAULT_LOCK_INTERVAL } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index 23e14115e..493274b89 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -20,8 +20,10 @@ import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemor import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.getUpdatedShrinkActionProperties import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext @@ -33,33 +35,36 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { private val logger = LogManager.getLogger(javaClass) private var stepStatus = StepStatus.STARTING private var info: Map? 
= null + private var shrinkActionProperties: ShrinkActionProperties? = null @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount") override suspend fun execute(): AttemptShrinkStep { val context = this.context ?: return this val indexName = context.metadata.index val actionMetadata = context.metadata.actionMetaData - val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties - if (shrinkActionProperties == null) { + val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties + shrinkActionProperties = localShrinkActionProperties + if (localShrinkActionProperties == null) { cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } - val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { - cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") + cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } + shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock) try { if (!isIndexGreen(context.client, indexName)) { stepStatus = StepStatus.CONDITION_NOT_MET info = mapOf("message" to INDEX_HEALTH_NOT_GREEN_MESSAGE) return this } - if (!isNodeStillSuitable(shrinkActionProperties.nodeName, indexName, context)) return this + if (!isNodeStillSuitable(localShrinkActionProperties.nodeName, indexName, context)) return this // If the resize index api fails, the step will be set to failed and resizeIndex will return false - if (!resizeIndex(indexName, shrinkActionProperties, context)) return this - info = mapOf("message" to getSuccessMessage(shrinkActionProperties.targetIndexName)) + if (!resizeIndex(indexName, localShrinkActionProperties, context)) return this + info = mapOf("message" to getSuccessMessage(localShrinkActionProperties.targetIndexName)) 
stepStatus = StepStatus.COMPLETED return this } catch (e: RemoteTransportException) { @@ -75,20 +80,21 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { private suspend fun cleanupAndFail(message: String, cause: String? = null) { info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED - val context = this.context ?: return + // Non-null assertion !! is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context.metadata.index, context.client) + clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } try { - val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) } catch (e: Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } + shrinkActionProperties = null } + @Suppress("ReturnCount") private suspend fun isNodeStillSuitable(nodeName: String, indexName: String, context: StepContext): Boolean { // Get the size of the index val statsRequest = IndicesStatsRequest().indices(indexName) @@ -137,9 +143,12 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { } override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { - val currentActionMetaData = currentMetadata.actionMetaData return currentMetadata.copy( - actionMetaData = currentActionMetaData?.copy(), + actionMetaData = currentMetadata.actionMetaData?.copy( + actionProperties = ActionProperties( + shrinkActionProperties = shrinkActionProperties + ) + ), stepMetaData = StepMetaData(name, 
getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), transitionTo = null, info = info diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index 8c8f56449..62faf6c90 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -12,12 +12,14 @@ import org.opensearch.action.admin.indices.stats.ShardStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime -import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockModel import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.getUpdatedShrinkActionProperties import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.transport.RemoteTransportException @@ -29,29 +31,31 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { private val logger = 
LogManager.getLogger(javaClass) private var stepStatus = StepStatus.STARTING private var info: Map? = null + private var shrinkActionProperties: ShrinkActionProperties? = null @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "NestedBlockDepth") override suspend fun execute(): WaitForMoveShardsStep { val context = this.context ?: return this val indexName = context.metadata.index val actionMetadata = context.metadata.actionMetaData - val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties - if (shrinkActionProperties == null) { + val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties + shrinkActionProperties = localShrinkActionProperties + if (localShrinkActionProperties == null) { cleanupAndFail(METADATA_FAILURE_MESSAGE) return this } - println(getShrinkLockModel(shrinkActionProperties, context.jobContext)) - val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { - cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") + cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } - println(lock) + // After renewing the lock we need to update the primary term and sequence number + shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock) try { val indexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(indexName) val response: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(indexStatsRequests, it) } val numPrimaryShards = context.clusterService.state().metadata.indices[indexName].numberOfShards - val nodeToMoveOnto = shrinkActionProperties.nodeName + val nodeToMoveOnto = localShrinkActionProperties.nodeName val inSyncAllocations = 
context.clusterService.state().metadata.indices[indexName].inSyncAllocationIds val numReplicas = context.clusterService.state().metadata.indices[indexName].numberOfReplicas var numShardsOnNode = 0 @@ -93,26 +97,27 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { private suspend fun cleanupAndFail(message: String, cause: String? = null) { info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED - val context = this.context ?: return + // Non-null assertion !! is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context.metadata.index, context.client) + clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } try { - val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) } catch (e: Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } + shrinkActionProperties = null } override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { - // Saving maxNumSegments in ActionProperties after the force merge operation has begun so that if a ChangePolicy occurred - // in between this step and WaitForForceMergeStep, a cached segment count expected from the operation is available - val currentActionMetaData = currentMetadata.actionMetaData return currentMetadata.copy( - actionMetaData = currentActionMetaData?.copy(), + actionMetaData = currentMetadata.actionMetaData?.copy( + actionProperties = ActionProperties( + shrinkActionProperties = shrinkActionProperties + ) + ), 
stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), transitionTo = null, info = info @@ -156,7 +161,5 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { const val FAILURE_MESSAGE = "Shrink failed when waiting for shards to move." const val METADATA_FAILURE_MESSAGE = "Shrink action properties are null, metadata was not properly populated" const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds - const val RESOURCE_NAME = "node_name" - const val RESOURCE_TYPE = "shrink" } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index 1e3246e95..2651f1600 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -14,13 +14,17 @@ import org.opensearch.client.Client import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.deleteShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.getUpdatedShrinkActionProperties import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import 
org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.transport.RemoteTransportException @@ -31,26 +35,30 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { private val logger = LogManager.getLogger(javaClass) private var stepStatus = StepStatus.STARTING private var info: Map? = null + private var shrinkActionProperties: ShrinkActionProperties? = null @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "LongMethod") override suspend fun execute(): WaitForShrinkStep { val context = this.context ?: return this val actionMetadata = context.metadata.actionMetaData - val shrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties - if (shrinkActionProperties == null) { + val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties + shrinkActionProperties = localShrinkActionProperties + if (localShrinkActionProperties == null) { cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } - val lock = renewShrinkLock(shrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { - cleanupAndFail("Failed to renew lock on node [${shrinkActionProperties.nodeName}]") + cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } + shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock) try { - val targetIndex = shrinkActionProperties.targetIndexName + val targetIndex = 
localShrinkActionProperties.targetIndexName val numPrimaryShardsStarted = getNumPrimaryShardsStarted(context.client, targetIndex) val numPrimaryShards = context.clusterService.state().metadata.indices[targetIndex].numberOfShards - if (numPrimaryShards != shrinkActionProperties.targetNumShards || numPrimaryShardsStarted != shrinkActionProperties.targetNumShards) { + val targetNumShards = localShrinkActionProperties.targetNumShards + if (numPrimaryShards != targetNumShards || numPrimaryShardsStarted != targetNumShards) { checkTimeOut(context, targetIndex) return this } @@ -59,12 +67,12 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { if (!clearAllocationSettings(context, targetIndex)) return this if (!clearAllocationSettings(context, context.metadata.index)) return this - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + deleteShrinkLock(localShrinkActionProperties, context.jobContext, logger) stepStatus = StepStatus.COMPLETED info = mapOf("message" to SUCCESS_MESSAGE) return this } catch (e: RemoteTransportException) { - cleanupAndFail(getFailureMessage(shrinkActionProperties.targetIndexName)) + cleanupAndFail(getFailureMessage(localShrinkActionProperties.targetIndexName)) return this } catch (e: Exception) { cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message) @@ -72,34 +80,37 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { } } - // Sets the action to failed, clears the readonly and allocation settings on the source index, deletes the target index, and releases the shrink lock + // Sets the action to failed, clears the readonly and allocation settings on the source index, deletes the target index, + // and releases the shrink lock private suspend fun cleanupAndFail(message: String, cause: String? 
= null) { info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED - val context = this.context ?: return // Using a try/catch for each cleanup action as we should clean up as much as possible despite any failures + // Non-null assertion !! is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context.metadata.index, context.client) + clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } - val shrinkActionProperties = context.metadata.actionMetaData?.actionProperties?.shrinkActionProperties ?: return try { - // TODO CLAY use plugin permissions when cleaning up - // Delete the target index - val deleteRequest = DeleteIndexRequest(shrinkActionProperties.targetIndexName) - val response: AcknowledgedResponse = context.client.admin().indices().suspendUntil { delete(deleteRequest, it) } - if (!response.isAcknowledged) { - logger.error("Shrink action failed to delete target index during cleanup after a failure") + // Use plugin level permissions when deleting the failed target shrink index after a failure + context!!.client.threadPool().threadContext.stashContext().use { + val deleteRequest = DeleteIndexRequest(shrinkActionProperties!!.targetIndexName) + val response: AcknowledgedResponse = + context!!.client.admin().indices().suspendUntil { delete(deleteRequest, it) } + if (!response.isAcknowledged) { + logger.error("Shrink action failed to delete target index during cleanup after a failure") + } } } catch (e: Exception) { logger.error("Shrink action failed while trying to delete the target index after a failure: $e") } try { - releaseShrinkLock(shrinkActionProperties, context.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) } catch (e: 
Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } + shrinkActionProperties = null } private suspend fun clearAllocationSettings(context: StepContext, index: String): Boolean { @@ -132,11 +143,12 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { } override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { - // Saving maxNumSegments in ActionProperties after the force merge operation has begun so that if a ChangePolicy occurred - // in between this step and WaitForForceMergeStep, a cached segment count expected from the operation is available - val currentActionMetaData = currentMetadata.actionMetaData return currentMetadata.copy( - actionMetaData = currentActionMetaData?.copy(), + actionMetaData = currentMetadata.actionMetaData?.copy( + actionProperties = ActionProperties( + shrinkActionProperties = shrinkActionProperties + ) + ), stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), transitionTo = null, info = info diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt index 66254ffee..29d34155b 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt @@ -7,10 +7,10 @@ @file:JvmName("ManagedIndexUtils") package org.opensearch.indexmanagement.indexstatemanagement.util -//import inet.ipaddr.IPAddressString +// import inet.ipaddr.IPAddressString import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext -//import org.apache.logging.log4j.LogManager +// import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.Logger import org.opensearch.action.delete.DeleteRequest 
import org.opensearch.action.get.GetRequest @@ -19,7 +19,7 @@ import org.opensearch.action.index.IndexRequest import org.opensearch.action.search.SearchRequest import org.opensearch.action.support.WriteRequest import org.opensearch.action.update.UpdateRequest -//import org.opensearch.alerting.destination.message.BaseMessage +// import org.opensearch.alerting.destination.message.BaseMessage import org.opensearch.client.Client import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 96ac2bf8c..d8103b8ba 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -16,8 +16,9 @@ import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.settings.ClusterSettings import org.opensearch.common.settings.Settings +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_NAME +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_TYPE import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep -import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties @@ -44,14 +45,15 @@ suspend fun releaseShrinkLock( } } -suspend fun releaseShrinkLock( - lock: LockModel, +suspend fun deleteShrinkLock( + shrinkActionProperties: 
ShrinkActionProperties, jobExecutionContext: JobExecutionContext, logger: Logger ) { - val released: Boolean = jobExecutionContext.lockService.suspendUntil { release(lock, it) } - if (!released) { - logger.error("Failed to release Shrink action lock on node [${lock.resource[AttemptMoveShardsStep.RESOURCE_NAME] as String}]") + val lockID = getShrinkLockID(shrinkActionProperties.nodeName) + val deleted: Boolean = jobExecutionContext.lockService.suspendUntil { deleteLock(lockID, it) } + if (!deleted) { + logger.error("Failed to delete Shrink action lock on node [${shrinkActionProperties.nodeName}]") } } @@ -61,7 +63,6 @@ suspend fun renewShrinkLock( logger: Logger ): LockModel? { val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext) - println(lock.lockDurationSeconds) return try { jobExecutionContext.lockService.suspendUntil { renewLock(lock, it) } } catch (e: Exception) { @@ -77,7 +78,6 @@ fun getShrinkLockModel( return getShrinkLockModel( shrinkActionProperties.nodeName, jobExecutionContext.jobIndexName, - jobExecutionContext.jobId, shrinkActionProperties.lockEpochSecond, shrinkActionProperties.lockPrimaryTerm, shrinkActionProperties.lockSeqNo, @@ -89,20 +89,16 @@ fun getShrinkLockModel( fun getShrinkLockModel( nodeName: String, jobIndexName: String, - jobId: String, lockEpochSecond: Long, lockPrimaryTerm: Long, lockSeqNo: Long, lockDurationSecond: Long ): LockModel { - val resource: HashMap = HashMap() - resource[WaitForMoveShardsStep.RESOURCE_NAME] = nodeName + val lockID = getShrinkLockID(nodeName) val lockCreationInstant: Instant = Instant.ofEpochSecond(lockEpochSecond) return LockModel( jobIndexName, - jobId, - WaitForMoveShardsStep.RESOURCE_TYPE, - resource as Map?, + lockID, lockCreationInstant, lockDurationSecond, false, @@ -111,6 +107,19 @@ fun getShrinkLockModel( ) } +// Returns copied ShrinkActionProperties with the details of the provided lock added in +fun getUpdatedShrinkActionProperties(shrinkActionProperties: 
ShrinkActionProperties, lock: LockModel): ShrinkActionProperties { + return ShrinkActionProperties( + shrinkActionProperties.nodeName, + shrinkActionProperties.targetIndexName, + shrinkActionProperties.targetNumShards, + lock.primaryTerm, + lock.seqNo, + lock.lockTime.epochSecond, + lock.lockDurationSeconds + ) +} + fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant { val actionMetadata = managedIndexMetaData.actionMetaData // Return the action start time, or if that is null return now @@ -123,12 +132,13 @@ fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant { * parameter will return 0, and vice versa for when the values are set as bytes. This method provides a single place to * parse either and get the byte value back. */ +@Suppress("MagicNumber") fun getFreeBytesThresholdHigh(settings: Settings, clusterSettings: ClusterSettings?, totalNodeBytes: Long): Long { val diskThresholdSettings = DiskThresholdSettings(settings, clusterSettings) // Depending on how a user provided input, this setting may be a percentage or byte value val diskThresholdPercent = diskThresholdSettings.freeDiskThresholdHigh val diskThresholdBytes = diskThresholdSettings.freeBytesThresholdHigh - // If the disk threshold is set as a percentage, use it and convert it to bytes. 
If + // If the disk threshold is set as a percentage, use it and convert it to bytes return if (diskThresholdPercent > 0.001) { // If the user set value is 95%, diskThresholdPercent will be returned as 5% from the DiskThresholdSettings object ((diskThresholdPercent / 100) * totalNodeBytes).toLong() @@ -170,3 +180,7 @@ suspend fun clearReadOnlyAndRouting(index: String, client: Client): Boolean { } return true } + +fun getShrinkLockID(nodeName: String): String { + return "$LOCK_RESOURCE_TYPE-$LOCK_RESOURCE_NAME-$nodeName" +} diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index b3e4604cf..e4df73443 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -20,16 +20,14 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.waitFor import java.time.Instant import java.time.temporal.ChronoUnit -import java.util.Locale class ShrinkActionIT : IndexStateManagementRestTestCase() { - private val testIndexName = javaClass.simpleName.toLowerCase(Locale.ROOT) + private val testIndexName = javaClass.simpleName.lowercase() fun `test basic workflow number of shards`() { val logger = LogManager.getLogger(::ShrinkActionIT) val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" - // Create a Policy with one State that only preforms a force_merge Action val shrinkAction = ShrinkAction( numNewShards = 1, maxShardSize = null, @@ -93,7 +91,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { updateManagedIndexConfigStartTime(managedIndexConfig) val instant: Instant = Instant.ofEpochSecond(50) waitFor(instant) { - // assertTrue("Target index is not created", indexExists(targetIndexName)) 
+ assertTrue("Target index is not created", indexExists(targetIndexName)) assertEquals(Step.StepStatus.COMPLETED, getExplainManagedIndexMetaData(indexName).stepMetaData?.stepStatus) assertEquals( AttemptShrinkStep.getSuccessMessage(targetIndexName), @@ -118,7 +116,6 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" val testMaxShardSize: ByteSizeValue = ByteSizeValue.parseBytesSizeValue("1GB", "test") - // Create a Policy with one State that only preforms a force_merge Action val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = testMaxShardSize, @@ -203,7 +200,6 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { fun `test basic workflow percentage to decrease to`() { val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" - // Create a Policy with one State that only preforms a force_merge Action val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = null, @@ -292,7 +288,6 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { if (nodes.size > 1) { val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" - // Create a Policy with one State that only preforms a force_merge Action val shrinkAction = ShrinkAction( numNewShards = null, maxShardSize = null, @@ -462,7 +457,6 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // Will change the startTime each execution so that it triggers in 2 seconds // First execution: Policy is initialized val managedIndexConfig = getExistingManagedIndexConfig(indexName) - updateManagedIndexConfigStartTime(managedIndexConfig) waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } logger.info("before attempt move shards") @@ -472,10 +466,10 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val targetIndexName = indexName + "_shrink_test" waitFor { 
assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) - assertEquals("true", getIndexBlocksWriteSetting(indexName)) assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) val settings = getFlatSettings(indexName) val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue("Did not set allocation setting", settings.containsKey("index.routing.allocation.require._name")) assertTrue(settings.containsKey("index.routing.allocation.require._name")) assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) assertEquals( @@ -501,14 +495,38 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData assertEquals("Did not fail due to target index existing step as expected", Step.StepStatus.FAILED, stepMetadata?.stepStatus) assertEquals(AttemptShrinkStep.name, stepMetadata?.name) + val settings = getFlatSettings(indexName) + assertFalse("Did not clear allocation setting", settings.containsKey("index.routing.allocation.require._name")) + assertFalse("Did not clear index write block setting.", settings.containsKey("index.blocks.writes")) + assertNull( + "Did not clear shrink action properties", + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties + ) } - // TODO add checks for successful cleanup + + // Delete that index so it can pass + deleteIndex(targetIndexName) updateManagedIndexConfigStartTime(managedIndexConfig) waitFor { val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData assertEquals("Shrink action should have started over after failing", stepMetadata?.name, AttemptMoveShardsStep.name) - assertEquals("Step status should 
have been starting", Step.StepStatus.STARTING, stepMetadata?.stepStatus) + // The step status should be starting, but in the same execution will be completed. Allowing either to avoid flaky failures + val stepStatusDidReset = stepMetadata?.stepStatus == Step.StepStatus.STARTING || stepMetadata?.stepStatus == Step.StepStatus.COMPLETED + assertTrue("Step status should reset", stepStatusDidReset) + } + + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + assertTrue("Did not set allocation setting", settings.containsKey("index.routing.allocation.require._name")) + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) } } } diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt index 7a71b3481..23684072e 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt @@ -30,6 +30,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.randomReadWriteAction import org.opensearch.indexmanagement.indexstatemanagement.randomReplicaCountActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRolloverActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRollupActionConfig +import 
org.opensearch.indexmanagement.indexstatemanagement.randomShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.randomSnapshotActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomTimeValueObject import org.opensearch.indexmanagement.indexstatemanagement.util.getFreeBytesThresholdHigh @@ -75,6 +76,12 @@ class ActionTests : OpenSearchTestCase() { } } + fun `test shrink action multiple shard options fails`() { + assertFailsWith(IllegalArgumentException::class, "Expected IllegalArgumentException for multiple shard options used") { + randomShrinkAction(3, randomByteSizeValue(), .30) + } + } + fun `test allocation action empty parameters fails`() { assertFailsWith(IllegalArgumentException::class, "Expected IllegalArgumentException for empty parameters") { randomAllocationActionConfig() @@ -140,6 +147,10 @@ class ActionTests : OpenSearchTestCase() { roundTripAction(randomDeleteActionConfig()) } + fun `test shrink action round trip`() { + roundTripAction(randomShrinkAction()) + } + fun `test action timeout and retry round trip`() { val builder = XContentFactory.jsonBuilder() .startObject() From 9b22178cf0782ee83ba574c72751d73b93b53772 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Thu, 7 Apr 2022 21:57:55 +0000 Subject: [PATCH 06/13] Fixes selecting node and adds additional error logging Signed-off-by: Clay Downs --- .../step/shrink/AttemptMoveShardsStep.kt | 18 +++++++++++++----- .../step/shrink/AttemptShrinkStep.kt | 13 ++++++++++--- .../step/shrink/WaitForMoveShardsStep.kt | 9 ++++++--- .../step/shrink/WaitForShrinkStep.kt | 15 +++++++++++---- 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 5adcdb959..2ffec8160 100644 --- 
a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -17,6 +17,7 @@ import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand +import org.opensearch.cluster.routing.allocation.decider.Decision import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple import org.opensearch.common.settings.Settings @@ -86,6 +87,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val statsStore = statsResponse.total.store val statsDocs = statsResponse.total.docs if (statsStore == null || statsDocs == null) { + logger.error("Failed to move shards in shrink action as IndicesStatsResponse was missing store or doc stats.") fail(FAILURE_MESSAGE) return this } @@ -123,15 +125,16 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.COMPLETED return this } catch (e: OpenSearchSecurityException) { - fail(getSecurityFailureMessage(e.localizedMessage), e.message) + fail(getSecurityFailureMessage(e.localizedMessage), e.message, e) return this } catch (e: Exception) { - fail(FAILURE_MESSAGE, e.message) + fail(FAILURE_MESSAGE, e.message, e) return this } } - private fun fail(message: String, cause: String? = null) { + private fun fail(message: String, cause: String? = null, e: Exception? 
= null) { + e?.let { logger.error(message, e) } info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED shrinkActionProperties = null @@ -153,6 +156,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val totalDocs: Long = docsStats.count val docsPerTargetShard: Long = totalDocs / numTargetShards if (docsPerTargetShard > MAXIMUM_DOCS_PER_SHARD) { + logger.error(TOO_MANY_DOCS_FAILURE_MESSAGE) fail(TOO_MANY_DOCS_FAILURE_MESSAGE) return true } @@ -170,6 +174,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val numReplicas = clusterService.state().metadata.indices[indexName].numberOfReplicas val shouldFailForceUnsafeCheck = numReplicas == 0 if (shouldFailForceUnsafeCheck) { + logger.error(UNSAFE_FAILURE_MESSAGE) fail(UNSAFE_FAILURE_MESSAGE) return true } @@ -179,7 +184,9 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { private fun targetIndexNameExists(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean { val indexExists = clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName) if (indexExists) { - fail(getIndexExistsMessage(shrinkTargetIndexName)) + val indexExistsMessage = getIndexExistsMessage(shrinkTargetIndexName) + logger.error(indexExistsMessage) + fail(indexExistsMessage) return true } return false @@ -269,8 +276,9 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } val clusterRerouteResponse: ClusterRerouteResponse = stepContext.client.admin().cluster().suspendUntil { reroute(clusterRerouteRequest, it) } + val numYesDecisions = clusterRerouteResponse.explanations.explanations().count { it.decisions().type().equals((Decision.Type.YES)) } // Should be the same number of yes decisions as the number of primary shards - if (clusterRerouteResponse.explanations.yesDecisionMessages.size == numberOfRerouteRequests) { + if 
(numYesDecisions == numberOfRerouteRequests) { suitableNodes.add(sizeNodeTuple.v2()) } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index 493274b89..de692b414 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -45,11 +45,13 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties shrinkActionProperties = localShrinkActionProperties if (localShrinkActionProperties == null) { + logger.error(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { + logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } @@ -68,16 +70,17 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.COMPLETED return this } catch (e: RemoteTransportException) { - cleanupAndFail(FAILURE_MESSAGE) + cleanupAndFail(FAILURE_MESSAGE, e = e) return this } catch (e: Exception) { - cleanupAndFail(FAILURE_MESSAGE, e.message) + cleanupAndFail(FAILURE_MESSAGE, e.message, e) return this } } // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock - private suspend fun cleanupAndFail(message: String, cause: String? = null) { + private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? 
= null) { + e?.let { logger.error(message, e) } info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED // Non-null assertion !! is used to throw an exception on null which would just be caught and logged @@ -103,6 +106,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { } val statsStore = statsResponse.total.store if (statsStore == null) { + logger.error("Shrink action failed as indices stats request was missing store stats.") cleanupAndFail(FAILURE_MESSAGE) return false } @@ -113,11 +117,13 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { // If the node has been replaced, this will fail val node = nodeStatsResponse.nodes.firstOrNull { it.node.name == nodeName } if (node == null) { + logger.error("Shrink action failed as node stats were missing the previously selected node.") cleanupAndFail(FAILURE_MESSAGE) return false } val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, context.settings, context.clusterService.clusterSettings) if (remainingMem < 1L) { + logger.error("Shrink action failed as the previously selected node no longer has enough free space.") cleanupAndFail(NOT_ENOUGH_SPACE_FAILURE_MESSAGE) return false } @@ -136,6 +142,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { action.aliases?.forEach { req.targetIndexRequest.alias(it) } val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) } if (!resizeResponse.isAcknowledged) { + logger.error("Shrink action failed as the resize index request was not acknowledged.") cleanupAndFail(FAILURE_MESSAGE) return false } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index 62faf6c90..21b0590f2 100644 --- 
a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -41,11 +41,13 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties shrinkActionProperties = localShrinkActionProperties if (localShrinkActionProperties == null) { + logger.error(METADATA_FAILURE_MESSAGE) cleanupAndFail(METADATA_FAILURE_MESSAGE) return this } val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { + logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } @@ -85,16 +87,17 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { } return this } catch (e: RemoteTransportException) { - cleanupAndFail(FAILURE_MESSAGE) + cleanupAndFail(FAILURE_MESSAGE, e = e) return this } catch (e: Exception) { - cleanupAndFail(FAILURE_MESSAGE, cause = e.message) + cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e) return this } } // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock - private suspend fun cleanupAndFail(message: String, cause: String? = null) { + private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? = null) { + e?.let { logger.error(message, e) } info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED // Non-null assertion !! 
is used to throw an exception on null which would just be caught and logged diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index 2651f1600..de046ea58 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -13,6 +13,7 @@ import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep.Companion.getTimeoutFailure import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.deleteShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime @@ -44,11 +45,13 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties shrinkActionProperties = localShrinkActionProperties if (localShrinkActionProperties == null) { + logger.error(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) if (lock == null) { + logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") return this } @@ -72,17 +75,18 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { info = 
mapOf("message" to SUCCESS_MESSAGE) return this } catch (e: RemoteTransportException) { - cleanupAndFail(getFailureMessage(localShrinkActionProperties.targetIndexName)) + cleanupAndFail(getFailureMessage(localShrinkActionProperties.targetIndexName), e = e) return this } catch (e: Exception) { - cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message) + cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message, e) return this } } // Sets the action to failed, clears the readonly and allocation settings on the source index, deletes the target index, // and releases the shrink lock - private suspend fun cleanupAndFail(message: String, cause: String? = null) { + private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? = null) { + e?.let { logger.error(message, e) } info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) stepStatus = StepStatus.FAILED // Using a try/catch for each cleanup action as we should clean up as much as possible despite any failures @@ -117,6 +121,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build() val response: AcknowledgedResponse = issueUpdateSettingsRequest(context.client, index, allocationSettings) if (!response.isAcknowledged) { + logger.error("Shrink action to clear the allocation settings on index [$index] following shrinking.") cleanupAndFail(getFailureMessage(index)) return false } @@ -135,7 +140,8 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS // Get ActionTimeout if given, otherwise use default timeout of 12 hours if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { - cleanupAndFail(getFailureMessage(targetIndex)) + logger.error(getTimeoutFailure(targetIndex)) + 
cleanupAndFail(getTimeoutFailure(targetIndex)) } else { info = mapOf("message" to getDelayedMessage(targetIndex)) stepStatus = StepStatus.CONDITION_NOT_MET @@ -163,5 +169,6 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { const val GENERIC_FAILURE_MESSAGE = "Shrink failed while waiting for shards to start." fun getDelayedMessage(newIndex: String) = "Shrink delayed because $newIndex shards not in started state." fun getFailureMessage(newIndex: String) = "Shrink failed while waiting for $newIndex shards to start." + fun getTimeoutFailure(newIndex: String) = "Shrink failed because it timed out while waiting for $newIndex shrink to finish." } } From 671822384421993909271fd610742456e7fcb5d8 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Thu, 7 Apr 2022 22:51:57 +0000 Subject: [PATCH 07/13] Reduce test flakiness Signed-off-by: Clay Downs --- .../action/ShrinkActionIT.kt | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index e4df73443..01c68ea7b 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -59,13 +59,13 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val managedIndexConfig = getExistingManagedIndexConfig(indexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } logger.info("before attempt move shards") // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) val targetIndexName = indexName + 
shrinkAction.targetIndexSuffix - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) @@ -81,7 +81,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName // starts WaitForMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), getExplainManagedIndexMetaData(indexName).info?.get("message") @@ -101,7 +101,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForShrinkStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { // one primary and one replica assertTrue(getIndexShards(targetIndexName).size == 2) assertEquals( @@ -147,13 +147,13 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val managedIndexConfig = getExistingManagedIndexConfig(indexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } logger.info("before attempt move shards") // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) val targetIndexName = indexName + shrinkAction.targetIndexSuffix - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, 
getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) @@ -169,7 +169,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName // starts WaitForMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), getExplainManagedIndexMetaData(indexName).info?.get("message") @@ -187,7 +187,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForShrinkStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { // one primary and one replica assertTrue(getIndexShards(targetIndexName).size == 2) assertEquals( @@ -231,12 +231,12 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val managedIndexConfig = getExistingManagedIndexConfig(indexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) val targetIndexName = indexName + shrinkAction.targetIndexSuffix - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) assertNotNull("Couldn't find node to shrink 
onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) @@ -254,7 +254,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), getExplainManagedIndexMetaData(indexName).info?.get("message") @@ -272,7 +272,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForShrinkStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { // one primary and one replica assertTrue(getIndexShards(targetIndexName).size == 2) assertEquals( @@ -323,11 +323,11 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { logger.info("index settings: \n ${getFlatSettings(indexName)}") updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) val targetIndexName = indexName + shrinkAction.targetIndexSuffix - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName @@ -353,7 +353,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), getExplainManagedIndexMetaData(indexName).info?.get("message") @@ -371,7 +371,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // starts WaitForShrinkStep 
updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { // one primary and one replica assertTrue(getIndexShards(targetIndexName).size == 2) assertEquals( @@ -419,13 +419,13 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val managedIndexConfig = getExistingManagedIndexConfig(indexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } logger.info("before attempt move shards") // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) // The action should be done after the no-op - waitFor { + waitFor(Instant.ofEpochSecond(60)) { val metadata = getExplainManagedIndexMetaData(indexName) assertEquals( "Did not get the no-op due to single primary shard message", @@ -458,13 +458,13 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // First execution: Policy is initialized val managedIndexConfig = getExistingManagedIndexConfig(indexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } logger.info("before attempt move shards") // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) val targetIndexName = indexName + "_shrink_test" - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) val settings = 
getFlatSettings(indexName) @@ -480,7 +480,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName // starts WaitForMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { assertEquals( WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), getExplainManagedIndexMetaData(indexName).info?.get("message") @@ -491,7 +491,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // Wait for move should finish before this. Starts AttemptShrinkStep updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor(Instant.ofEpochSecond(50)) { + waitFor(Instant.ofEpochSecond(60)) { val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData assertEquals("Did not fail due to target index existing step as expected", Step.StepStatus.FAILED, stepMetadata?.stepStatus) assertEquals(AttemptShrinkStep.name, stepMetadata?.name) @@ -504,11 +504,14 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { ) } + // wait 5 seconds for the timeout from the retry to pass + Thread.sleep(5000L) + // Delete that index so it can pass deleteIndex(targetIndexName) updateManagedIndexConfigStartTime(managedIndexConfig) - waitFor { + waitFor(Instant.ofEpochSecond(60)) { val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData assertEquals("Shrink action should have started over after failing", stepMetadata?.name, AttemptMoveShardsStep.name) // The step status should be starting, but in the same execution will be completed. 
Allowing either to avoid flaky failures From 936694d52829dead1f4a7f9778f21c84f8197337 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Mon, 11 Apr 2022 01:11:33 +0000 Subject: [PATCH 08/13] PR comments Signed-off-by: Clay Downs --- .../action/ShrinkAction.kt | 27 +++++++--- .../action/ShrinkActionParser.kt | 4 +- .../step/shrink/AttemptMoveShardsStep.kt | 54 +++++++++++++------ .../step/shrink/AttemptShrinkStep.kt | 9 +++- .../step/shrink/WaitForMoveShardsStep.kt | 9 +++- .../step/shrink/WaitForShrinkStep.kt | 9 +++- .../indexstatemanagement/util/StepUtils.kt | 9 +++- 7 files changed, 91 insertions(+), 30 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index 2c3b89b1d..4987c0db4 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -6,6 +6,7 @@ package org.opensearch.indexmanagement.indexstatemanagement.action import org.opensearch.action.admin.indices.alias.Alias +import org.opensearch.common.Strings import org.opensearch.common.io.stream.StreamOutput import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.xcontent.ToXContent @@ -35,15 +36,19 @@ class ShrinkAction( if (maxShardSize != null) { require(maxShardSize.bytes > 0) { "Shrink action maxShardSize must be greater than 0." } - } else if (percentageOfSourceShards != null) { + } + if (percentageOfSourceShards != null) { require(percentageOfSourceShards > 0.0 && percentageOfSourceShards < 1.0) { "Percentage of source shards must be between 0.0 and 1.0 exclusively" } - } else if (numNewShards != null) { + } + if (numNewShards != null) { require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." 
} } if (targetIndexSuffix != null) { - require(!targetIndexSuffix.contains('*') && !targetIndexSuffix.contains('?')) { "Target index suffix must not contain wildcards." } + require(Strings.validFileName(targetIndexSuffix)) { + "Target index suffix must not contain the following characters ${Strings.INVALID_FILENAME_CHARS}" + } } } @@ -74,14 +79,23 @@ class ShrinkAction( AttemptMoveShardsStep.name -> waitForMoveShardsStep WaitForMoveShardsStep.name -> attemptShrinkStep AttemptShrinkStep.name -> waitForShrinkStep - else -> stepNameToStep[currentStep]!! + // We do not expect to ever hit this point, but if we do somehow, starting over is safe. + else -> attemptMoveShardsStep } } else if (currentStepStatus == Step.StepStatus.FAILED) { // If we failed at any point, retries should start from the beginning return attemptMoveShardsStep } - // step not completed - return stepNameToStep[currentStep]!! + + // step not completed, return the same step + return when (stepMetaData.name) { + AttemptMoveShardsStep.name -> attemptMoveShardsStep + WaitForMoveShardsStep.name -> waitForMoveShardsStep + AttemptShrinkStep.name -> attemptShrinkStep + WaitForShrinkStep.name -> waitForShrinkStep + // Again, we don't expect to ever hit this point + else -> attemptMoveShardsStep + } } override fun populateAction(builder: XContentBuilder, params: ToXContent.Params) { @@ -120,5 +134,6 @@ class ShrinkAction( const val FORCE_UNSAFE_FIELD = "force_unsafe" const val LOCK_RESOURCE_TYPE = "shrink" const val LOCK_RESOURCE_NAME = "node_name" + fun getSecurityFailureMessage(failure: String) = "Shrink action failed because of missing permissions: $failure" } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt index f5b7d9d99..23450c0ff 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt +++ 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt @@ -48,9 +48,9 @@ class ShrinkActionParser : ActionParser() { when (fieldName) { NUM_NEW_SHARDS_FIELD -> numNewShards = xcp.intValue() - MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.textOrNull(), MAX_SHARD_SIZE_FIELD) + MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.text(), MAX_SHARD_SIZE_FIELD) PERCENTAGE_OF_SOURCE_SHARDS_FIELD -> percentageOfSourceShards = xcp.doubleValue() - TARGET_INDEX_SUFFIX_FIELD -> targetIndexSuffix = xcp.textOrNull() + TARGET_INDEX_SUFFIX_FIELD -> targetIndexSuffix = xcp.text() ALIASES_FIELD -> { if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { aliases = mutableListOf() diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 2ffec8160..3a563abb2 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -6,6 +6,7 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper import org.opensearch.OpenSearchSecurityException import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse @@ -16,6 +17,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.cluster.metadata.MetadataCreateIndexService.validateIndexOrAliasName import 
org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand import org.opensearch.cluster.routing.allocation.decider.Decision import org.opensearch.cluster.service.ClusterService @@ -23,6 +25,7 @@ import org.opensearch.common.collect.Tuple import org.opensearch.common.settings.Settings import org.opensearch.index.shard.DocsStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.model.ManagedIndexConfig import org.opensearch.indexmanagement.indexstatemanagement.util.getIntervalFromManagedIndexConfig import org.opensearch.indexmanagement.indexstatemanagement.util.getManagedIndexConfig @@ -37,9 +40,12 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedInde import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.indices.InvalidIndexNameException import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel +import org.opensearch.transport.RemoteTransportException +import java.lang.RuntimeException import java.util.PriorityQueue import kotlin.math.ceil import kotlin.math.floor @@ -61,7 +67,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { try { val shrinkTargetIndexName = indexName + (action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX) - if (targetIndexNameExists(context.clusterService, shrinkTargetIndexName)) return this + if (targetIndexNameIsInvalid(context.clusterService, shrinkTargetIndexName)) return this if (!isIndexGreen(client, indexName)) { info 
= mapOf("message" to INDEX_NOT_GREEN_MESSAGE) @@ -71,7 +77,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { if (shouldFailUnsafe(context.clusterService, indexName)) return this - // Fail if there is only one primary shard, as that cannot be shrunk + // If there is only one primary shard we complete the step and in getUpdatedManagedIndexMetadata will start a no-op val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards if (numOriginalShards == 1) { info = mapOf("message" to ONE_PRIMARY_SHARD_MESSAGE) @@ -127,6 +133,10 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { } catch (e: OpenSearchSecurityException) { fail(getSecurityFailureMessage(e.localizedMessage), e.message, e) return this + } catch (e: RemoteTransportException) { + val unwrappedException = ExceptionsHelper.unwrapCause(e) + fail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as java.lang.Exception) + return this } catch (e: Exception) { fail(FAILURE_MESSAGE, e.message, e) return this @@ -174,14 +184,14 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val numReplicas = clusterService.state().metadata.indices[indexName].numberOfReplicas val shouldFailForceUnsafeCheck = numReplicas == 0 if (shouldFailForceUnsafeCheck) { - logger.error(UNSAFE_FAILURE_MESSAGE) + logger.info(UNSAFE_FAILURE_MESSAGE) fail(UNSAFE_FAILURE_MESSAGE) return true } return false } - private fun targetIndexNameExists(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean { + private fun targetIndexNameIsInvalid(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean { val indexExists = clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName) if (indexExists) { val indexExistsMessage = getIndexExistsMessage(shrinkTargetIndexName) @@ -189,6 +199,10 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { 
fail(indexExistsMessage) return true } + val exceptionGenerator: (String, String) -> RuntimeException = { index_name, reason -> InvalidIndexNameException(index_name, reason) } + // If the index name is invalid for any reason, this will throw an exception giving the reason why in the message. + // That will be displayed to the user as the cause. + validateIndexOrAliasName(shrinkTargetIndexName, exceptionGenerator) return false } @@ -248,19 +262,21 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { ): List { val nodesStatsReq = NodesStatsRequest().addMetric(OS_METRIC) val nodeStatsResponse: NodesStatsResponse = stepContext.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) } - val nodesList = nodeStatsResponse.nodes - // Sort in increasing order of keys, in our case this is memory left + val nodesList = nodeStatsResponse.nodes.filter { it.node.isDataNode } + // Sort in increasing order of keys, in our case this is memory remaining val comparator = kotlin.Comparator { o1: Tuple, o2: Tuple -> o1.v1().compareTo(o2.v1()) } val nodesWithSpace = PriorityQueue(comparator) for (node in nodesList) { + // Gets the amount of memory in the node which will be free below the high watermark level after adding 2*indexSizeInBytes, + // as the source index is duplicated during the shrink val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, stepContext.settings, stepContext.clusterService.clusterSettings) if (remainingMem > 0L) { nodesWithSpace.add(Tuple(remainingMem, node.node.name)) } } val suitableNodes: ArrayList = ArrayList() - // For each node, do a dry run of moving all shards to the node to make sure there is enough space. - // This should be rejected if allocation puts it above the low disk watermark setting + // For each node, do a dry run of moving all shards to the node to make sure that there aren't any other blockers + // to the allocation. 
for (sizeNodeTuple in nodesWithSpace) { val targetNodeName = sizeNodeTuple.v2() val indexName = stepContext.metadata.index @@ -296,12 +312,16 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { return getGreatestFactorLessThan(numOriginalShards, numTargetShards) } // case where the user specifies a max shard size in the target index - val maxShardSizeInBytes = action.maxShardSize!!.bytes - // ceiling ensures that numTargetShards is never less than 1 - val minNumTargetShards = ceil(indexSize / maxShardSizeInBytes.toDouble()).toInt() - // In order to not violate the max shard size condition, this value must be >= minNumTargetShards. - // If that value doesn't exist, numOriginalShards will be returned - return getMinFactorGreaterThan(numOriginalShards, minNumTargetShards) + if (action.maxShardSize != null) { + val maxShardSizeInBytes = action.maxShardSize.bytes + // ceiling ensures that numTargetShards is never less than 1 + val minNumTargetShards = ceil(indexSize / maxShardSizeInBytes.toDouble()).toInt() + // In order to not violate the max shard size condition, this value must be >= minNumTargetShards. + // If that value doesn't exist, numOriginalShards will be returned + return getMinFactorGreaterThan(numOriginalShards, minNumTargetShards) + } + // Shrink action validation requires that at least one of the above will not be null, but return numOriginalShards for completion + return numOriginalShards } /* @@ -371,20 +391,20 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { const val name = "attempt_move_shards_step" const val UPDATE_FAILED_MESSAGE = "Shrink failed because shard settings could not be updated." const val NO_AVAILABLE_NODES_MESSAGE = - "There are no available nodes for to move to to execute a shrink. Delaying until node becomes available." + "There are no available nodes to move to to execute a shrink. Delaying until node becomes available." 
const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true." const val ONE_PRIMARY_SHARD_MESSAGE = "Shrink action did not do anything because source index only has one primary shard." const val TOO_MANY_DOCS_FAILURE_MESSAGE = "Shrink failed because there would be too many documents on each target shard following the shrink." const val INDEX_NOT_GREEN_MESSAGE = "Shrink action cannot start moving shards as the index is not green." const val FAILURE_MESSAGE = "Shrink failed to start moving shards." private const val DEFAULT_LOCK_INTERVAL = 3L * 60L * 60L // Default lock interval is 3 hours in seconds - private const val MILLISECONDS_IN_SECOND = 1000 + private const val MILLISECONDS_IN_SECOND = 1000L + const val THIRTY_SECONDS_IN_MILLIS = 30L * MILLISECONDS_IN_SECOND private const val JOB_INTERVAL_LOCK_MULTIPLIER = 3 private const val LOCK_BUFFER_SECONDS = 1800 private const val MAXIMUM_DOCS_PER_SHARD = 0x80000000 // The maximum number of documents per shard is 2^31 fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node." fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists." - fun getSecurityFailureMessage(failure: String) = "Shrink action failed because of missing permissions: $failure" // If we couldn't get the job interval for the lock, use the default of 12 hours. // Lock is 3x + 30 minutes the job interval to allow the next step's execution to extend the lock without losing it. // If user sets maximum jitter, it could be 2x the job interval before the next step is executed. 
diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index de692b414..dd90ff593 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -6,6 +6,8 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchSecurityException import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse import org.opensearch.action.admin.indices.shrink.ResizeRequest @@ -14,6 +16,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_NUMBER_OF_SHARDS import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink @@ -69,8 +72,12 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { info = mapOf("message" to getSuccessMessage(localShrinkActionProperties.targetIndexName)) stepStatus = StepStatus.COMPLETED return this + } catch (e: OpenSearchSecurityException) { + cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e) + return this } catch (e: RemoteTransportException) { - 
cleanupAndFail(FAILURE_MESSAGE, e = e) + val unwrappedException = ExceptionsHelper.unwrapCause(e) + cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as Exception) return this } catch (e: Exception) { cleanupAndFail(FAILURE_MESSAGE, e.message, e) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index 21b0590f2..820e0b5ca 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -6,10 +6,13 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchSecurityException import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.admin.indices.stats.ShardStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock @@ -86,8 +89,12 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { checkTimeOut(context, numShardsNotOnNode, numShardsNotInSync, nodeToMoveOnto) } return this + } catch (e: OpenSearchSecurityException) { + cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e) + return this } catch (e: RemoteTransportException) { - 
cleanupAndFail(FAILURE_MESSAGE, e = e) + val unwrappedException = ExceptionsHelper.unwrapCause(e) + cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as Exception) return this } catch (e: Exception) { cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index de046ea58..fce1ce8bc 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -6,6 +6,8 @@ package org.opensearch.indexmanagement.indexstatemanagement.step.shrink import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchSecurityException import org.opensearch.action.admin.indices.delete.DeleteIndexRequest import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse @@ -13,6 +15,7 @@ import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep.Companion.getTimeoutFailure import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.deleteShrinkLock @@ -74,8 +77,12 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.COMPLETED info = mapOf("message" to SUCCESS_MESSAGE) 
return this + } catch (e: OpenSearchSecurityException) { + cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e) + return this } catch (e: RemoteTransportException) { - cleanupAndFail(getFailureMessage(localShrinkActionProperties.targetIndexName), e = e) + val unwrappedException = ExceptionsHelper.unwrapCause(e) + cleanupAndFail(GENERIC_FAILURE_MESSAGE, cause = e.message, e = unwrappedException as java.lang.Exception) return this } catch (e: Exception) { cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message, e) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index d8103b8ba..5f0f3db76 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -16,6 +16,7 @@ import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.settings.ClusterSettings import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_NAME import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_TYPE import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep @@ -164,9 +165,13 @@ fun getNodeFreeMemoryAfterShrink(node: NodeStats, indexSizeInBytes: Long, settin return -1L } -suspend fun isIndexGreen(client: Client, indexName: String): Boolean { +suspend fun isIndexGreen( + client: Client, + indexName: String, + timeout: TimeValue = TimeValue(AttemptMoveShardsStep.THIRTY_SECONDS_IN_MILLIS) +): Boolean { // get index health, waiting for a green status - val healthReq = 
ClusterHealthRequest().indices(indexName).waitForGreenStatus() + val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus().timeout(timeout) val response: ClusterHealthResponse = client.admin().cluster().suspendUntil { health(healthReq, it) } // The request was set to wait for green index, if the request timed out, the index never was green return !response.isTimedOut From b824d6f65dd79525e3e119f925ddecf8d0a2a26e Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Tue, 12 Apr 2022 18:40:59 +0000 Subject: [PATCH 09/13] Changes jobContext to lockService Signed-off-by: Clay Downs --- .../indexstatemanagement/model/StepContext.kt | 6 ++--- .../ManagedIndexRunner.kt | 4 +++- .../step/shrink/AttemptMoveShardsStep.kt | 15 ++++++------ .../step/shrink/AttemptShrinkStep.kt | 4 ++-- .../step/shrink/WaitForMoveShardsStep.kt | 4 ++-- .../step/shrink/WaitForShrinkStep.kt | 6 ++--- .../indexstatemanagement/util/StepUtils.kt | 24 +++++++++---------- .../step/AttemptCloseStepTests.kt | 16 ++++++------- .../step/AttemptDeleteStepTests.kt | 12 +++++----- .../step/AttemptOpenStepTests.kt | 10 ++++---- .../step/AttemptSetIndexPriorityStepTests.kt | 12 +++++----- .../step/AttemptSetReplicaCountStepTests.kt | 10 ++++---- .../step/AttemptSnapshotStepTests.kt | 18 +++++++------- .../step/AttemptTransitionStepTests.kt | 10 ++++---- .../step/SetReadOnlyStepTests.kt | 10 ++++---- .../step/SetReadWriteStepTests.kt | 10 ++++---- .../step/WaitForRollupCompletionStepTests.kt | 6 ++--- .../step/WaitForSnapshotStepTests.kt | 24 +++++++++---------- 18 files changed, 102 insertions(+), 99 deletions(-) diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt index b1c60d85f..6ee4ff4f9 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt +++ 
b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt @@ -10,7 +10,7 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.settings.Settings import org.opensearch.common.util.concurrent.ThreadContext import org.opensearch.commons.authuser.User -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService class StepContext( @@ -21,9 +21,9 @@ class StepContext( val user: User?, val scriptService: ScriptService, val settings: Settings, - val jobContext: JobExecutionContext + val lockService: LockService ) { fun getUpdatedContext(metadata: ManagedIndexMetaData): StepContext { - return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings, this.jobContext) + return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings, this.lockService) } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt index be30fbda7..9074db28a 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt @@ -304,7 +304,9 @@ object ManagedIndexRunner : val state = policy.getStateToExecute(managedIndexMetaData) val action: Action? 
= state?.getActionToExecute(managedIndexMetaData, indexMetadataProvider) - val stepContext = StepContext(managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings, jobContext) + val stepContext = StepContext( + managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings, jobContext.lockService + ) val step: Step? = action?.getStepToExecute(stepContext) val currentActionMetaData = action?.getUpdatedActionMetadata(managedIndexMetaData, state.name) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 3a563abb2..1d18cd77e 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -24,6 +24,7 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.collect.Tuple import org.opensearch.common.settings.Settings import org.opensearch.index.shard.DocsStats +import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.model.ManagedIndexConfig @@ -42,8 +43,8 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData import org.opensearch.indices.InvalidIndexNameException import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting -import org.opensearch.jobscheduler.spi.JobExecutionContext import 
org.opensearch.jobscheduler.spi.LockModel +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.transport.RemoteTransportException import java.lang.RuntimeException import java.util.PriorityQueue @@ -108,7 +109,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // Get the job interval to use in determining the lock length val interval = getJobIntervalSeconds(context.metadata.indexUuid, client) // iterate through the nodes and try to acquire a lock on one - val lockToNodeName: Pair? = acquireLockFromNodeList(context.jobContext, suitableNodes, interval) + val lockToNodeName: Pair? = acquireLockFromNodeList(context.lockService, suitableNodes, interval) if (lockToNodeName == null) { logger.info("$indexName could not find available node to shrink onto.") info = mapOf("message" to NO_AVAILABLE_NODES_MESSAGE) @@ -211,7 +212,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { .put(IndexMetadata.SETTING_BLOCKS_WRITE, true) .put(ROUTING_SETTING, node) .build() - val jobContext = stepContext.jobContext + val lockService = stepContext.lockService var response: AcknowledgedResponse? = null val isUpdateAcknowledged: Boolean try { @@ -220,7 +221,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { isUpdateAcknowledged = response != null && response.isAcknowledged if (!isUpdateAcknowledged) { fail(UPDATE_FAILED_MESSAGE) - val released: Boolean = jobContext.lockService.suspendUntil { release(lock, it) } + val released: Boolean = lockService.suspendUntil { release(lock, it) } if (!released) { logger.error("Failed to release Shrink action lock on node [$node]") } @@ -234,14 +235,14 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { * is successfully acquired and the name of the node it acquired the lock on in a pair. 
*/ private suspend fun acquireLockFromNodeList( - jobContext: JobExecutionContext, + lockService: LockService, suitableNodes: List, jobIntervalSeconds: Long? ): Pair? { for (nodeName in suitableNodes) { val lockID = getShrinkLockID(nodeName) - val lock: LockModel? = jobContext.lockService.suspendUntil { - acquireLockWithId(jobContext.jobIndexName, getShrinkLockDuration(jobIntervalSeconds), lockID, it) + val lock: LockModel? = lockService.suspendUntil { + acquireLockWithId(INDEX_MANAGEMENT_INDEX, getShrinkLockDuration(jobIntervalSeconds), lockID, it) } if (lock != null) { return lock to nodeName diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index dd90ff593..07a1bac9c 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -52,7 +52,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } - val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger) if (lock == null) { logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") @@ -97,7 +97,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } try { - releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger) } catch (e: 
Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index 820e0b5ca..b68a66638 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -48,7 +48,7 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { cleanupAndFail(METADATA_FAILURE_MESSAGE) return this } - val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger) if (lock == null) { logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") @@ -114,7 +114,7 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } try { - releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger) } catch (e: Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index fce1ce8bc..a68cd882b 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -52,7 +52,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) return this } - val lock = renewShrinkLock(localShrinkActionProperties, context.jobContext, logger) + val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger) if (lock == null) { logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") @@ -73,7 +73,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { if (!clearAllocationSettings(context, targetIndex)) return this if (!clearAllocationSettings(context, context.metadata.index)) return this - deleteShrinkLock(localShrinkActionProperties, context.jobContext, logger) + deleteShrinkLock(localShrinkActionProperties, context.lockService, logger) stepStatus = StepStatus.COMPLETED info = mapOf("message" to SUCCESS_MESSAGE) return this @@ -117,7 +117,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { logger.error("Shrink action failed while trying to delete the target index after a failure: $e") } try { - releaseShrinkLock(shrinkActionProperties!!, context!!.jobContext, logger) + releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger) } catch (e: Exception) { logger.error("Shrink action failed while trying to release the node lock after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 5f0f3db76..245b83f56 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -17,14 +17,15 @@ import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.settings.ClusterSettings import org.opensearch.common.settings.Settings import org.opensearch.common.unit.TimeValue +import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_NAME import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_TYPE import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties -import org.opensearch.jobscheduler.spi.JobExecutionContext import org.opensearch.jobscheduler.spi.LockModel +import org.opensearch.jobscheduler.spi.utils.LockService import java.lang.Exception import java.time.Instant @@ -36,11 +37,11 @@ suspend fun issueUpdateSettingsRequest(client: Client, indexName: String, settin suspend fun releaseShrinkLock( shrinkActionProperties: ShrinkActionProperties, - jobExecutionContext: JobExecutionContext, + lockService: LockService, logger: Logger ) { - val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext) - val released: Boolean = jobExecutionContext.lockService.suspendUntil { release(lock, it) } + val lock: LockModel = getShrinkLockModel(shrinkActionProperties) + val released: Boolean = lockService.suspendUntil { release(lock, it) } if (!released) { logger.error("Failed to release Shrink action lock on node [${shrinkActionProperties.nodeName}]") } @@ -48,11 +49,11 @@ suspend fun releaseShrinkLock( suspend fun deleteShrinkLock( 
shrinkActionProperties: ShrinkActionProperties, - jobExecutionContext: JobExecutionContext, + lockService: LockService, logger: Logger ) { val lockID = getShrinkLockID(shrinkActionProperties.nodeName) - val deleted: Boolean = jobExecutionContext.lockService.suspendUntil { deleteLock(lockID, it) } + val deleted: Boolean = lockService.suspendUntil { deleteLock(lockID, it) } if (!deleted) { logger.error("Failed to delete Shrink action lock on node [${shrinkActionProperties.nodeName}]") } @@ -60,12 +61,12 @@ suspend fun deleteShrinkLock( suspend fun renewShrinkLock( shrinkActionProperties: ShrinkActionProperties, - jobExecutionContext: JobExecutionContext, + lockService: LockService, logger: Logger ): LockModel? { - val lock: LockModel = getShrinkLockModel(shrinkActionProperties, jobExecutionContext) + val lock: LockModel = getShrinkLockModel(shrinkActionProperties) return try { - jobExecutionContext.lockService.suspendUntil { renewLock(lock, it) } + lockService.suspendUntil { renewLock(lock, it) } } catch (e: Exception) { logger.error("Failed to renew Shrink action lock on node [${shrinkActionProperties.nodeName}]: $e") null @@ -73,12 +74,11 @@ suspend fun renewShrinkLock( } fun getShrinkLockModel( - shrinkActionProperties: ShrinkActionProperties, - jobExecutionContext: JobExecutionContext + shrinkActionProperties: ShrinkActionProperties ): LockModel { return getShrinkLockModel( shrinkActionProperties.nodeName, - jobExecutionContext.jobIndexName, + INDEX_MANAGEMENT_INDEX, shrinkActionProperties.lockEpochSecond, shrinkActionProperties.lockPrimaryTerm, shrinkActionProperties.lockSeqNo, diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt index 88a3ea9e5..419a4330c 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt +++ 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt @@ -22,7 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.close.AttemptClo import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -34,7 +34,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test close step sets step status to completed when successful`() { val closeIndexResponse = CloseIndexResponse(true, true, listOf()) @@ -43,7 +43,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -57,7 +57,7 @@ class 
AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -71,7 +71,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -85,7 +85,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = 
StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -99,7 +99,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -113,7 +113,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, 
updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt index dbdd72f72..ceeedcd0a 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt @@ -22,7 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.delete.AttemptDe import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -32,7 +32,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test delete step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) @@ -41,7 +41,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = 
StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +55,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +69,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -84,7 +84,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { 
runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt index 8ff70e89e..73bef06bd 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt @@ -22,7 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.open.AttemptOpen import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,7 +32,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val 
settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test open step sets step status to failed when not acknowledged`() { val openIndexResponse = OpenIndexResponse(false, false) @@ -41,7 +41,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +55,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +69,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = 
ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt index cef20a075..49dfb4fa0 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt @@ -23,7 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.indexpriority.At import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -33,7 +33,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings 
= Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test set priority step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) @@ -43,7 +43,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -58,7 +58,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", 
Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -73,7 +73,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -89,7 +89,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt index 387d20c95..0c66e3b97 100644 --- 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt @@ -23,7 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.replicacount.Att import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -33,7 +33,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test replica step sets step status to failed when not acknowledged`() { val replicaCountResponse = AcknowledgedResponse(false) @@ -43,7 +43,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = 
replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -58,7 +58,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -73,7 +73,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt index 11d4faa00..9d83cdd5b 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt @@ -30,7 +30,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionPrope import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.ingest.TestTemplateService.MockTemplateScript -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript @@ -45,7 +45,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { private val settings: Settings = Settings.EMPTY private val snapshotAction = randomSnapshotActionConfig("repo", "snapshot-name") private val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(AttemptSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() @Before fun settings() { @@ -60,7 +60,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.ACCEPTED) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, 
scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +69,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.OK) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -78,7 +78,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.INTERNAL_SERVER_ERROR) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -90,7 +90,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, 
client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -103,7 +103,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -116,7 +116,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -129,7 +129,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, 
scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt index 041c72348..f35d75feb 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt @@ -37,7 +37,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase @@ -61,7 +61,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock { on { state() } doReturn clusterState } private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() private val docsStats: DocsStats = mock() private val primaries: CommonStats = mock { on { getDocs() } doReturn docsStats } @@ -85,7 +85,7 @@ class AttemptTransitionStepTests : 
OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -103,7 +103,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -121,7 +121,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, 
indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt index 625b72c36..0dfe0071d 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt @@ -22,7 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readonly.SetRead import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,7 +32,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() 
private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test read only step sets step status to failed when not acknowledged`() { val setReadOnlyResponse = AcknowledgedResponse(false) @@ -41,7 +41,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +55,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +69,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { 
runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt index c5a8ca75a..3ea8cd631 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt @@ -22,7 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readwrite.SetRea import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,7 +32,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = 
Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() fun `test read write step sets step status to failed when not acknowledged`() { val setReadWriteResponse = AcknowledgedResponse(false) @@ -41,7 +41,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +55,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +69,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", 
"indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt index 7d0293cfa..d1b11a3e5 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt @@ -18,7 +18,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import java.time.Instant @@ -42,12 +42,12 @@ class WaitForRollupCompletionStepTests : OpenSearchTestCase() { ) private val client: Client = mock() private val step = WaitForRollupCompletionStep() - private val jobContext: JobExecutionContext = mock() + private val lockService: 
LockService = mock() fun `test wait for rollup when missing rollup id`() { val actionMetadata = metadata.actionMetaData!!.copy(actionProperties = ActionProperties()) val metadata = metadata.copy(actionMetaData = actionMetadata) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) val step = WaitForRollupCompletionStep() runBlocking { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt index 437772b14..a701b7680 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt @@ -27,7 +27,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext -import org.opensearch.jobscheduler.spi.JobExecutionContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.Snapshot import org.opensearch.snapshots.SnapshotId @@ -39,7 +39,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val jobContext: JobExecutionContext = mock() + private val lockService: LockService = mock() val snapshot = "snapshot-name" fun `test snapshot missing snapshot name in action properties`() { @@ 
-50,7 +50,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, emptyActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -62,7 +62,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, nullActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -82,7 +82,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, 
ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -94,7 +94,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -106,7 +106,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = 
WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -118,7 +118,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -130,7 +130,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, 
null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -149,7 +149,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -164,7 +164,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", 
Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -179,7 +179,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, jobContext) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) From c9e565d338bea0e9a72075a79ed59aefc94f2ed0 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Wed, 13 Apr 2022 00:14:44 +0000 Subject: [PATCH 10/13] Changes index suffix to mustache template Signed-off-by: Clay Downs --- .../action/ShrinkAction.kt | 18 ++++++------- .../action/ShrinkActionParser.kt | 13 +++++----- .../step/shrink/AttemptMoveShardsStep.kt | 25 ++++++++++++++++++- .../mappings/opendistro-ism-config.json | 5 ++-- .../indexstatemanagement/TestHelpers.kt | 11 ++++---- .../action/ShrinkActionIT.kt | 23 +++++++++-------- .../step/AttemptCloseStepTests.kt | 2 +- .../step/AttemptDeleteStepTests.kt | 2 +- .../step/AttemptOpenStepTests.kt | 2 +- .../step/AttemptSetIndexPriorityStepTests.kt | 2 +- .../step/AttemptSetReplicaCountStepTests.kt | 2 +- .../step/AttemptSnapshotStepTests.kt | 2 +- .../step/AttemptTransitionStepTests.kt | 2 +- .../step/SetReadOnlyStepTests.kt | 2 +- .../step/SetReadWriteStepTests.kt | 2 +- .../step/WaitForRollupCompletionStepTests.kt | 2 +- .../step/WaitForSnapshotStepTests.kt | 2 +- 
.../cached-opendistro-ism-config.json | 5 ++-- 18 files changed, 76 insertions(+), 46 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt index 4987c0db4..da8d379f1 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -6,11 +6,11 @@ package org.opensearch.indexmanagement.indexstatemanagement.action import org.opensearch.action.admin.indices.alias.Alias -import org.opensearch.common.Strings import org.opensearch.common.io.stream.StreamOutput import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.XContentBuilder +import org.opensearch.indexmanagement.indexstatemanagement.action.NotificationAction.Companion.MUSTACHE import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptShrinkStep import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep @@ -19,13 +19,14 @@ import org.opensearch.indexmanagement.opensearchapi.aliasesField import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.script.Script @Suppress("LongParameterList") class ShrinkAction( val numNewShards: Int?, val maxShardSize: ByteSizeValue?, val percentageOfSourceShards: Double?, - val targetIndexSuffix: String?, + val targetIndexTemplate: Script?, val aliases: List?, val forceUnsafe: Boolean?, index: Int @@ -45,10 +46,8 @@ class ShrinkAction( if (numNewShards != null) { require(numNewShards > 0) { 
"Shrink action numNewShards must be greater than 0." } } - if (targetIndexSuffix != null) { - require(Strings.validFileName(targetIndexSuffix)) { - "Target index suffix must not contain the following characters ${Strings.INVALID_FILENAME_CHARS}" - } + if (targetIndexTemplate != null) { + require(targetIndexTemplate.lang == MUSTACHE) { "Target index name template must be a mustache script" } } } @@ -103,7 +102,7 @@ class ShrinkAction( if (numNewShards != null) builder.field(NUM_NEW_SHARDS_FIELD, numNewShards) if (maxShardSize != null) builder.field(MAX_SHARD_SIZE_FIELD, maxShardSize.stringRep) if (percentageOfSourceShards != null) builder.field(PERCENTAGE_OF_SOURCE_SHARDS_FIELD, percentageOfSourceShards) - if (targetIndexSuffix != null) builder.field(TARGET_INDEX_SUFFIX_FIELD, targetIndexSuffix) + if (targetIndexTemplate != null) builder.field(TARGET_INDEX_TEMPLATE_FIELD, targetIndexTemplate) if (aliases != null) { builder.aliasesField(aliases) } if (forceUnsafe != null) builder.field(FORCE_UNSAFE_FIELD, forceUnsafe) builder.endObject() @@ -113,7 +112,8 @@ class ShrinkAction( out.writeOptionalInt(numNewShards) out.writeOptionalWriteable(maxShardSize) out.writeOptionalDouble(percentageOfSourceShards) - out.writeOptionalString(targetIndexSuffix) + out.writeBoolean(targetIndexTemplate != null) + targetIndexTemplate?.writeTo(out) if (aliases != null) { out.writeBoolean(true) out.writeList(aliases) @@ -129,7 +129,7 @@ class ShrinkAction( const val NUM_NEW_SHARDS_FIELD = "num_new_shards" const val PERCENTAGE_OF_SOURCE_SHARDS_FIELD = "percentage_of_source_shards" const val MAX_SHARD_SIZE_FIELD = "max_shard_size" - const val TARGET_INDEX_SUFFIX_FIELD = "target_index_suffix" + const val TARGET_INDEX_TEMPLATE_FIELD = "target_index_name_template" const val ALIASES_FIELD = "aliases" const val FORCE_UNSAFE_FIELD = "force_unsafe" const val LOCK_RESOURCE_TYPE = "shrink" diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt 
b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt index 23450c0ff..d38c17496 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt @@ -15,21 +15,22 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.C import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.MAX_SHARD_SIZE_FIELD import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.NUM_NEW_SHARDS_FIELD import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.PERCENTAGE_OF_SOURCE_SHARDS_FIELD -import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.TARGET_INDEX_SUFFIX_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.TARGET_INDEX_TEMPLATE_FIELD import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import org.opensearch.indexmanagement.spi.indexstatemanagement.ActionParser +import org.opensearch.script.Script class ShrinkActionParser : ActionParser() { override fun fromStreamInput(sin: StreamInput): Action { val numNewShards = sin.readOptionalInt() val maxShardSize = sin.readOptionalWriteable(::ByteSizeValue) val percentageOfSourceShards = sin.readOptionalDouble() - val targetIndexSuffix = sin.readOptionalString() + val targetIndexTemplate = if (sin.readBoolean()) Script(sin) else null val aliases = if (sin.readBoolean()) sin.readList(::Alias) else null val forceUnsafe = sin.readOptionalBoolean() val index = sin.readInt() - return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, index) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, index) } 
@Suppress("NestedBlockDepth") @@ -37,7 +38,7 @@ class ShrinkActionParser : ActionParser() { var numNewShards: Int? = null var maxShardSize: ByteSizeValue? = null var percentageOfSourceShards: Double? = null - var targetIndexSuffix: String? = null + var targetIndexTemplate: Script? = null var aliases: List? = null var forceUnsafe: Boolean? = null @@ -50,7 +51,7 @@ class ShrinkActionParser : ActionParser() { NUM_NEW_SHARDS_FIELD -> numNewShards = xcp.intValue() MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.text(), MAX_SHARD_SIZE_FIELD) PERCENTAGE_OF_SOURCE_SHARDS_FIELD -> percentageOfSourceShards = xcp.doubleValue() - TARGET_INDEX_SUFFIX_FIELD -> targetIndexSuffix = xcp.text() + TARGET_INDEX_TEMPLATE_FIELD -> targetIndexTemplate = Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG) ALIASES_FIELD -> { if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { aliases = mutableListOf() @@ -69,7 +70,7 @@ class ShrinkActionParser : ActionParser() { } } - return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, index) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, index) } override fun getActionType(): String { diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 1d18cd77e..03f8772c4 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -34,6 +34,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemor import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockID import 
org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest +import org.opensearch.indexmanagement.opensearchapi.convertToMap import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties @@ -45,6 +46,9 @@ import org.opensearch.indices.InvalidIndexNameException import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting import org.opensearch.jobscheduler.spi.LockModel import org.opensearch.jobscheduler.spi.utils.LockService +import org.opensearch.script.Script +import org.opensearch.script.ScriptService +import org.opensearch.script.TemplateScript import org.opensearch.transport.RemoteTransportException import java.lang.RuntimeException import java.util.PriorityQueue @@ -67,7 +71,9 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { val indexName = context.metadata.index try { - val shrinkTargetIndexName = indexName + (action.targetIndexSuffix ?: DEFAULT_TARGET_SUFFIX) + val shrinkTargetIndexName = + compileTemplate(action.targetIndexTemplate, context.metadata, indexName + DEFAULT_TARGET_SUFFIX, context.scriptService) + if (targetIndexNameIsInvalid(context.clusterService, shrinkTargetIndexName)) return this if (!isIndexGreen(client, indexName)) { @@ -151,6 +157,22 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { shrinkActionProperties = null } + private fun compileTemplate( + template: Script?, + managedIndexMetaData: ManagedIndexMetaData, + defaultValue: String, + scriptService: ScriptService + ): String { + if (template == null) return defaultValue + val contextMap = managedIndexMetaData.convertToMap().filterKeys { key -> + key in ALLOWED_TEMPLATE_FIELDS + } + val compiledValue = scriptService.compile(template, 
TemplateScript.CONTEXT) + .newInstance(template.params + mapOf("ctx" to contextMap)) + .execute() + return compiledValue.ifBlank { defaultValue } + } + private suspend fun getJobIntervalSeconds(indexUuid: String, client: Client): Long? { val managedIndexConfig: ManagedIndexConfig? try { @@ -411,5 +433,6 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { // If user sets maximum jitter, it could be 2x the job interval before the next step is executed. private fun getShrinkLockDuration(jobInterval: Long?) = jobInterval?.let { (it * JOB_INTERVAL_LOCK_MULTIPLIER) + LOCK_BUFFER_SECONDS } ?: DEFAULT_LOCK_INTERVAL + private val ALLOWED_TEMPLATE_FIELDS = setOf("index", "indexUuid") } } diff --git a/src/main/resources/mappings/opendistro-ism-config.json b/src/main/resources/mappings/opendistro-ism-config.json index 5bc299417..c83df1a68 100644 --- a/src/main/resources/mappings/opendistro-ism-config.json +++ b/src/main/resources/mappings/opendistro-ism-config.json @@ -441,8 +441,9 @@ "percentage_of_source_shards": { "type": "double" }, - "target_index_suffix": { - "type": "text" + "target_index_name_template": { + "type": "object", + "enabled": false }, "aliases": { "type": "object", diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt index 3dd516863..7ddadd7b9 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt @@ -140,22 +140,23 @@ fun randomRolloverActionConfig( ) } +@Suppress("ReturnCount") fun randomShrinkAction( numNewShards: Int? = null, maxShardSize: ByteSizeValue? = null, percentageOfSourceShards: Double? = null, - targetIndexSuffix: String? = if (randomBoolean()) randomAlphaOfLength(10) else null, + targetIndexTemplate: Script? 
= if (randomBoolean()) randomTemplateScript(randomAlphaOfLength(10)) else null, aliases: List? = if (randomBoolean()) randomList(10) { randomAlias() } else null, forceUnsafe: Boolean? = if (randomBoolean()) randomBoolean() else null ): ShrinkAction { if (numNewShards == null && maxShardSize == null && percentageOfSourceShards == null) { when (randomInt(2)) { - 0 -> return ShrinkAction(abs(randomInt()) + 1, null, null, targetIndexSuffix, aliases, forceUnsafe, 0) - 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexSuffix, aliases, forceUnsafe, 0) - 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexSuffix, aliases, forceUnsafe, 0) + 0 -> return ShrinkAction(abs(randomInt()) + 1, null, null, targetIndexTemplate, aliases, forceUnsafe, 0) + 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexTemplate, aliases, forceUnsafe, 0) + 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexTemplate, aliases, forceUnsafe, 0) } } - return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexSuffix, aliases, forceUnsafe, 0) + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, 0) } fun randomReadOnlyActionConfig(): ReadOnlyAction { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index 01c68ea7b..c96247137 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -18,11 +18,14 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMo import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForShrinkStep import 
org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.waitFor +import org.opensearch.script.Script +import org.opensearch.script.ScriptType import java.time.Instant import java.time.temporal.ChronoUnit class ShrinkActionIT : IndexStateManagementRestTestCase() { private val testIndexName = javaClass.simpleName.lowercase() + private val testIndexSuffix = "_shrink_test" fun `test basic workflow number of shards`() { val logger = LogManager.getLogger(::ShrinkActionIT) val indexName = "${testIndexName}_index_1" @@ -32,7 +35,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { numNewShards = 1, maxShardSize = null, percentageOfSourceShards = null, - targetIndexSuffix = "_shrink_test", + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), aliases = null, forceUnsafe = true, index = 0 @@ -64,7 +67,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - val targetIndexName = indexName + shrinkAction.targetIndexSuffix + val targetIndexName = indexName + testIndexSuffix waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) @@ -120,7 +123,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { numNewShards = null, maxShardSize = testMaxShardSize, percentageOfSourceShards = null, - targetIndexSuffix = "_shrink_test", + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), aliases = null, forceUnsafe = true, index = 0 @@ -152,7 +155,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - val 
targetIndexName = indexName + shrinkAction.targetIndexSuffix + val targetIndexName = indexName + testIndexSuffix waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) @@ -204,7 +207,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { numNewShards = null, maxShardSize = null, percentageOfSourceShards = 0.5, - targetIndexSuffix = "_shrink_test", + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), aliases = null, forceUnsafe = true, index = 0 @@ -235,7 +238,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - val targetIndexName = indexName + shrinkAction.targetIndexSuffix + val targetIndexName = indexName + testIndexSuffix waitFor(Instant.ofEpochSecond(60)) { assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) assertEquals("true", getIndexBlocksWriteSetting(indexName)) @@ -292,7 +295,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { numNewShards = null, maxShardSize = null, percentageOfSourceShards = 0.5, - targetIndexSuffix = "_shrink_test", + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), aliases = null, forceUnsafe = true, index = 0 @@ -326,7 +329,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } // Starts AttemptMoveShardsStep updateManagedIndexConfigStartTime(managedIndexConfig) - val targetIndexName = indexName + shrinkAction.targetIndexSuffix + val targetIndexName = indexName + 
testIndexSuffix waitFor(Instant.ofEpochSecond(60)) { assertEquals( targetIndexName, @@ -392,7 +395,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { numNewShards = null, maxShardSize = null, percentageOfSourceShards = 0.5, - targetIndexSuffix = "_shrink_test", + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), aliases = null, forceUnsafe = true, index = 0 @@ -444,7 +447,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val testPolicy = """ {"policy":{"description":"Default policy","default_state":"Shrink","states":[ {"name":"Shrink","actions":[{"retry":{"count":2,"backoff":"constant","delay":"1s"},"shrink": - {"num_new_shards":1, "target_index_suffix":"_shrink_test", "force_unsafe": "true"}}],"transitions":[]}]}} + {"num_new_shards":1, "target_index_name_template":{"source": "{{ctx.index}}_shrink_test"}, "force_unsafe": "true"}}],"transitions":[]}]}} """.trimIndent() val logger = LogManager.getLogger(::ShrinkActionIT) val indexName = "${testIndexName}_retry" diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt index 419a4330c..147268227 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt @@ -34,7 +34,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test close step sets step status to completed when successful`() { val closeIndexResponse = CloseIndexResponse(true, 
true, listOf()) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt index ceeedcd0a..d5faa4e48 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt @@ -32,7 +32,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test delete step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt index 73bef06bd..fe70954ba 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt @@ -32,7 +32,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test open step sets step status to failed when not acknowledged`() { val openIndexResponse = OpenIndexResponse(false, false) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt index 49dfb4fa0..e3248ae4d 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt @@ -33,7 +33,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test set priority step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt index 0c66e3b97..4959a46d4 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt @@ -33,7 +33,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test replica step sets step status to failed when not acknowledged`() { val replicaCountResponse = AcknowledgedResponse(false) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt index 9d83cdd5b..74d07b449 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt @@ -45,7 +45,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { private val settings: Settings = Settings.EMPTY private val snapshotAction = randomSnapshotActionConfig("repo", "snapshot-name") private val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(AttemptSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) @Before fun settings() { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt index f35d75feb..aeeedd7c9 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt @@ -61,7 +61,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock { on { state() } doReturn clusterState } private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) private val docsStats: DocsStats = mock() private val primaries: CommonStats = mock { on { getDocs() } doReturn docsStats } diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt index 0dfe0071d..8501f6d24 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt @@ -32,7 +32,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test read only step sets step status to failed when not acknowledged`() { val setReadOnlyResponse = AcknowledgedResponse(false) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt index 3ea8cd631..6c56fadb5 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt @@ -32,7 +32,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test read write step sets step status to failed when not acknowledged`() { val setReadWriteResponse = AcknowledgedResponse(false) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt index d1b11a3e5..b769d42e2 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt @@ -42,7 +42,7 @@ class WaitForRollupCompletionStepTests : OpenSearchTestCase() { ) private val client: Client = mock() private val step = WaitForRollupCompletionStep() - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) fun `test wait for rollup when missing rollup id`() { val actionMetadata = metadata.actionMetaData!!.copy(actionProperties = ActionProperties()) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt index a701b7680..14f22918d 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt @@ -39,7 +39,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY - private val lockService: LockService = mock() + private val lockService: LockService = LockService(mock(), clusterService) val snapshot = "snapshot-name" fun `test snapshot missing snapshot name in action properties`() { diff --git a/src/test/resources/mappings/cached-opendistro-ism-config.json b/src/test/resources/mappings/cached-opendistro-ism-config.json index 5bc299417..c83df1a68 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-config.json +++ 
b/src/test/resources/mappings/cached-opendistro-ism-config.json @@ -441,8 +441,9 @@ "percentage_of_source_shards": { "type": "double" }, - "target_index_suffix": { - "type": "text" + "target_index_name_template": { + "type": "object", + "enabled": false }, "aliases": { "type": "object", From 18dc35dc933cfa156bbd457b9549768173e99ca4 Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Thu, 14 Apr 2022 03:22:57 +0000 Subject: [PATCH 11/13] Resets allocation and readwrite after shrink Signed-off-by: Clay Downs --- .../model/ShrinkActionProperties.kt | 19 +++++++++++++++---- .../step/shrink/AttemptMoveShardsStep.kt | 18 +++++++++++++++--- .../step/shrink/AttemptShrinkStep.kt | 4 ++-- .../step/shrink/WaitForMoveShardsStep.kt | 4 ++-- .../step/shrink/WaitForShrinkStep.kt | 6 +++--- .../indexstatemanagement/util/StepUtils.kt | 9 ++++++--- .../action/ShrinkActionIT.kt | 17 ++++++++++++++++- 7 files changed, 59 insertions(+), 18 deletions(-) diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt index 07ded7a10..8c96f7f6f 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt +++ b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt @@ -21,7 +21,9 @@ data class ShrinkActionProperties( val lockPrimaryTerm: Long, val lockSeqNo: Long, val lockEpochSecond: Long, - val lockDurationSecond: Long + val lockDurationSecond: Long, + // Used to store the original index allocation and write block setting to reapply after shrink + val originalIndexSettings: Map ) : Writeable, ToXContentFragment { override fun writeTo(out: StreamOutput) { @@ -32,6 +34,7 @@ data class ShrinkActionProperties( out.writeLong(lockSeqNo) out.writeLong(lockEpochSecond) out.writeLong(lockDurationSecond) + 
out.writeMap(originalIndexSettings) } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -42,6 +45,7 @@ data class ShrinkActionProperties( builder.field(ShrinkProperties.LOCK_PRIMARY_TERM.key, lockPrimaryTerm) builder.field(ShrinkProperties.LOCK_EPOCH_SECOND.key, lockEpochSecond) builder.field(ShrinkProperties.LOCK_DURATION_SECOND.key, lockDurationSecond) + builder.field(ShrinkProperties.ORIGINAL_INDEX_SETTINGS.key, originalIndexSettings) return builder } @@ -56,8 +60,11 @@ data class ShrinkActionProperties( val lockSeqNo: Long = si.readLong() val lockEpochSecond: Long = si.readLong() val lockDurationSecond: Long = si.readLong() + val originalIndexSettings: Map = si.readMap({ it.readString() }, { it.readString() }) - return ShrinkActionProperties(nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond, lockDurationSecond) + return ShrinkActionProperties( + nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond, lockDurationSecond, originalIndexSettings + ) } fun parse(xcp: XContentParser): ShrinkActionProperties { @@ -68,6 +75,7 @@ data class ShrinkActionProperties( var lockSeqNo: Long? = null var lockEpochSecond: Long? = null var lockDurationSecond: Long? = null + var originalIndexSettings: Map? 
= null XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { @@ -82,6 +90,7 @@ data class ShrinkActionProperties( ShrinkProperties.LOCK_SEQ_NO.key -> lockSeqNo = xcp.longValue() ShrinkProperties.LOCK_EPOCH_SECOND.key -> lockEpochSecond = xcp.longValue() ShrinkProperties.LOCK_DURATION_SECOND.key -> lockDurationSecond = xcp.longValue() + ShrinkProperties.ORIGINAL_INDEX_SETTINGS.key -> originalIndexSettings = xcp.mapStrings() } } @@ -92,7 +101,8 @@ data class ShrinkActionProperties( requireNotNull(lockPrimaryTerm), requireNotNull(lockSeqNo), requireNotNull(lockEpochSecond), - requireNotNull(lockDurationSecond) + requireNotNull(lockDurationSecond), + requireNotNull(originalIndexSettings) ) } } @@ -104,6 +114,7 @@ data class ShrinkActionProperties( LOCK_SEQ_NO("lock_seq_no"), LOCK_PRIMARY_TERM("lock_primary_term"), LOCK_EPOCH_SECOND("lock_epoch_second"), - LOCK_DURATION_SECOND("lock_duration_second") + LOCK_DURATION_SECOND("lock_duration_second"), + ORIGINAL_INDEX_SETTINGS("original_index_settings") } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt index 03f8772c4..d1901f111 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -16,7 +16,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsRequest import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.client.Client -import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE import 
org.opensearch.cluster.metadata.MetadataCreateIndexService.validateIndexOrAliasName import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand import org.opensearch.cluster.routing.allocation.decider.Decision @@ -109,6 +109,8 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { if (shouldFailTooManyDocuments(statsDocs, numTargetShards)) return this + val originalIndexSettings = getOriginalSettings(indexName, context.clusterService) + // get the nodes with enough memory in increasing order of free space val suitableNodes = findSuitableNodes(context, statsResponse, indexSize) @@ -130,7 +132,8 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { lock.primaryTerm, lock.seqNo, lock.lockTime.epochSecond, - lock.lockDurationSeconds + lock.lockDurationSeconds, + originalIndexSettings ) setToReadOnlyAndMoveIndexToNode(context, nodeName, lock) @@ -157,6 +160,15 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { shrinkActionProperties = null } + // Gets the routing and write block setting of the index and returns it in a map of setting name to setting + private fun getOriginalSettings(indexName: String, clusterService: ClusterService): Map { + val indexSettings = clusterService.state().metadata.index(indexName).settings + val originalSettings = mutableMapOf() + indexSettings.get(ROUTING_SETTING)?.let { it -> originalSettings.put(ROUTING_SETTING, it) } + indexSettings.get(SETTING_BLOCKS_WRITE)?.let { it -> originalSettings.put(SETTING_BLOCKS_WRITE, it) } + return originalSettings + } + private fun compileTemplate( template: Script?, managedIndexMetaData: ManagedIndexMetaData, @@ -231,7 +243,7 @@ class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) { private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String, lock: LockModel): Boolean { val updateSettings = Settings.builder() - .put(IndexMetadata.SETTING_BLOCKS_WRITE, true) 
+ .put(SETTING_BLOCKS_WRITE, true) .put(ROUTING_SETTING, node) .build() val lockService = stepContext.lockService diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt index 07a1bac9c..6ba6984bb 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt @@ -18,7 +18,7 @@ import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.util.INDEX_NUMBER_OF_SHARDS -import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.resetReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock @@ -92,7 +92,7 @@ class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.FAILED // Non-null assertion !! 
is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) + resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt index b68a66638..c4577cd08 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt @@ -13,7 +13,7 @@ import org.opensearch.action.admin.indices.stats.IndicesStatsResponse import org.opensearch.action.admin.indices.stats.ShardStats import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage -import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.resetReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock @@ -109,7 +109,7 @@ class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { stepStatus = StepStatus.FAILED // Non-null assertion !! 
is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) + resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt index a68cd882b..d075ec45a 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -17,7 +17,7 @@ import org.opensearch.common.settings.Settings import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep.Companion.getTimeoutFailure -import org.opensearch.indexmanagement.indexstatemanagement.util.clearReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.resetReadOnlyAndRouting import org.opensearch.indexmanagement.indexstatemanagement.util.deleteShrinkLock import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest @@ -71,7 +71,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { // Clear source and target allocation, if either fails the step will be set to failed and the function will return false if (!clearAllocationSettings(context, targetIndex)) return this - if 
(!clearAllocationSettings(context, context.metadata.index)) return this + if (!resetReadOnlyAndRouting(context.metadata.index, context.client, localShrinkActionProperties.originalIndexSettings)) return this deleteShrinkLock(localShrinkActionProperties, context.lockService, logger) stepStatus = StepStatus.COMPLETED @@ -99,7 +99,7 @@ class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { // Using a try/catch for each cleanup action as we should clean up as much as possible despite any failures // Non-null assertion !! is used to throw an exception on null which would just be caught and logged try { - clearReadOnlyAndRouting(context!!.metadata.index, context!!.client) + resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings) } catch (e: Exception) { logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt index 245b83f56..996b3f2a8 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -117,7 +117,8 @@ fun getUpdatedShrinkActionProperties(shrinkActionProperties: ShrinkActionPropert lock.primaryTerm, lock.seqNo, lock.lockTime.epochSecond, - lock.lockDurationSeconds + lock.lockDurationSeconds, + shrinkActionProperties.originalIndexSettings ) } @@ -177,8 +178,10 @@ suspend fun isIndexGreen( return !response.isTimedOut } -suspend fun clearReadOnlyAndRouting(index: String, client: Client): Boolean { - val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).putNull(IndexMetadata.SETTING_BLOCKS_WRITE).build() +suspend fun resetReadOnlyAndRouting(index: String, client: Client, 
originalSettings: Map): Boolean { + val allocationSettings = Settings.builder() + .put(AttemptMoveShardsStep.ROUTING_SETTING, originalSettings[AttemptMoveShardsStep.ROUTING_SETTING]) + .put(IndexMetadata.SETTING_BLOCKS_WRITE, originalSettings[IndexMetadata.SETTING_BLOCKS_WRITE]).build() val response: AcknowledgedResponse = issueUpdateSettingsRequest(client, index, allocationSettings) if (!response.isAcknowledged) { return false diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index c96247137..6af769bba 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -6,6 +6,7 @@ package org.opensearch.indexmanagement.indexstatemanagement.action import org.apache.logging.log4j.LogManager +import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.common.settings.Settings import org.opensearch.common.unit.ByteSizeValue import org.opensearch.indexmanagement.indexstatemanagement.IndexStateManagementRestTestCase @@ -57,6 +58,9 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { insertSampleData(indexName, 3) + // Set the index as readonly to check that the setting is preserved after the shrink finishes + updateIndexSetting(indexName, IndexMetadata.SETTING_BLOCKS_WRITE, "true") + // Will change the startTime each execution so that it triggers in 2 seconds // First execution: Policy is initialized val managedIndexConfig = getExistingManagedIndexConfig(indexName) @@ -111,9 +115,11 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { WaitForShrinkStep.SUCCESS_MESSAGE, getExplainManagedIndexMetaData(indexName).info?.get("message") ) + assertEquals("Write block setting was not reset after successful shrink", "true", 
getIndexBlocksWriteSetting(indexName)) } } + @Suppress("UNCHECKED_CAST") fun `test basic workflow max shard size`() { val logger = LogManager.getLogger(::ShrinkActionIT) val indexName = "${testIndexName}_index_1" @@ -197,9 +203,13 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { WaitForShrinkStep.SUCCESS_MESSAGE, getExplainManagedIndexMetaData(indexName).info?.get("message") ) + val indexSettings = getIndexSettings(indexName) as Map<String, Map<String, Map<String, Any?>>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? + assertNull("Write block setting was not reset after successful shrink", writeBlock) } } + @Suppress("UNCHECKED_CAST") fun `test basic workflow percentage to decrease to`() { val indexName = "${testIndexName}_index_1" val policyID = "${testIndexName}_testPolicyName_1" @@ -282,6 +292,9 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { WaitForShrinkStep.SUCCESS_MESSAGE, getExplainManagedIndexMetaData(indexName).info?.get("message") ) + val indexSettings = getIndexSettings(indexName) as Map<String, Map<String, Map<String, Any?>>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? 
+ assertNull("Write block setting was not reset after successful shrink", writeBlock) } } @@ -381,6 +394,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { WaitForShrinkStep.SUCCESS_MESSAGE, getExplainManagedIndexMetaData(indexName).info?.get("message") ) + assertEquals("Write block setting was not reset after successful shrink", "false", getIndexBlocksWriteSetting(indexName)) } } } @@ -443,7 +457,8 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { } } - fun `test retries from first step`() { + // TODO This test is excessively flaky, disabling for now but it needs to be fixed + private fun `test retries from first step`() { val testPolicy = """ {"policy":{"description":"Default policy","default_state":"Shrink","states":[ {"name":"Shrink","actions":[{"retry":{"count":2,"backoff":"constant","delay":"1s"},"shrink": From b0730ce8f4b57b7fdfc3ae78725ceb19cf41b30d Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Thu, 14 Apr 2022 04:40:22 +0000 Subject: [PATCH 12/13] Fixes test Signed-off-by: Clay Downs --- .../indexstatemanagement/action/ShrinkActionIT.kt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index 6af769bba..1f563ed50 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -298,6 +298,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { } } + @Suppress("UNCHECKED_CAST") fun `test allocation block picks correct node`() { val logger = LogManager.getLogger(::ShrinkActionIT) val nodes = getNodes() @@ -394,7 +395,9 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { WaitForShrinkStep.SUCCESS_MESSAGE, getExplainManagedIndexMetaData(indexName).info?.get("message") 
) - assertEquals("Write block setting was not reset after successful shrink", "false", getIndexBlocksWriteSetting(indexName)) + val indexSettings = getIndexSettings(indexName) as Map<String, Map<String, Map<String, Any?>>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? + assertNull("Write block setting was not reset after successful shrink", writeBlock) } } } From eb1eb904b35dd5178e7e2391f96121388b06666b Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Fri, 15 Apr 2022 02:49:21 +0000 Subject: [PATCH 13/13] Fixes alias parsing logic Signed-off-by: Clay Downs --- .../action/ShrinkActionParser.kt | 12 +++++------- .../opensearchapi/OpenSearchExtensions.kt | 10 +++++++--- .../indexstatemanagement/action/ShrinkActionIT.kt | 10 ++++++++-- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt index d38c17496..d74e65bb6 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt @@ -54,14 +54,12 @@ class ShrinkActionParser : ActionParser() { TARGET_INDEX_TEMPLATE_FIELD -> targetIndexTemplate = Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG) ALIASES_FIELD -> { if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { + ensureExpectedToken(XContentParser.Token.START_ARRAY, xcp.currentToken(), xcp) aliases = mutableListOf() - when (xcp.currentToken()) { - XContentParser.Token.START_OBJECT -> { - while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { - aliases.add(Alias.fromXContent(xcp)) - } - } - else -> ensureExpectedToken(XContentParser.Token.START_ARRAY, xcp.currentToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_ARRAY) { 
ensureExpectedToken(XContentParser.Token.FIELD_NAME, xcp.nextToken(), xcp) + aliases.add(Alias.fromXContent(xcp)) + ensureExpectedToken(XContentParser.Token.END_OBJECT, xcp.nextToken(), xcp) } } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt index 8d6c23a6e..4288c88cf 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt @@ -83,9 +83,13 @@ fun XContentParser.instant(): Instant? { } fun XContentBuilder.aliasesField(aliases: List<Alias>): XContentBuilder { - val builder = this.startObject(ShrinkAction.ALIASES_FIELD) - aliases.forEach { it.toXContent(builder, ToXContent.EMPTY_PARAMS) } - return builder.endObject() + val builder = this.startArray(ShrinkAction.ALIASES_FIELD) + aliases.forEach { + builder.startObject() + it.toXContent(builder, ToXContent.EMPTY_PARAMS) + builder.endObject() + } + return builder.endArray() } fun XContentBuilder.optionalTimeField(name: String, instant: Instant?): XContentBuilder { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt index 1f563ed50..686a09265 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -6,9 +6,11 @@ package org.opensearch.indexmanagement.indexstatemanagement.action import org.apache.logging.log4j.LogManager +import org.opensearch.action.admin.indices.alias.Alias import org.opensearch.cluster.metadata.IndexMetadata import org.opensearch.common.settings.Settings import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.index.query.QueryBuilders 
import org.opensearch.indexmanagement.indexstatemanagement.IndexStateManagementRestTestCase import org.opensearch.indexmanagement.indexstatemanagement.model.Policy import org.opensearch.indexmanagement.indexstatemanagement.model.State @@ -37,7 +39,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { maxShardSize = null, percentageOfSourceShards = null, targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), - aliases = null, + aliases = listOf(Alias("test-alias1"), Alias("test-alias2").filter(QueryBuilders.termQuery("foo", "bar")).writeIndex(true)), forceUnsafe = true, index = 0 ) @@ -116,6 +118,8 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { getExplainManagedIndexMetaData(indexName).info?.get("message") ) assertEquals("Write block setting was not reset after successful shrink", "true", getIndexBlocksWriteSetting(indexName)) + val aliases = getAlias(targetIndexName, "") + assertTrue("Aliases were not added to shrunken index", aliases.containsKey("test-alias1") && aliases.containsKey("test-alias2")) } } @@ -130,7 +134,7 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { maxShardSize = testMaxShardSize, percentageOfSourceShards = null, targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), - aliases = null, + aliases = listOf(Alias("max-shard-alias")), forceUnsafe = true, index = 0 ) @@ -206,6 +210,8 @@ class ShrinkActionIT : IndexStateManagementRestTestCase() { val indexSettings = getIndexSettings(indexName) as Map<String, Map<String, Map<String, Any?>>> val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? assertNull("Write block setting was not reset after successful shrink", writeBlock) + val aliases = getAlias(targetIndexName, "") + assertTrue("Alias was not added to shrunken index", aliases.containsKey("max-shard-alias")) } }