From ab1e8214207b0def9921e88e464e7e8f7427cc1d Mon Sep 17 00:00:00 2001 From: Clay Downs Date: Fri, 15 Apr 2022 17:03:37 -0700 Subject: [PATCH] Adds shrink action to ISM (#326) * Updates shrink action to new interface Signed-off-by: Clay Downs --- spi/build.gradle | 1 + .../model/ActionProperties.kt | 23 +- .../model/ShrinkActionProperties.kt | 120 ++++ .../indexstatemanagement/model/StepContext.kt | 4 +- .../indexstatemanagement/ISMActionsParser.kt | 2 + .../ManagedIndexRunner.kt | 8 +- .../action/ShrinkAction.kt | 139 +++++ .../action/ShrinkActionParser.kt | 77 +++ .../step/shrink/AttemptMoveShardsStep.kt | 450 ++++++++++++++ .../step/shrink/AttemptShrinkStep.kt | 181 ++++++ .../step/shrink/WaitForMoveShardsStep.kt | 175 ++++++ .../step/shrink/WaitForShrinkStep.kt | 181 ++++++ .../util/ManagedIndexUtils.kt | 33 + .../indexstatemanagement/util/StepUtils.kt | 194 ++++++ .../opensearchapi/OpenSearchExtensions.kt | 12 + .../mappings/opendistro-ism-config.json | 30 +- .../mappings/opendistro-ism-history.json | 6 +- .../IndexManagementRestTestCase.kt | 4 +- .../indexstatemanagement/TestHelpers.kt | 33 + .../action/ShrinkActionIT.kt | 562 ++++++++++++++++++ .../indexstatemanagement/model/ActionTests.kt | 39 ++ .../model/XContentTests.kt | 9 + .../step/AttemptCloseStepTests.kt | 14 +- .../step/AttemptDeleteStepTests.kt | 10 +- .../step/AttemptOpenStepTests.kt | 8 +- .../step/AttemptSetIndexPriorityStepTests.kt | 10 +- .../step/AttemptSetReplicaCountStepTests.kt | 8 +- .../step/AttemptSnapshotStepTests.kt | 16 +- .../step/AttemptTransitionStepTests.kt | 8 +- .../step/SetReadOnlyStepTests.kt | 8 +- .../step/SetReadWriteStepTests.kt | 8 +- .../step/WaitForRollupCompletionStepTests.kt | 4 +- .../step/WaitForSnapshotStepTests.kt | 22 +- .../cached-opendistro-ism-config.json | 30 +- .../cached-opendistro-ism-history.json | 6 +- 35 files changed, 2371 insertions(+), 64 deletions(-) create mode 100644 
spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptShrinkStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForMoveShardsStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt create mode 100644 src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt create mode 100644 src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt diff --git a/spi/build.gradle b/spi/build.gradle index a9806df4f..f8ba3f69b 100644 --- a/spi/build.gradle +++ b/spi/build.gradle @@ -53,6 +53,7 @@ configurations.all { dependencies { compileOnly "org.opensearch:opensearch:${opensearch_version}" + compileOnly "org.opensearch:opensearch-job-scheduler-spi:${job_scheduler_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib:${kotlin_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}" compileOnly "org.jetbrains.kotlin:kotlin-stdlib-jdk8:${kotlin_version}" diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt index 175dc447d..19a050e49 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt +++ 
b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ActionProperties.kt @@ -12,7 +12,9 @@ import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.ToXContentFragment import org.opensearch.common.xcontent.XContentBuilder import org.opensearch.common.xcontent.XContentParser -import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.common.xcontent.XContentParser.Token +import org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.indexmanagement.spi.indexstatemanagement.addObject /** Properties that will persist across steps of a single Action. Will be stored in the [ActionMetaData]. */ // TODO: Create namespaces to group properties together @@ -20,7 +22,8 @@ data class ActionProperties( val maxNumSegments: Int? = null, val snapshotName: String? = null, val rollupId: String? = null, - val hasRollupFailed: Boolean? = null + val hasRollupFailed: Boolean? = null, + val shrinkActionProperties: ShrinkActionProperties? = null ) : Writeable, ToXContentFragment { override fun writeTo(out: StreamOutput) { @@ -28,6 +31,7 @@ data class ActionProperties( out.writeOptionalString(snapshotName) out.writeOptionalString(rollupId) out.writeOptionalBoolean(hasRollupFailed) + out.writeOptionalWriteable(shrinkActionProperties) } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -35,6 +39,7 @@ data class ActionProperties( if (snapshotName != null) builder.field(Properties.SNAPSHOT_NAME.key, snapshotName) if (rollupId != null) builder.field(Properties.ROLLUP_ID.key, rollupId) if (hasRollupFailed != null) builder.field(Properties.HAS_ROLLUP_FAILED.key, hasRollupFailed) + if (shrinkActionProperties != null) builder.addObject(ShrinkActionProperties.SHRINK_ACTION_PROPERTIES, shrinkActionProperties, params) return builder } @@ -46,8 +51,8 @@ data class ActionProperties( val snapshotName: String? 
= si.readOptionalString() val rollupId: String? = si.readOptionalString() val hasRollupFailed: Boolean? = si.readOptionalBoolean() - - return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed) + val shrinkActionProperties: ShrinkActionProperties? = si.readOptionalWriteable { ShrinkActionProperties.fromStreamInput(it) } + return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed, shrinkActionProperties) } fun parse(xcp: XContentParser): ActionProperties { @@ -55,9 +60,10 @@ data class ActionProperties( var snapshotName: String? = null var rollupId: String? = null var hasRollupFailed: Boolean? = null + var shrinkActionProperties: ShrinkActionProperties? = null - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) - while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + ensureExpectedToken(Token.START_OBJECT, xcp.currentToken(), xcp) + while (xcp.nextToken() != Token.END_OBJECT) { val fieldName = xcp.currentName() xcp.nextToken() @@ -66,10 +72,13 @@ data class ActionProperties( Properties.SNAPSHOT_NAME.key -> snapshotName = xcp.text() Properties.ROLLUP_ID.key -> rollupId = xcp.text() Properties.HAS_ROLLUP_FAILED.key -> hasRollupFailed = xcp.booleanValue() + ShrinkActionProperties.SHRINK_ACTION_PROPERTIES -> { + shrinkActionProperties = if (xcp.currentToken() == Token.VALUE_NULL) null else ShrinkActionProperties.parse(xcp) + } } } - return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed) + return ActionProperties(maxNumSegments, snapshotName, rollupId, hasRollupFailed, shrinkActionProperties) } } diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt new file mode 100644 index 000000000..8c96f7f6f --- /dev/null +++ 
b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/ShrinkActionProperties.kt @@ -0,0 +1,120 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.spi.indexstatemanagement.model + +import org.opensearch.common.io.stream.StreamInput +import org.opensearch.common.io.stream.StreamOutput +import org.opensearch.common.io.stream.Writeable +import org.opensearch.common.xcontent.ToXContent +import org.opensearch.common.xcontent.ToXContentFragment +import org.opensearch.common.xcontent.XContentBuilder +import org.opensearch.common.xcontent.XContentParser +import org.opensearch.common.xcontent.XContentParserUtils + +data class ShrinkActionProperties( + val nodeName: String, + val targetIndexName: String, + val targetNumShards: Int, + val lockPrimaryTerm: Long, + val lockSeqNo: Long, + val lockEpochSecond: Long, + val lockDurationSecond: Long, + // Used to store the original index allocation and write block setting to reapply after shrink + val originalIndexSettings: Map +) : Writeable, ToXContentFragment { + + override fun writeTo(out: StreamOutput) { + out.writeString(nodeName) + out.writeString(targetIndexName) + out.writeInt(targetNumShards) + out.writeLong(lockPrimaryTerm) + out.writeLong(lockSeqNo) + out.writeLong(lockEpochSecond) + out.writeLong(lockDurationSecond) + out.writeMap(originalIndexSettings) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.field(ShrinkProperties.NODE_NAME.key, nodeName) + builder.field(ShrinkProperties.TARGET_INDEX_NAME.key, targetIndexName) + builder.field(ShrinkProperties.TARGET_NUM_SHARDS.key, targetNumShards) + builder.field(ShrinkProperties.LOCK_SEQ_NO.key, lockSeqNo) + builder.field(ShrinkProperties.LOCK_PRIMARY_TERM.key, lockPrimaryTerm) + builder.field(ShrinkProperties.LOCK_EPOCH_SECOND.key, lockEpochSecond) + 
builder.field(ShrinkProperties.LOCK_DURATION_SECOND.key, lockDurationSecond) + builder.field(ShrinkProperties.ORIGINAL_INDEX_SETTINGS.key, originalIndexSettings) + return builder + } + + companion object { + const val SHRINK_ACTION_PROPERTIES = "shrink_action_properties" + + fun fromStreamInput(si: StreamInput): ShrinkActionProperties { + val nodeName: String = si.readString() + val targetIndexName: String = si.readString() + val targetNumShards: Int = si.readInt() + val lockPrimaryTerm: Long = si.readLong() + val lockSeqNo: Long = si.readLong() + val lockEpochSecond: Long = si.readLong() + val lockDurationSecond: Long = si.readLong() + val originalIndexSettings: Map = si.readMap({ it.readString() }, { it.readString() }) + + return ShrinkActionProperties( + nodeName, targetIndexName, targetNumShards, lockPrimaryTerm, lockSeqNo, lockEpochSecond, lockDurationSecond, originalIndexSettings + ) + } + + fun parse(xcp: XContentParser): ShrinkActionProperties { + var nodeName: String? = null + var targetIndexName: String? = null + var targetNumShards: Int? = null + var lockPrimaryTerm: Long? = null + var lockSeqNo: Long? = null + var lockEpochSecond: Long? = null + var lockDurationSecond: Long? = null + var originalIndexSettings: Map? 
= null + + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + + when (fieldName) { + ShrinkProperties.NODE_NAME.key -> nodeName = xcp.text() + ShrinkProperties.TARGET_INDEX_NAME.key -> targetIndexName = xcp.text() + ShrinkProperties.TARGET_NUM_SHARDS.key -> targetNumShards = xcp.intValue() + ShrinkProperties.LOCK_PRIMARY_TERM.key -> lockPrimaryTerm = xcp.longValue() + ShrinkProperties.LOCK_SEQ_NO.key -> lockSeqNo = xcp.longValue() + ShrinkProperties.LOCK_EPOCH_SECOND.key -> lockEpochSecond = xcp.longValue() + ShrinkProperties.LOCK_DURATION_SECOND.key -> lockDurationSecond = xcp.longValue() + ShrinkProperties.ORIGINAL_INDEX_SETTINGS.key -> originalIndexSettings = xcp.mapStrings() + } + } + + return ShrinkActionProperties( + requireNotNull(nodeName), + requireNotNull(targetIndexName), + requireNotNull(targetNumShards), + requireNotNull(lockPrimaryTerm), + requireNotNull(lockSeqNo), + requireNotNull(lockEpochSecond), + requireNotNull(lockDurationSecond), + requireNotNull(originalIndexSettings) + ) + } + } + + enum class ShrinkProperties(val key: String) { + NODE_NAME("node_name"), + TARGET_INDEX_NAME("target_index_name"), + TARGET_NUM_SHARDS("target_num_shards"), + LOCK_SEQ_NO("lock_seq_no"), + LOCK_PRIMARY_TERM("lock_primary_term"), + LOCK_EPOCH_SECOND("lock_epoch_second"), + LOCK_DURATION_SECOND("lock_duration_second"), + ORIGINAL_INDEX_SETTINGS("original_index_settings") + } +} diff --git a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt index 6773d08f4..6ee4ff4f9 100644 --- a/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt +++ 
b/spi/src/main/kotlin/org.opensearch.indexmanagement.spi/indexstatemanagement/model/StepContext.kt @@ -10,6 +10,7 @@ import org.opensearch.cluster.service.ClusterService import org.opensearch.common.settings.Settings import org.opensearch.common.util.concurrent.ThreadContext import org.opensearch.commons.authuser.User +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService class StepContext( @@ -20,8 +21,9 @@ class StepContext( val user: User?, val scriptService: ScriptService, val settings: Settings, + val lockService: LockService ) { fun getUpdatedContext(metadata: ManagedIndexMetaData): StepContext { - return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings) + return StepContext(metadata, this.clusterService, this.client, this.threadContext, this.user, this.scriptService, this.settings, this.lockService) } } diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt index 9b0160658..7783250e5 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ISMActionsParser.kt @@ -20,6 +20,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ReadWriteActio import org.opensearch.indexmanagement.indexstatemanagement.action.ReplicaCountActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.RolloverActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.RollupActionParser +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkActionParser import org.opensearch.indexmanagement.indexstatemanagement.action.SnapshotActionParser import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import 
org.opensearch.indexmanagement.spi.indexstatemanagement.ActionParser @@ -45,6 +46,7 @@ class ISMActionsParser private constructor() { ReplicaCountActionParser(), RollupActionParser(), RolloverActionParser(), + ShrinkActionParser(), SnapshotActionParser() ) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt index c4cdce159..9074db28a 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/ManagedIndexRunner.kt @@ -220,7 +220,7 @@ object ManagedIndexRunner : if (lock == null) { logger.debug("Could not acquire lock [${lock?.lockId}] for ${job.index}") } else { - runManagedIndexConfig(job) + runManagedIndexConfig(job, context) // Release lock val released: Boolean = context.lockService.suspendUntil { release(lock, it) } if (!released) { @@ -231,7 +231,7 @@ object ManagedIndexRunner : } @Suppress("ReturnCount", "ComplexMethod", "LongMethod", "ComplexCondition", "NestedBlockDepth") - private suspend fun runManagedIndexConfig(managedIndexConfig: ManagedIndexConfig) { + private suspend fun runManagedIndexConfig(managedIndexConfig: ManagedIndexConfig, jobContext: JobExecutionContext) { logger.debug("Run job for index ${managedIndexConfig.index}") // doing a check of local cluster health as we do not want to overload master node with potentially a lot of calls if (clusterIsRed()) { @@ -304,7 +304,9 @@ object ManagedIndexRunner : val state = policy.getStateToExecute(managedIndexMetaData) val action: Action? 
= state?.getActionToExecute(managedIndexMetaData, indexMetadataProvider) - val stepContext = StepContext(managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings) + val stepContext = StepContext( + managedIndexMetaData, clusterService, client, threadPool.threadContext, policy.user, scriptService, settings, jobContext.lockService + ) val step: Step? = action?.getStepToExecute(stepContext) val currentActionMetaData = action?.getUpdatedActionMetadata(managedIndexMetaData, state.name) diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt new file mode 100644 index 000000000..da8d379f1 --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkAction.kt @@ -0,0 +1,139 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.action + +import org.opensearch.action.admin.indices.alias.Alias +import org.opensearch.common.io.stream.StreamOutput +import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.common.xcontent.ToXContent +import org.opensearch.common.xcontent.XContentBuilder +import org.opensearch.indexmanagement.indexstatemanagement.action.NotificationAction.Companion.MUSTACHE +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptShrinkStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForShrinkStep +import org.opensearch.indexmanagement.opensearchapi.aliasesField +import org.opensearch.indexmanagement.spi.indexstatemanagement.Action +import 
org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.script.Script + +@Suppress("LongParameterList") +class ShrinkAction( + val numNewShards: Int?, + val maxShardSize: ByteSizeValue?, + val percentageOfSourceShards: Double?, + val targetIndexTemplate: Script?, + val aliases: List?, + val forceUnsafe: Boolean?, + index: Int +) : Action(name, index) { + init { + val numSet = arrayOf(maxShardSize != null, percentageOfSourceShards != null, numNewShards != null).count { it } + require(numSet == 1) { "Exactly one option specifying the number of shards to shrink to must be used." } + + if (maxShardSize != null) { + require(maxShardSize.bytes > 0) { "Shrink action maxShardSize must be greater than 0." } + } + if (percentageOfSourceShards != null) { + require(percentageOfSourceShards > 0.0 && percentageOfSourceShards < 1.0) { + "Percentage of source shards must be between 0.0 and 1.0 exclusively" + } + } + if (numNewShards != null) { + require(numNewShards > 0) { "Shrink action numNewShards must be greater than 0." 
} + } + if (targetIndexTemplate != null) { + require(targetIndexTemplate.lang == MUSTACHE) { "Target index name template must be a mustache script" } + } + } + + private val attemptMoveShardsStep = AttemptMoveShardsStep(this) + private val waitForMoveShardsStep = WaitForMoveShardsStep(this) + private val attemptShrinkStep = AttemptShrinkStep(this) + private val waitForShrinkStep = WaitForShrinkStep(this) + + private val stepNameToStep: LinkedHashMap = linkedMapOf( + AttemptMoveShardsStep.name to attemptMoveShardsStep, + WaitForMoveShardsStep.name to waitForMoveShardsStep, + AttemptShrinkStep.name to attemptShrinkStep, + WaitForShrinkStep.name to waitForShrinkStep + ) + override fun getSteps(): List = listOf(attemptMoveShardsStep, waitForMoveShardsStep, attemptShrinkStep, waitForShrinkStep) + + @SuppressWarnings("ReturnCount") + override fun getStepToExecute(context: StepContext): Step { + val stepMetaData = context.metadata.stepMetaData ?: return attemptMoveShardsStep + val currentStep = stepMetaData.name + + // If the current step is not from this action, assume it is from another action. + if (!stepNameToStep.containsKey(currentStep)) return attemptMoveShardsStep + + val currentStepStatus = stepMetaData.stepStatus + if (currentStepStatus == Step.StepStatus.COMPLETED) { + return when (currentStep) { + AttemptMoveShardsStep.name -> waitForMoveShardsStep + WaitForMoveShardsStep.name -> attemptShrinkStep + AttemptShrinkStep.name -> waitForShrinkStep + // We do not expect to ever hit this point, but if we do somehow, starting over is safe. 
+ else -> attemptMoveShardsStep + } + } else if (currentStepStatus == Step.StepStatus.FAILED) { + // If we failed at any point, retries should start from the beginning + return attemptMoveShardsStep + } + + // step not completed, return the same step + return when (stepMetaData.name) { + AttemptMoveShardsStep.name -> attemptMoveShardsStep + WaitForMoveShardsStep.name -> waitForMoveShardsStep + AttemptShrinkStep.name -> attemptShrinkStep + WaitForShrinkStep.name -> waitForShrinkStep + // Again, we don't expect to ever hit this point + else -> attemptMoveShardsStep + } + } + + override fun populateAction(builder: XContentBuilder, params: ToXContent.Params) { + builder.startObject(type) + if (numNewShards != null) builder.field(NUM_NEW_SHARDS_FIELD, numNewShards) + if (maxShardSize != null) builder.field(MAX_SHARD_SIZE_FIELD, maxShardSize.stringRep) + if (percentageOfSourceShards != null) builder.field(PERCENTAGE_OF_SOURCE_SHARDS_FIELD, percentageOfSourceShards) + if (targetIndexTemplate != null) builder.field(TARGET_INDEX_TEMPLATE_FIELD, targetIndexTemplate) + if (aliases != null) { builder.aliasesField(aliases) } + if (forceUnsafe != null) builder.field(FORCE_UNSAFE_FIELD, forceUnsafe) + builder.endObject() + } + + override fun populateAction(out: StreamOutput) { + out.writeOptionalInt(numNewShards) + out.writeOptionalWriteable(maxShardSize) + out.writeOptionalDouble(percentageOfSourceShards) + out.writeBoolean(targetIndexTemplate != null) + targetIndexTemplate?.writeTo(out) + if (aliases != null) { + out.writeBoolean(true) + out.writeList(aliases) + } else { + out.writeBoolean(false) + } + out.writeOptionalBoolean(forceUnsafe) + out.writeInt(actionIndex) + } + + companion object { + const val name = "shrink" + const val NUM_NEW_SHARDS_FIELD = "num_new_shards" + const val PERCENTAGE_OF_SOURCE_SHARDS_FIELD = "percentage_of_source_shards" + const val MAX_SHARD_SIZE_FIELD = "max_shard_size" + const val TARGET_INDEX_TEMPLATE_FIELD = "target_index_name_template" + const 
val ALIASES_FIELD = "aliases" + const val FORCE_UNSAFE_FIELD = "force_unsafe" + const val LOCK_RESOURCE_TYPE = "shrink" + const val LOCK_RESOURCE_NAME = "node_name" + fun getSecurityFailureMessage(failure: String) = "Shrink action failed because of missing permissions: $failure" + } +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt new file mode 100644 index 000000000..d74e65bb6 --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionParser.kt @@ -0,0 +1,77 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.action + +import org.opensearch.action.admin.indices.alias.Alias +import org.opensearch.common.io.stream.StreamInput +import org.opensearch.common.unit.ByteSizeValue +import org.opensearch.common.xcontent.XContentParser +import org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.ALIASES_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.FORCE_UNSAFE_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.MAX_SHARD_SIZE_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.NUM_NEW_SHARDS_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.PERCENTAGE_OF_SOURCE_SHARDS_FIELD +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.TARGET_INDEX_TEMPLATE_FIELD +import org.opensearch.indexmanagement.spi.indexstatemanagement.Action +import org.opensearch.indexmanagement.spi.indexstatemanagement.ActionParser +import org.opensearch.script.Script + +class 
ShrinkActionParser : ActionParser() { + override fun fromStreamInput(sin: StreamInput): Action { + val numNewShards = sin.readOptionalInt() + val maxShardSize = sin.readOptionalWriteable(::ByteSizeValue) + val percentageOfSourceShards = sin.readOptionalDouble() + val targetIndexTemplate = if (sin.readBoolean()) Script(sin) else null + val aliases = if (sin.readBoolean()) sin.readList(::Alias) else null + val forceUnsafe = sin.readOptionalBoolean() + val index = sin.readInt() + + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, index) + } + + @Suppress("NestedBlockDepth") + override fun fromXContent(xcp: XContentParser, index: Int): Action { + var numNewShards: Int? = null + var maxShardSize: ByteSizeValue? = null + var percentageOfSourceShards: Double? = null + var targetIndexTemplate: Script? = null + var aliases: List? = null + var forceUnsafe: Boolean? = null + + ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.currentToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + + when (fieldName) { + NUM_NEW_SHARDS_FIELD -> numNewShards = xcp.intValue() + MAX_SHARD_SIZE_FIELD -> maxShardSize = ByteSizeValue.parseBytesSizeValue(xcp.text(), MAX_SHARD_SIZE_FIELD) + PERCENTAGE_OF_SOURCE_SHARDS_FIELD -> percentageOfSourceShards = xcp.doubleValue() + TARGET_INDEX_TEMPLATE_FIELD -> targetIndexTemplate = Script.parse(xcp, Script.DEFAULT_TEMPLATE_LANG) + ALIASES_FIELD -> { + if (xcp.currentToken() != XContentParser.Token.VALUE_NULL) { + ensureExpectedToken(XContentParser.Token.START_ARRAY, xcp.currentToken(), xcp) + aliases = mutableListOf() + while (xcp.nextToken() != XContentParser.Token.END_ARRAY) { + ensureExpectedToken(XContentParser.Token.FIELD_NAME, xcp.nextToken(), xcp) + aliases.add(Alias.fromXContent(xcp)) + ensureExpectedToken(XContentParser.Token.END_OBJECT, xcp.nextToken(), xcp) + } + } + } + FORCE_UNSAFE_FIELD 
-> forceUnsafe = xcp.booleanValue() + else -> throw IllegalArgumentException("Invalid field: [$fieldName] found in ShrinkAction.") + } + } + + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, index) + } + + override fun getActionType(): String { + return ShrinkAction.name + } +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt new file mode 100644 index 000000000..d1901f111 --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/AttemptMoveShardsStep.kt @@ -0,0 +1,450 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.step.shrink + +import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchSecurityException +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteRequest +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse +import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.client.Client +import org.opensearch.cluster.metadata.IndexMetadata.SETTING_BLOCKS_WRITE +import org.opensearch.cluster.metadata.MetadataCreateIndexService.validateIndexOrAliasName +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand +import org.opensearch.cluster.routing.allocation.decider.Decision +import org.opensearch.cluster.service.ClusterService +import 
org.opensearch.common.collect.Tuple +import org.opensearch.common.settings.Settings +import org.opensearch.index.shard.DocsStats +import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage +import org.opensearch.indexmanagement.indexstatemanagement.model.ManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getIntervalFromManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getManagedIndexConfig +import org.opensearch.indexmanagement.indexstatemanagement.util.getNodeFreeMemoryAfterShrink +import org.opensearch.indexmanagement.indexstatemanagement.util.getShrinkLockID +import org.opensearch.indexmanagement.indexstatemanagement.util.isIndexGreen +import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest +import org.opensearch.indexmanagement.opensearchapi.convertToMap +import org.opensearch.indexmanagement.opensearchapi.suspendUntil +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.indices.InvalidIndexNameException +import org.opensearch.jobscheduler.repackage.com.cronutils.utils.VisibleForTesting +import org.opensearch.jobscheduler.spi.LockModel +import org.opensearch.jobscheduler.spi.utils.LockService +import org.opensearch.script.Script +import 
/**
 * First step of the ISM shrink action: selects a target node with enough free disk, acquires a
 * job-scheduler lock on that node, records the chosen configuration in [ShrinkActionProperties],
 * sets the source index read-only, and starts moving all shards onto the selected node.
 *
 * NOTE(review): generic type arguments in this file were stripped by the patch-extraction tooling
 * (e.g. `Map?`, `Pair?`, bare `List`); they are restored here so the file compiles.
 */
@SuppressWarnings("TooManyFunctions")
class AttemptMoveShardsStep(private val action: ShrinkAction) : Step(name) {
    private val logger = LogManager.getLogger(javaClass)
    private var stepStatus = StepStatus.STARTING
    // Step result details surfaced through the explain API
    private var info: Map<String, Any>? = null
    // Populated on success; consumed by the subsequent shrink steps. Cleared again on failure.
    private var shrinkActionProperties: ShrinkActionProperties? = null

    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "LongMethod")
    override suspend fun execute(): Step {
        val context = this.context ?: return this
        val client = context.client
        val indexName = context.metadata.index

        try {
            val shrinkTargetIndexName =
                compileTemplate(action.targetIndexTemplate, context.metadata, indexName + DEFAULT_TARGET_SUFFIX, context.scriptService)

            if (targetIndexNameIsInvalid(context.clusterService, shrinkTargetIndexName)) return this

            if (!isIndexGreen(client, indexName)) {
                info = mapOf("message" to INDEX_NOT_GREEN_MESSAGE)
                stepStatus = StepStatus.CONDITION_NOT_MET
                return this
            }

            if (shouldFailUnsafe(context.clusterService, indexName)) return this

            // If there is only one primary shard we complete the step and in getUpdatedManagedIndexMetadata will start a no-op
            val numOriginalShards = context.clusterService.state().metadata.indices[indexName].numberOfShards
            if (numOriginalShards == 1) {
                info = mapOf("message" to ONE_PRIMARY_SHARD_MESSAGE)
                stepStatus = StepStatus.COMPLETED
                return this
            }

            // Get the size of the index
            val statsRequest = IndicesStatsRequest().indices(indexName)
            val statsResponse: IndicesStatsResponse = client.admin().indices().suspendUntil {
                stats(statsRequest, it)
            }
            val statsStore = statsResponse.total.store
            val statsDocs = statsResponse.total.docs
            if (statsStore == null || statsDocs == null) {
                logger.error("Failed to move shards in shrink action as IndicesStatsResponse was missing store or doc stats.")
                fail(FAILURE_MESSAGE)
                return this
            }
            val indexSize = statsStore.sizeInBytes
            val numTargetShards = getNumTargetShards(numOriginalShards, indexSize)

            if (shouldFailTooManyDocuments(statsDocs, numTargetShards)) return this

            val originalIndexSettings = getOriginalSettings(indexName, context.clusterService)

            // get the nodes with enough memory in increasing order of free space
            val suitableNodes = findSuitableNodes(context, statsResponse, indexSize)

            // Get the job interval to use in determining the lock length
            val interval = getJobIntervalSeconds(context.metadata.indexUuid, client)
            // iterate through the nodes and try to acquire a lock on one
            val lockToNodeName: Pair<LockModel, String>? = acquireLockFromNodeList(context.lockService, suitableNodes, interval)
            if (lockToNodeName == null) {
                logger.info("$indexName could not find available node to shrink onto.")
                info = mapOf("message" to NO_AVAILABLE_NODES_MESSAGE)
                stepStatus = StepStatus.CONDITION_NOT_MET
                return this
            }
            val (lock, nodeName) = lockToNodeName
            shrinkActionProperties = ShrinkActionProperties(
                nodeName,
                shrinkTargetIndexName,
                numTargetShards,
                lock.primaryTerm,
                lock.seqNo,
                lock.lockTime.epochSecond,
                lock.lockDurationSeconds,
                originalIndexSettings
            )

            setToReadOnlyAndMoveIndexToNode(context, nodeName, lock)
            info = mapOf("message" to getSuccessMessage(nodeName))
            stepStatus = StepStatus.COMPLETED
            return this
        } catch (e: OpenSearchSecurityException) {
            fail(getSecurityFailureMessage(e.localizedMessage), e.message, e)
            return this
        } catch (e: RemoteTransportException) {
            val unwrappedException = ExceptionsHelper.unwrapCause(e)
            fail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as java.lang.Exception)
            return this
        } catch (e: Exception) {
            fail(FAILURE_MESSAGE, e.message, e)
            return this
        }
    }

    /**
     * Marks the step FAILED and clears [shrinkActionProperties] so later steps do not act on stale
     * state. Logs [e] when provided; includes [cause] in the info map when provided.
     */
    private fun fail(message: String, cause: String? = null, e: Exception? = null) {
        e?.let { logger.error(message, e) }
        info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause)
        stepStatus = StepStatus.FAILED
        shrinkActionProperties = null
    }

    // Gets the routing and write block setting of the index and returns it in a map of setting name to setting.
    // These are restored on the source index after the shrink completes or fails.
    private fun getOriginalSettings(indexName: String, clusterService: ClusterService): Map<String, String> {
        val indexSettings = clusterService.state().metadata.index(indexName).settings
        val originalSettings = mutableMapOf<String, String>()
        indexSettings.get(ROUTING_SETTING)?.let { originalSettings.put(ROUTING_SETTING, it) }
        indexSettings.get(SETTING_BLOCKS_WRITE)?.let { originalSettings.put(SETTING_BLOCKS_WRITE, it) }
        return originalSettings
    }

    /**
     * Renders the user-supplied target index name template against a restricted view of the managed
     * index metadata ([ALLOWED_TEMPLATE_FIELDS]); falls back to [defaultValue] when no template is
     * given or the rendered result is blank.
     */
    private fun compileTemplate(
        template: Script?,
        managedIndexMetaData: ManagedIndexMetaData,
        defaultValue: String,
        scriptService: ScriptService
    ): String {
        if (template == null) return defaultValue
        val contextMap = managedIndexMetaData.convertToMap().filterKeys { key ->
            key in ALLOWED_TEMPLATE_FIELDS
        }
        val compiledValue = scriptService.compile(template, TemplateScript.CONTEXT)
            .newInstance(template.params + mapOf("ctx" to contextMap))
            .execute()
        return compiledValue.ifBlank { defaultValue }
    }

    /** Returns the managed index job's schedule interval in seconds, or null if it cannot be fetched. */
    private suspend fun getJobIntervalSeconds(indexUuid: String, client: Client): Long? {
        val managedIndexConfig: ManagedIndexConfig?
        try {
            managedIndexConfig = getManagedIndexConfig(indexUuid, client)
        } catch (e: Exception) {
            // If we fail to get the managedIndexConfig, just return null and the default lock
            // duration (DEFAULT_LOCK_INTERVAL, 3 hours) will be used later
            return null
        }
        // Divide the interval by 1000 to convert from ms to seconds
        return managedIndexConfig?.let { getIntervalFromManagedIndexConfig(it) / MILLISECONDS_IN_SECOND }
    }

    /** Fails the step when the average docs per target shard would exceed the per-shard document limit. */
    private fun shouldFailTooManyDocuments(docsStats: DocsStats, numTargetShards: Int): Boolean {
        val totalDocs: Long = docsStats.count
        val docsPerTargetShard: Long = totalDocs / numTargetShards
        // NOTE(review): Lucene's hard limit is Integer.MAX_VALUE (2^31 - 1) docs per shard, while
        // MAXIMUM_DOCS_PER_SHARD is 2^31 and the comparison is strict — a shard with exactly 2^31
        // docs would pass this check. Confirm whether the off-by-one is intentional headroom.
        if (docsPerTargetShard > MAXIMUM_DOCS_PER_SHARD) {
            logger.error(TOO_MANY_DOCS_FAILURE_MESSAGE)
            fail(TOO_MANY_DOCS_FAILURE_MESSAGE)
            return true
        }
        return false
    }

    /*
     * Returns whether the action should fail due to being unsafe. The action is unsafe if there are no replicas. If forceUnsafe
     * is set, then this always returns false.
     */
    @Suppress("ReturnCount")
    private fun shouldFailUnsafe(clusterService: ClusterService, indexName: String): Boolean {
        // If forceUnsafe is set and is true, then we don't even need to check the number of replicas
        if (action.forceUnsafe == true) return false
        val numReplicas = clusterService.state().metadata.indices[indexName].numberOfReplicas
        val shouldFailForceUnsafeCheck = numReplicas == 0
        if (shouldFailForceUnsafeCheck) {
            logger.info(UNSAFE_FAILURE_MESSAGE)
            fail(UNSAFE_FAILURE_MESSAGE)
            return true
        }
        return false
    }

    /**
     * Fails the step when the target name already exists as an index; otherwise validates the name
     * against OpenSearch index-naming rules, throwing (and thereby failing the step via execute's
     * catch) with a user-visible reason when the name is invalid.
     */
    private fun targetIndexNameIsInvalid(clusterService: ClusterService, shrinkTargetIndexName: String): Boolean {
        val indexExists = clusterService.state().metadata.indices.containsKey(shrinkTargetIndexName)
        if (indexExists) {
            val indexExistsMessage = getIndexExistsMessage(shrinkTargetIndexName)
            logger.error(indexExistsMessage)
            fail(indexExistsMessage)
            return true
        }
        val exceptionGenerator: (String, String) -> RuntimeException = { indexName, reason ->
            InvalidIndexNameException(indexName, reason)
        }
        // If the index name is invalid for any reason, this will throw an exception giving the reason why in the message.
        // That will be displayed to the user as the cause.
        validateIndexOrAliasName(shrinkTargetIndexName, exceptionGenerator)
        return false
    }

    /**
     * Applies the write block and node-routing settings to the source index. If the settings update
     * is not acknowledged, fails the step and releases the previously acquired node [lock].
     */
    private suspend fun setToReadOnlyAndMoveIndexToNode(stepContext: StepContext, node: String, lock: LockModel): Boolean {
        val updateSettings = Settings.builder()
            .put(SETTING_BLOCKS_WRITE, true)
            .put(ROUTING_SETTING, node)
            .build()
        val lockService = stepContext.lockService
        var response: AcknowledgedResponse? = null
        val isUpdateAcknowledged: Boolean
        try {
            response = issueUpdateSettingsRequest(stepContext.client, stepContext.metadata.index, updateSettings)
        } finally {
            isUpdateAcknowledged = response != null && response.isAcknowledged
            if (!isUpdateAcknowledged) {
                fail(UPDATE_FAILED_MESSAGE)
                val released: Boolean = lockService.suspendUntil { release(lock, it) }
                if (!released) {
                    logger.error("Failed to release Shrink action lock on node [$node]")
                }
            }
        }
        return isUpdateAcknowledged
    }

    /*
     * Iterates through each suitable node in order, attempting to acquire a resource lock. Returns the first lock which
     * is successfully acquired and the name of the node it acquired the lock on in a pair.
     */
    private suspend fun acquireLockFromNodeList(
        lockService: LockService,
        suitableNodes: List<String>,
        jobIntervalSeconds: Long?
    ): Pair<LockModel, String>? {
        for (nodeName in suitableNodes) {
            val lockID = getShrinkLockID(nodeName)
            val lock: LockModel? = lockService.suspendUntil {
                acquireLockWithId(INDEX_MANAGEMENT_INDEX, getShrinkLockDuration(jobIntervalSeconds), lockID, it)
            }
            if (lock != null) {
                return lock to nodeName
            }
        }
        return null
    }

    /*
     * Returns the list of node names for nodes with enough space to shrink to, in increasing order of space available
     */
    @VisibleForTesting
    @SuppressWarnings("NestedBlockDepth", "ComplexMethod")
    private suspend fun findSuitableNodes(
        stepContext: StepContext,
        indicesStatsResponse: IndicesStatsResponse,
        indexSizeInBytes: Long
    ): List<String> {
        val nodesStatsReq = NodesStatsRequest().addMetric(OS_METRIC)
        val nodeStatsResponse: NodesStatsResponse = stepContext.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) }
        val nodesList = nodeStatsResponse.nodes.filter { it.node.isDataNode }
        // Sort in increasing order of keys, in our case this is memory remaining
        val comparator = kotlin.Comparator { o1: Tuple<Long, String>, o2: Tuple<Long, String> -> o1.v1().compareTo(o2.v1()) }
        val nodesWithSpace = PriorityQueue<Tuple<Long, String>>(comparator)
        for (node in nodesList) {
            // Gets the amount of memory in the node which will be free below the high watermark level after adding 2*indexSizeInBytes,
            // as the source index is duplicated during the shrink
            val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, stepContext.settings, stepContext.clusterService.clusterSettings)
            if (remainingMem > 0L) {
                nodesWithSpace.add(Tuple(remainingMem, node.node.name))
            }
        }
        val suitableNodes: ArrayList<String> = ArrayList()
        // For each node, do a dry run of moving all shards to the node to make sure that there aren't any other blockers
        // to the allocation.
        for (sizeNodeTuple in nodesWithSpace) {
            val targetNodeName = sizeNodeTuple.v2()
            val indexName = stepContext.metadata.index
            val clusterRerouteRequest = ClusterRerouteRequest().explain(true).dryRun(true)
            var numberOfRerouteRequests = 0
            for (shard in indicesStatsResponse.shards) {
                val shardId = shard.shardRouting.shardId()
                val currentShardNode = stepContext.clusterService.state().nodes[shard.shardRouting.currentNodeId()]
                // Don't attempt a dry run for shards which are already on that node
                if (currentShardNode.name == targetNodeName) continue
                clusterRerouteRequest.add(MoveAllocationCommand(indexName, shardId.id, currentShardNode.name, targetNodeName))
                numberOfRerouteRequests++
            }
            val clusterRerouteResponse: ClusterRerouteResponse =
                stepContext.client.admin().cluster().suspendUntil { reroute(clusterRerouteRequest, it) }
            val numYesDecisions = clusterRerouteResponse.explanations.explanations().count { it.decisions().type() == Decision.Type.YES }
            // Should be the same number of yes decisions as the number of primary shards
            if (numYesDecisions == numberOfRerouteRequests) {
                suitableNodes.add(sizeNodeTuple.v2())
            }
        }
        return suitableNodes
    }

    /**
     * Resolves the target shard count from whichever of the three mutually validated action options
     * was provided (numNewShards, percentageOfSourceShards, maxShardSize), always returning a
     * factor of [numOriginalShards] as required by the resize API.
     */
    @SuppressWarnings("ReturnCount")
    private fun getNumTargetShards(numOriginalShards: Int, indexSize: Long): Int {
        // case where user specifies a certain number of shards in the target index
        if (action.numNewShards != null) return getGreatestFactorLessThan(numOriginalShards, action.numNewShards)

        // case where user specifies a percentage of source shards to shrink to in the number of shards in the target index
        if (action.percentageOfSourceShards != null) {
            val numTargetShards = floor((action.percentageOfSourceShards) * numOriginalShards).toInt()
            return getGreatestFactorLessThan(numOriginalShards, numTargetShards)
        }
        // case where the user specifies a max shard size in the target index
        if (action.maxShardSize != null) {
            val maxShardSizeInBytes = action.maxShardSize.bytes
            // ceiling ensures that numTargetShards is never less than 1
            val minNumTargetShards = ceil(indexSize / maxShardSizeInBytes.toDouble()).toInt()
            // In order to not violate the max shard size condition, this value must be >= minNumTargetShards.
            // If that value doesn't exist, numOriginalShards will be returned
            return getMinFactorGreaterThan(numOriginalShards, minNumTargetShards)
        }
        // Shrink action validation requires that at least one of the above will not be null, but return numOriginalShards for completion
        return numOriginalShards
    }

    /*
     * Returns the greatest number which is <= k and is a factor of n. In the context of the shrink action,
     * n is the original number of shards, k is the attempted number of shards to shrink to. If k is 0, 1 is returned.
     */
    @SuppressWarnings("ReturnCount")
    private fun getGreatestFactorLessThan(n: Int, k: Int): Int {
        if (k >= n) return n
        // The bound is set to the floor of the square root of n, or just k, whichever is lower
        val bound: Int = min(floor(sqrt(n.toDouble())).toInt(), k)
        var greatestFactor = 1
        for (i in 2..bound) {
            if (n % i == 0) {
                // i's complement (n / i) is also a factor; it is the larger candidate, so prefer it
                val complement: Int = n / i
                if (complement <= k) {
                    return complement
                } else {
                    greatestFactor = i
                }
            }
        }
        return greatestFactor
    }

    /*
     * Returns the smallest number which is >= k and is a factor of n. In the context of the shrink action,
     * n is the original number of shards, k is the attempted number of shards to shrink to in a case
     */
    @SuppressWarnings("ReturnCount")
    private fun getMinFactorGreaterThan(n: Int, k: Int): Int {
        if (k >= n) {
            return n
        }
        for (i in k..n) {
            if (n % i == 0) return i
        }
        return n
    }

    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
        val currentActionMetaData = currentMetadata.actionMetaData
        // If we succeeded because there was only one source primary shard, we no-op by skipping to the last step
        val stepMetaData = if (info?.get("message") == ONE_PRIMARY_SHARD_MESSAGE) {
            StepMetaData(WaitForShrinkStep.name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus)
        } else {
            StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus)
        }
        return currentMetadata.copy(
            actionMetaData = currentActionMetaData?.copy(
                actionProperties = ActionProperties(
                    shrinkActionProperties = shrinkActionProperties
                )
            ),
            stepMetaData = stepMetaData,
            transitionTo = null,
            info = info
        )
    }

    override fun isIdempotent() = true

    companion object {
        const val OS_METRIC = "os"
        const val ROUTING_SETTING = "index.routing.allocation.require._name"
        const val DEFAULT_TARGET_SUFFIX = "_shrunken"
        const val name = "attempt_move_shards_step"
        const val UPDATE_FAILED_MESSAGE = "Shrink failed because shard settings could not be updated."
        const val NO_AVAILABLE_NODES_MESSAGE =
            "There are no available nodes to move to to execute a shrink. Delaying until node becomes available."
        const val UNSAFE_FAILURE_MESSAGE = "Shrink failed because index has no replicas and force_unsafe is not set to true."
        const val ONE_PRIMARY_SHARD_MESSAGE = "Shrink action did not do anything because source index only has one primary shard."
        const val TOO_MANY_DOCS_FAILURE_MESSAGE = "Shrink failed because there would be too many documents on each target shard following the shrink."
        const val INDEX_NOT_GREEN_MESSAGE = "Shrink action cannot start moving shards as the index is not green."
        const val FAILURE_MESSAGE = "Shrink failed to start moving shards."
        private const val DEFAULT_LOCK_INTERVAL = 3L * 60L * 60L // Default lock interval is 3 hours in seconds
        private const val MILLISECONDS_IN_SECOND = 1000L
        const val THIRTY_SECONDS_IN_MILLIS = 30L * MILLISECONDS_IN_SECOND
        private const val JOB_INTERVAL_LOCK_MULTIPLIER = 3
        private const val LOCK_BUFFER_SECONDS = 1800
        private const val MAXIMUM_DOCS_PER_SHARD = 0x80000000 // The maximum number of documents per shard is 2^31
        fun getSuccessMessage(node: String) = "Successfully started moving the shards to $node."
        fun getIndexExistsMessage(newIndex: String) = "Shrink failed because $newIndex already exists."
        // If we couldn't get the job interval for the lock, use DEFAULT_LOCK_INTERVAL (3 hours).
        // Lock is 3x + 30 minutes the job interval to allow the next step's execution to extend the lock without losing it.
        // If user sets maximum jitter, it could be 2x the job interval before the next step is executed.
        private fun getShrinkLockDuration(jobInterval: Long?) = jobInterval?.let { (it * JOB_INTERVAL_LOCK_MULTIPLIER) + LOCK_BUFFER_SECONDS }
            ?: DEFAULT_LOCK_INTERVAL
        // Restrict template rendering to non-sensitive, stable metadata fields
        private val ALLOWED_TEMPLATE_FIELDS = setOf("index", "indexUuid")
    }
}
/**
 * Third step of the ISM shrink action: verifies the previously selected node is still suitable
 * (present, with enough free space), renews the node lock, and issues the resize (shrink) request
 * that creates the target index on that node.
 *
 * NOTE(review): generic type arguments stripped by the patch-extraction tooling (`Map?`) are
 * restored here so the file compiles.
 */
class AttemptShrinkStep(private val action: ShrinkAction) : Step(name) {
    private val logger = LogManager.getLogger(javaClass)
    private var stepStatus = StepStatus.STARTING
    // Step result details surfaced through the explain API
    private var info: Map<String, Any>? = null
    // Carried from AttemptMoveShardsStep; refreshed here after the lock renewal
    private var shrinkActionProperties: ShrinkActionProperties? = null

    @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount")
    override suspend fun execute(): AttemptShrinkStep {
        val context = this.context ?: return this
        val indexName = context.metadata.index
        val actionMetadata = context.metadata.actionMetaData
        val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties
        shrinkActionProperties = localShrinkActionProperties
        if (localShrinkActionProperties == null) {
            logger.error(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE)
            cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE)
            return this
        }
        val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger)
        if (lock == null) {
            logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]")
            cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]")
            return this
        }
        // The renewed lock has a new primary term / sequence number; persist them for the next step
        shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock)
        try {
            if (!isIndexGreen(context.client, indexName)) {
                stepStatus = StepStatus.CONDITION_NOT_MET
                info = mapOf("message" to INDEX_HEALTH_NOT_GREEN_MESSAGE)
                return this
            }
            if (!isNodeStillSuitable(localShrinkActionProperties.nodeName, indexName, context)) return this

            // If the resize index api fails, the step will be set to failed and resizeIndex will return false
            if (!resizeIndex(indexName, localShrinkActionProperties, context)) return this
            info = mapOf("message" to getSuccessMessage(localShrinkActionProperties.targetIndexName))
            stepStatus = StepStatus.COMPLETED
            return this
        } catch (e: OpenSearchSecurityException) {
            cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e)
            return this
        } catch (e: RemoteTransportException) {
            val unwrappedException = ExceptionsHelper.unwrapCause(e)
            cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as Exception)
            return this
        } catch (e: Exception) {
            cleanupAndFail(FAILURE_MESSAGE, e.message, e)
            return this
        }
    }

    // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock
    private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? = null) {
        e?.let { logger.error(message, e) }
        info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause)
        stepStatus = StepStatus.FAILED
        // Non-null assertion !! is used to throw an exception on null which would just be caught and logged
        try {
            resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings)
        } catch (e: Exception) {
            logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e")
        }
        try {
            releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger)
        } catch (e: Exception) {
            logger.error("Shrink action failed while trying to release the node lock after a failure: $e")
        }
        shrinkActionProperties = null
    }

    /**
     * Re-validates the node chosen in AttemptMoveShardsStep: it must still be reporting stats and
     * must still have enough free space for both the source and target copies of the index.
     * Fails the step (with cleanup) and returns false otherwise.
     */
    @Suppress("ReturnCount")
    private suspend fun isNodeStillSuitable(nodeName: String, indexName: String, context: StepContext): Boolean {
        // Get the size of the index
        val statsRequest = IndicesStatsRequest().indices(indexName)
        val statsResponse: IndicesStatsResponse = context.client.admin().indices().suspendUntil {
            stats(statsRequest, it)
        }
        val statsStore = statsResponse.total.store
        if (statsStore == null) {
            logger.error("Shrink action failed as indices stats request was missing store stats.")
            cleanupAndFail(FAILURE_MESSAGE)
            return false
        }
        val indexSizeInBytes = statsStore.sizeInBytes
        // Get the remaining memory in the node
        val nodesStatsReq = NodesStatsRequest().addMetric(AttemptMoveShardsStep.OS_METRIC)
        val nodeStatsResponse: NodesStatsResponse = context.client.admin().cluster().suspendUntil { nodesStats(nodesStatsReq, it) }
        // If the node has been replaced, this will fail
        val node = nodeStatsResponse.nodes.firstOrNull { it.node.name == nodeName }
        if (node == null) {
            logger.error("Shrink action failed as node stats were missing the previously selected node.")
            cleanupAndFail(FAILURE_MESSAGE)
            return false
        }
        val remainingMem = getNodeFreeMemoryAfterShrink(node, indexSizeInBytes, context.settings, context.clusterService.clusterSettings)
        if (remainingMem < 1L) {
            logger.error("Shrink action failed as the previously selected node no longer has enough free space.")
            cleanupAndFail(NOT_ENOUGH_SPACE_FAILURE_MESSAGE)
            return false
        }
        return true
    }

    /**
     * Issues the resize request creating [ShrinkActionProperties.targetIndexName] pinned to the
     * selected node with the computed shard count, applying any aliases configured on the action.
     * Fails the step (with cleanup) and returns false when the request is not acknowledged.
     */
    private suspend fun resizeIndex(sourceIndex: String, shrinkActionProperties: ShrinkActionProperties, context: StepContext): Boolean {
        val targetIndex = shrinkActionProperties.targetIndexName
        val req = ResizeRequest(targetIndex, sourceIndex)
        req.targetIndexRequest.settings(
            Settings.builder()
                .put(AttemptMoveShardsStep.ROUTING_SETTING, shrinkActionProperties.nodeName)
                .put(INDEX_NUMBER_OF_SHARDS, shrinkActionProperties.targetNumShards)
                .build()
        )
        action.aliases?.forEach { req.targetIndexRequest.alias(it) }
        val resizeResponse: ResizeResponse = context.client.admin().indices().suspendUntil { resizeIndex(req, it) }
        if (!resizeResponse.isAcknowledged) {
            logger.error("Shrink action failed as the resize index request was not acknowledged.")
            cleanupAndFail(FAILURE_MESSAGE)
            return false
        }
        return true
    }

    override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData {
        return currentMetadata.copy(
            actionMetaData = currentMetadata.actionMetaData?.copy(
                actionProperties = ActionProperties(
                    shrinkActionProperties = shrinkActionProperties
                )
            ),
            stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus),
            transitionTo = null,
            info = info
        )
    }

    // Resizing an index is not idempotent: retrying after the target exists would fail
    override fun isIdempotent() = false

    companion object {
        const val name = "attempt_shrink_step"
        const val FAILURE_MESSAGE = "Shrink failed when sending shrink request."
        const val NOT_ENOUGH_SPACE_FAILURE_MESSAGE = "Shrink failed as the selected node no longer had enough free space to shrink to."
        const val INDEX_HEALTH_NOT_GREEN_MESSAGE = "Shrink delayed because index health is not green."
        fun getSuccessMessage(newIndex: String) = "Shrink started. $newIndex currently being populated."
    }
}
org.opensearch.indexmanagement.opensearchapi.suspendUntil +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.transport.RemoteTransportException +import java.lang.Exception +import java.time.Duration +import java.time.Instant + +class WaitForMoveShardsStep(private val action: ShrinkAction) : Step(name) { + private val logger = LogManager.getLogger(javaClass) + private var stepStatus = StepStatus.STARTING + private var info: Map? = null + private var shrinkActionProperties: ShrinkActionProperties? = null + + @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "NestedBlockDepth") + override suspend fun execute(): WaitForMoveShardsStep { + val context = this.context ?: return this + val indexName = context.metadata.index + val actionMetadata = context.metadata.actionMetaData + val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties + shrinkActionProperties = localShrinkActionProperties + if (localShrinkActionProperties == null) { + logger.error(METADATA_FAILURE_MESSAGE) + cleanupAndFail(METADATA_FAILURE_MESSAGE) + return this + } + val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger) + if (lock == null) { + logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") + cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") + return this + } + // After renewing the lock we need to update the primary term and sequence number + 
shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock) + try { + val indexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(indexName) + val response: IndicesStatsResponse = context.client.admin().indices().suspendUntil { stats(indexStatsRequests, it) } + val numPrimaryShards = context.clusterService.state().metadata.indices[indexName].numberOfShards + val nodeToMoveOnto = localShrinkActionProperties.nodeName + val inSyncAllocations = context.clusterService.state().metadata.indices[indexName].inSyncAllocationIds + val numReplicas = context.clusterService.state().metadata.indices[indexName].numberOfReplicas + var numShardsOnNode = 0 + var numShardsInSync = 0 + for (shard: ShardStats in response.shards) { + val routingInfo = shard.shardRouting + val nodeIdShardIsOn = routingInfo.currentNodeId() + val nodeNameShardIsOn = context.clusterService.state().nodes()[nodeIdShardIsOn].name + if (routingInfo.primary()) { + if (nodeNameShardIsOn.equals(nodeToMoveOnto) && routingInfo.started()) { + numShardsOnNode++ + } + // Either there must be no replicas (force unsafe must have been set) or all replicas must be in sync as + // it isn't known which shard (any replica or primary) will be moved to the target node and used in the shrink. 
+ if (numReplicas == 0 || inSyncAllocations[routingInfo.id].size == (numReplicas + 1)) { + numShardsInSync++ + } + } + } + if (numShardsOnNode >= numPrimaryShards && numShardsInSync >= numPrimaryShards) { + info = mapOf("message" to getSuccessMessage(nodeToMoveOnto)) + stepStatus = StepStatus.COMPLETED + } else { + val numShardsNotOnNode = numPrimaryShards - numShardsOnNode + val numShardsNotInSync = numPrimaryShards - numShardsInSync + checkTimeOut(context, numShardsNotOnNode, numShardsNotInSync, nodeToMoveOnto) + } + return this + } catch (e: OpenSearchSecurityException) { + cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e) + return this + } catch (e: RemoteTransportException) { + val unwrappedException = ExceptionsHelper.unwrapCause(e) + cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e = unwrappedException as Exception) + return this + } catch (e: Exception) { + cleanupAndFail(FAILURE_MESSAGE, cause = e.message, e) + return this + } + } + + // Sets the action to failed, clears the readonly and allocation settings on the source index, and releases the shrink lock + private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? = null) { + e?.let { logger.error(message, e) } + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + // Non-null assertion !! 
is used to throw an exception on null which would just be caught and logged + try { + resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") + } + try { + releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to release the node lock after a failure: $e") + } + shrinkActionProperties = null + } + + override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { + return currentMetadata.copy( + actionMetaData = currentMetadata.actionMetaData?.copy( + actionProperties = ActionProperties( + shrinkActionProperties = shrinkActionProperties + ) + ), + stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), + transitionTo = null, + info = info + ) + } + + private suspend fun checkTimeOut( + stepContext: StepContext, + numShardsNotOnNode: Int, + numShardsNotInSync: Int, + nodeToMoveOnto: String + ) { + val managedIndexMetadata = stepContext.metadata + val indexName = managedIndexMetadata.index + val timeSinceActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) + val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: MOVE_SHARDS_TIMEOUT_IN_SECONDS + // Get ActionTimeout if given, otherwise use default timeout of 12 hours + if (timeSinceActionStarted.toSeconds() > timeOutInSeconds) { + logger.error( + "Shrink Action move shards failed on [$indexName], the action timed out with [$numShardsNotOnNode] shards not yet " + + "moved and [$numShardsNotInSync] shards without an in sync replica." 
+ ) + cleanupAndFail(getTimeoutFailure(nodeToMoveOnto)) + } else { + logger.debug( + "Shrink action move shards step running on [$indexName], [$numShardsNotOnNode] shards need to be moved, " + + "[$numShardsNotInSync] shards need an in sync replica." + ) + info = mapOf("message" to getTimeoutDelay(nodeToMoveOnto)) + stepStatus = StepStatus.CONDITION_NOT_MET + } + } + + override fun isIdempotent() = true + + companion object { + const val name = "wait_for_move_shards_step" + fun getSuccessMessage(node: String) = "The shards successfully moved to $node." + fun getTimeoutFailure(node: String) = "Shrink failed because it took too long to move shards to $node" + fun getTimeoutDelay(node: String) = "Shrink delayed because it took too long to move shards to $node" + const val FAILURE_MESSAGE = "Shrink failed when waiting for shards to move." + const val METADATA_FAILURE_MESSAGE = "Shrink action properties are null, metadata was not properly populated" + const val MOVE_SHARDS_TIMEOUT_IN_SECONDS = 43200L // 12hrs in seconds + } +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt new file mode 100644 index 000000000..d075ec45a --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/shrink/WaitForShrinkStep.kt @@ -0,0 +1,181 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.step.shrink + +import org.apache.logging.log4j.LogManager +import org.opensearch.ExceptionsHelper +import org.opensearch.OpenSearchSecurityException +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest +import org.opensearch.action.admin.indices.stats.IndicesStatsRequest +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse +import
org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.client.Client +import org.opensearch.common.settings.Settings +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.getSecurityFailureMessage +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep.Companion.getTimeoutFailure +import org.opensearch.indexmanagement.indexstatemanagement.util.resetReadOnlyAndRouting +import org.opensearch.indexmanagement.indexstatemanagement.util.deleteShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.getActionStartTime +import org.opensearch.indexmanagement.indexstatemanagement.util.issueUpdateSettingsRequest +import org.opensearch.indexmanagement.indexstatemanagement.util.releaseShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.renewShrinkLock +import org.opensearch.indexmanagement.indexstatemanagement.util.getUpdatedShrinkActionProperties +import org.opensearch.indexmanagement.opensearchapi.suspendUntil +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.transport.RemoteTransportException +import java.time.Duration +import java.time.Instant + +class WaitForShrinkStep(private val action: ShrinkAction) : Step(name) { + private val logger = LogManager.getLogger(javaClass) + private var stepStatus = StepStatus.STARTING + private var info: Map? 
= null + private var shrinkActionProperties: ShrinkActionProperties? = null + + @Suppress("TooGenericExceptionCaught", "ComplexMethod", "ReturnCount", "LongMethod") + override suspend fun execute(): WaitForShrinkStep { + val context = this.context ?: return this + val actionMetadata = context.metadata.actionMetaData + val localShrinkActionProperties = actionMetadata?.actionProperties?.shrinkActionProperties + shrinkActionProperties = localShrinkActionProperties + if (localShrinkActionProperties == null) { + logger.error(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) + cleanupAndFail(WaitForMoveShardsStep.METADATA_FAILURE_MESSAGE) + return this + } + val lock = renewShrinkLock(localShrinkActionProperties, context.lockService, logger) + if (lock == null) { + logger.error("Shrink action failed to renew lock on node [${localShrinkActionProperties.nodeName}]") + cleanupAndFail("Failed to renew lock on node [${localShrinkActionProperties.nodeName}]") + return this + } + shrinkActionProperties = getUpdatedShrinkActionProperties(localShrinkActionProperties, lock) + try { + val targetIndex = localShrinkActionProperties.targetIndexName + val numPrimaryShardsStarted = getNumPrimaryShardsStarted(context.client, targetIndex) + val numPrimaryShards = context.clusterService.state().metadata.indices[targetIndex].numberOfShards + val targetNumShards = localShrinkActionProperties.targetNumShards + if (numPrimaryShards != targetNumShards || numPrimaryShardsStarted != targetNumShards) { + checkTimeOut(context, targetIndex) + return this + } + + // Clear source and target allocation, if either fails the step will be set to failed and the function will return false + if (!clearAllocationSettings(context, targetIndex)) return this + if (!resetReadOnlyAndRouting(context.metadata.index, context.client, localShrinkActionProperties.originalIndexSettings)) return this + + deleteShrinkLock(localShrinkActionProperties, context.lockService, logger) + stepStatus = StepStatus.COMPLETED + info = 
mapOf("message" to SUCCESS_MESSAGE) + return this + } catch (e: OpenSearchSecurityException) { + cleanupAndFail(getSecurityFailureMessage(e.localizedMessage), e.message, e) + return this + } catch (e: RemoteTransportException) { + val unwrappedException = ExceptionsHelper.unwrapCause(e) + cleanupAndFail(GENERIC_FAILURE_MESSAGE, cause = e.message, e = unwrappedException as java.lang.Exception) + return this + } catch (e: Exception) { + cleanupAndFail(GENERIC_FAILURE_MESSAGE, e.message, e) + return this + } + } + + // Sets the action to failed, clears the readonly and allocation settings on the source index, deletes the target index, + // and releases the shrink lock + private suspend fun cleanupAndFail(message: String, cause: String? = null, e: Exception? = null) { + e?.let { logger.error(message, e) } + info = if (cause == null) mapOf("message" to message) else mapOf("message" to message, "cause" to cause) + stepStatus = StepStatus.FAILED + // Using a try/catch for each cleanup action as we should clean up as much as possible despite any failures + // Non-null assertion !! 
is used to throw an exception on null which would just be caught and logged + try { + resetReadOnlyAndRouting(context!!.metadata.index, context!!.client, shrinkActionProperties!!.originalIndexSettings) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to clean up routing and readonly setting after a failure: $e") + } + try { + // Use plugin level permissions when deleting the failed target shrink index after a failure + context!!.client.threadPool().threadContext.stashContext().use { + val deleteRequest = DeleteIndexRequest(shrinkActionProperties!!.targetIndexName) + val response: AcknowledgedResponse = + context!!.client.admin().indices().suspendUntil { delete(deleteRequest, it) } + if (!response.isAcknowledged) { + logger.error("Shrink action failed to delete target index during cleanup after a failure") + } + } + } catch (e: Exception) { + logger.error("Shrink action failed while trying to delete the target index after a failure: $e") + } + try { + releaseShrinkLock(shrinkActionProperties!!, context!!.lockService, logger) + } catch (e: Exception) { + logger.error("Shrink action failed while trying to release the node lock after a failure: $e") + } + shrinkActionProperties = null + } + + private suspend fun clearAllocationSettings(context: StepContext, index: String): Boolean { + val allocationSettings = Settings.builder().putNull(AttemptMoveShardsStep.ROUTING_SETTING).build() + val response: AcknowledgedResponse = issueUpdateSettingsRequest(context.client, index, allocationSettings) + if (!response.isAcknowledged) { + logger.error("Shrink action failed to clear the allocation settings on index [$index] following shrinking.") + cleanupAndFail(getFailureMessage(index)) + return false + } + return true + } + + private suspend fun getNumPrimaryShardsStarted(client: Client, targetIndex: String): Int { + val targetIndexStatsRequests: IndicesStatsRequest = IndicesStatsRequest().indices(targetIndex) + val targetStatsResponse: IndicesStatsResponse =
client.admin().indices().suspendUntil { stats(targetIndexStatsRequests, it) } + return targetStatsResponse.shards.filter { it.shardRouting.started() && it.shardRouting.primary() }.size + } + + private suspend fun checkTimeOut(stepContext: StepContext, targetIndex: String) { + val managedIndexMetadata = stepContext.metadata + val timeFromActionStarted: Duration = Duration.between(getActionStartTime(managedIndexMetadata), Instant.now()) + val timeOutInSeconds = action.configTimeout?.timeout?.seconds ?: WaitForMoveShardsStep.MOVE_SHARDS_TIMEOUT_IN_SECONDS + // Get ActionTimeout if given, otherwise use default timeout of 12 hours + if (timeFromActionStarted.toSeconds() > timeOutInSeconds) { + logger.error(getTimeoutFailure(targetIndex)) + cleanupAndFail(getTimeoutFailure(targetIndex)) + } else { + info = mapOf("message" to getDelayedMessage(targetIndex)) + stepStatus = StepStatus.CONDITION_NOT_MET + } + } + + override fun getUpdatedManagedIndexMetadata(currentMetadata: ManagedIndexMetaData): ManagedIndexMetaData { + return currentMetadata.copy( + actionMetaData = currentMetadata.actionMetaData?.copy( + actionProperties = ActionProperties( + shrinkActionProperties = shrinkActionProperties + ) + ), + stepMetaData = StepMetaData(name, getStepStartTime(currentMetadata).toEpochMilli(), stepStatus), + transitionTo = null, + info = info + ) + } + + override fun isIdempotent() = true + + companion object { + const val name = "wait_for_shrink_step" + const val SUCCESS_MESSAGE = "Shrink finished successfully." + const val GENERIC_FAILURE_MESSAGE = "Shrink failed while waiting for shards to start." + fun getDelayedMessage(newIndex: String) = "Shrink delayed because $newIndex shards not in started state." + fun getFailureMessage(newIndex: String) = "Shrink failed while waiting for $newIndex shards to start." + fun getTimeoutFailure(newIndex: String) = "Shrink failed because it timed out while waiting for $newIndex shrink to finish." 
+ } +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt index b9c9dc7ee..29d34155b 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/ManagedIndexUtils.kt @@ -8,18 +8,27 @@ package org.opensearch.indexmanagement.indexstatemanagement.util // import inet.ipaddr.IPAddressString +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.withContext // import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.Logger import org.opensearch.action.delete.DeleteRequest +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.search.SearchRequest import org.opensearch.action.support.WriteRequest import org.opensearch.action.update.UpdateRequest // import org.opensearch.alerting.destination.message.BaseMessage +import org.opensearch.client.Client import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.NamedXContentRegistry import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType import org.opensearch.index.query.BoolQueryBuilder import org.opensearch.index.query.QueryBuilders import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX @@ -37,6 +46,8 @@ import org.opensearch.indexmanagement.indexstatemanagement.model.coordinator.Swe import org.opensearch.indexmanagement.indexstatemanagement.settings.ManagedIndexSettings import 
org.opensearch.indexmanagement.opensearchapi.optionalISMTemplateField import org.opensearch.indexmanagement.opensearchapi.optionalTimeField +import org.opensearch.indexmanagement.opensearchapi.parseWithType +import org.opensearch.indexmanagement.opensearchapi.suspendUntil import org.opensearch.indexmanagement.spi.indexstatemanagement.Action import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaData @@ -535,3 +546,25 @@ enum class MetadataCheck { // } // return false // } + +@Suppress("BlockingMethodInNonBlockingContext") +suspend fun getManagedIndexConfig(indexUuid: String, client: Client): ManagedIndexConfig? { + val request = GetRequest().routing(indexUuid).index(INDEX_MANAGEMENT_INDEX).id(indexUuid) + val response: GetResponse = client.suspendUntil { get(request, it) } + var managedIndexConfig: ManagedIndexConfig? = null + val configSource = response.sourceAsBytesRef + // Intellij complains about createParser/parseWithType blocking because it sees they throw IOExceptions + configSource?.let { + withContext(Dispatchers.IO) { + val xcp = XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, configSource, XContentType.JSON) + managedIndexConfig = xcp.parseWithType(response.id, response.seqNo, response.primaryTerm, ManagedIndexConfig.Companion::parse) + } + } + return managedIndexConfig +} + +// extracts the job scheduler interval from the managed index config and returns the millisecond value +fun getIntervalFromManagedIndexConfig(managedIndexConfig: ManagedIndexConfig): Long { + val periodTuple = managedIndexConfig.jobSchedule.getPeriodStartingAt(Instant.now()) + return periodTuple.v2().toEpochMilli() - periodTuple.v1().toEpochMilli() +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt new file mode 
100644 index 000000000..996b3f2a8 --- /dev/null +++ b/src/main/kotlin/org/opensearch/indexmanagement/indexstatemanagement/util/StepUtils.kt @@ -0,0 +1,194 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.util + +import org.apache.logging.log4j.Logger +import org.opensearch.action.admin.cluster.health.ClusterHealthRequest +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse +import org.opensearch.action.admin.cluster.node.stats.NodeStats +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest +import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.client.Client +import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.cluster.routing.allocation.DiskThresholdSettings +import org.opensearch.common.settings.ClusterSettings +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue +import org.opensearch.indexmanagement.IndexManagementPlugin.Companion.INDEX_MANAGEMENT_INDEX +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_NAME +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction.Companion.LOCK_RESOURCE_TYPE +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep +import org.opensearch.indexmanagement.opensearchapi.suspendUntil +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData +import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ShrinkActionProperties +import org.opensearch.jobscheduler.spi.LockModel +import org.opensearch.jobscheduler.spi.utils.LockService +import java.lang.Exception +import java.time.Instant + +suspend fun issueUpdateSettingsRequest(client: Client, indexName: String, settings: Settings): AcknowledgedResponse { + return client.admin() + .indices() + 
.suspendUntil { updateSettings(UpdateSettingsRequest(settings, indexName), it) } +} + +suspend fun releaseShrinkLock( + shrinkActionProperties: ShrinkActionProperties, + lockService: LockService, + logger: Logger +) { + val lock: LockModel = getShrinkLockModel(shrinkActionProperties) + val released: Boolean = lockService.suspendUntil { release(lock, it) } + if (!released) { + logger.error("Failed to release Shrink action lock on node [${shrinkActionProperties.nodeName}]") + } +} + +suspend fun deleteShrinkLock( + shrinkActionProperties: ShrinkActionProperties, + lockService: LockService, + logger: Logger +) { + val lockID = getShrinkLockID(shrinkActionProperties.nodeName) + val deleted: Boolean = lockService.suspendUntil { deleteLock(lockID, it) } + if (!deleted) { + logger.error("Failed to delete Shrink action lock on node [${shrinkActionProperties.nodeName}]") + } +} + +suspend fun renewShrinkLock( + shrinkActionProperties: ShrinkActionProperties, + lockService: LockService, + logger: Logger +): LockModel? 
{ + val lock: LockModel = getShrinkLockModel(shrinkActionProperties) + return try { + lockService.suspendUntil { renewLock(lock, it) } + } catch (e: Exception) { + logger.error("Failed to renew Shrink action lock on node [${shrinkActionProperties.nodeName}]: $e") + null + } +} + +fun getShrinkLockModel( + shrinkActionProperties: ShrinkActionProperties +): LockModel { + return getShrinkLockModel( + shrinkActionProperties.nodeName, + INDEX_MANAGEMENT_INDEX, + shrinkActionProperties.lockEpochSecond, + shrinkActionProperties.lockPrimaryTerm, + shrinkActionProperties.lockSeqNo, + shrinkActionProperties.lockDurationSecond + ) +} + +@SuppressWarnings("LongParameterList") +fun getShrinkLockModel( + nodeName: String, + jobIndexName: String, + lockEpochSecond: Long, + lockPrimaryTerm: Long, + lockSeqNo: Long, + lockDurationSecond: Long +): LockModel { + val lockID = getShrinkLockID(nodeName) + val lockCreationInstant: Instant = Instant.ofEpochSecond(lockEpochSecond) + return LockModel( + jobIndexName, + lockID, + lockCreationInstant, + lockDurationSecond, + false, + lockSeqNo, + lockPrimaryTerm + ) +} + +// Returns copied ShrinkActionProperties with the details of the provided lock added in +fun getUpdatedShrinkActionProperties(shrinkActionProperties: ShrinkActionProperties, lock: LockModel): ShrinkActionProperties { + return ShrinkActionProperties( + shrinkActionProperties.nodeName, + shrinkActionProperties.targetIndexName, + shrinkActionProperties.targetNumShards, + lock.primaryTerm, + lock.seqNo, + lock.lockTime.epochSecond, + lock.lockDurationSeconds, + shrinkActionProperties.originalIndexSettings + ) +} + +fun getActionStartTime(managedIndexMetaData: ManagedIndexMetaData): Instant { + val actionMetadata = managedIndexMetaData.actionMetaData + // Return the action start time, or if that is null return now + actionMetadata?.startTime?.let { return Instant.ofEpochMilli(it) } + return Instant.now() +} + +/* + * For disk threshold, if the values are set as a percentage, the 
percent parameter will return a value and the bytes + * parameter will return 0, and vice versa for when the values are set as bytes. This method provides a single place to + * parse either and get the byte value back. + */ +@Suppress("MagicNumber") +fun getFreeBytesThresholdHigh(settings: Settings, clusterSettings: ClusterSettings?, totalNodeBytes: Long): Long { + val diskThresholdSettings = DiskThresholdSettings(settings, clusterSettings) + // Depending on how a user provided input, this setting may be a percentage or byte value + val diskThresholdPercent = diskThresholdSettings.freeDiskThresholdHigh + val diskThresholdBytes = diskThresholdSettings.freeBytesThresholdHigh + // If the disk threshold is set as a percentage, use it and convert it to bytes + return if (diskThresholdPercent > 0.001) { + // If the user set value is 95%, diskThresholdPercent will be returned as 5% from the DiskThresholdSettings object + ((diskThresholdPercent / 100) * totalNodeBytes).toLong() + } else diskThresholdBytes.bytes +} + +/* + * Returns the amount of memory in the node which will be free below the high watermark level after adding 2*indexSizeInBytes, or -1 + * if adding 2*indexSizeInBytes goes over the high watermark threshold, or if nodeStats does not contain OsStats. 
+*/ +fun getNodeFreeMemoryAfterShrink(node: NodeStats, indexSizeInBytes: Long, settings: Settings, clusterSettings: ClusterSettings?): Long { + val osStats = node.os + if (osStats != null) { + val memLeftInNode = osStats.mem.free.bytes + val totalNodeMem = osStats.mem.total.bytes + val freeBytesThresholdHigh = getFreeBytesThresholdHigh(settings, clusterSettings, totalNodeMem) + // We require that a node has enough space to be below the high watermark disk level with an additional 2 * the index size free + val requiredBytes = (2 * indexSizeInBytes) + freeBytesThresholdHigh + if (memLeftInNode > requiredBytes) { + return memLeftInNode - requiredBytes + } + } + return -1L +} + +suspend fun isIndexGreen( + client: Client, + indexName: String, + timeout: TimeValue = TimeValue(AttemptMoveShardsStep.THIRTY_SECONDS_IN_MILLIS) +): Boolean { + // get index health, waiting for a green status + val healthReq = ClusterHealthRequest().indices(indexName).waitForGreenStatus().timeout(timeout) + val response: ClusterHealthResponse = client.admin().cluster().suspendUntil { health(healthReq, it) } + // The request was set to wait for green index, if the request timed out, the index never was green + return !response.isTimedOut +} + +suspend fun resetReadOnlyAndRouting(index: String, client: Client, originalSettings: Map): Boolean { + val allocationSettings = Settings.builder() + .put(AttemptMoveShardsStep.ROUTING_SETTING, originalSettings[AttemptMoveShardsStep.ROUTING_SETTING]) + .put(IndexMetadata.SETTING_BLOCKS_WRITE, originalSettings[IndexMetadata.SETTING_BLOCKS_WRITE]).build() + val response: AcknowledgedResponse = issueUpdateSettingsRequest(client, index, allocationSettings) + if (!response.isAcknowledged) { + return false + } + return true +} + +fun getShrinkLockID(nodeName: String): String { + return "$LOCK_RESOURCE_TYPE-$LOCK_RESOURCE_NAME-$nodeName" +} diff --git a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt 
b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt index 80e293ba6..4288c88cf 100644 --- a/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt +++ b/src/main/kotlin/org/opensearch/indexmanagement/opensearchapi/OpenSearchExtensions.kt @@ -16,6 +16,7 @@ import org.apache.logging.log4j.Logger import org.opensearch.ExceptionsHelper import org.opensearch.OpenSearchException import org.opensearch.action.ActionListener +import org.opensearch.action.admin.indices.alias.Alias import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.action.get.GetResponse import org.opensearch.action.search.SearchResponse @@ -39,6 +40,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.InjectSecurity import org.opensearch.commons.authuser.User import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.model.ISMTemplate import org.opensearch.indexmanagement.indexstatemanagement.model.Policy import org.opensearch.indexmanagement.util.NO_ID @@ -80,6 +82,16 @@ fun XContentParser.instant(): Instant? 
{ } } +fun XContentBuilder.aliasesField(aliases: List): XContentBuilder { + val builder = this.startArray(ShrinkAction.ALIASES_FIELD) + aliases.forEach { + builder.startObject() + it.toXContent(builder, ToXContent.EMPTY_PARAMS) + builder.endObject() + } + return builder.endArray() +} + fun XContentBuilder.optionalTimeField(name: String, instant: Instant?): XContentBuilder { if (instant == null) { return nullField(name) diff --git a/src/main/resources/mappings/opendistro-ism-config.json b/src/main/resources/mappings/opendistro-ism-config.json index 074257bf8..c83df1a68 100644 --- a/src/main/resources/mappings/opendistro-ism-config.json +++ b/src/main/resources/mappings/opendistro-ism-config.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 13 + "schema_version": 14 }, "dynamic": "strict", "properties": { @@ -430,6 +430,30 @@ } } }, + "shrink": { + "properties": { + "num_new_shards": { + "type": "integer" + }, + "max_shard_size": { + "type": "keyword" + }, + "percentage_of_source_shards": { + "type": "double" + }, + "target_index_name_template": { + "type": "object", + "enabled": false + }, + "aliases": { + "type": "object", + "enabled": false + }, + "force_unsafe": { + "type": "boolean" + } + } + }, "custom": { "enabled": false, "type": "object" @@ -733,6 +757,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git a/src/main/resources/mappings/opendistro-ism-history.json b/src/main/resources/mappings/opendistro-ism-history.json index 44c7ab896..ca5a8d8de 100644 --- a/src/main/resources/mappings/opendistro-ism-history.json +++ b/src/main/resources/mappings/opendistro-ism-history.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 4 + "schema_version": 5 }, "dynamic": "strict", "properties": { @@ -108,6 +108,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt b/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt index 3cc3a1bba..fbe64cf8a 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/IndexManagementRestTestCase.kt @@ -27,8 +27,8 @@ import javax.management.remote.JMXServiceURL abstract class IndexManagementRestTestCase : ODFERestTestCase() { - val configSchemaVersion = 13 - val historySchemaVersion = 4 + val configSchemaVersion = 14 + val historySchemaVersion = 5 // Having issues with tests leaking into other tests and mappings being incorrect and they are not caught by any pending task wait check as // they do not go through the pending task queue. Ideally this should probably be written in a way to wait for the diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt index 842ded4a5..7ddadd7b9 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/TestHelpers.kt @@ -5,10 +5,12 @@ package org.opensearch.indexmanagement.indexstatemanagement +import org.opensearch.action.admin.indices.alias.Alias import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.ToXContent import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.index.RandomCreateIndexGenerator.randomAlias import org.opensearch.index.seqno.SequenceNumbers import org.opensearch.indexmanagement.indexstatemanagement.action.AllocationAction import org.opensearch.indexmanagement.indexstatemanagement.action.CloseAction @@ -22,6 +24,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.action.ReadWriteActio import 
org.opensearch.indexmanagement.indexstatemanagement.action.ReplicaCountAction import org.opensearch.indexmanagement.indexstatemanagement.action.RolloverAction import org.opensearch.indexmanagement.indexstatemanagement.action.RollupAction +import org.opensearch.indexmanagement.indexstatemanagement.action.ShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.action.SnapshotAction import org.opensearch.indexmanagement.indexstatemanagement.model.ChangePolicy import org.opensearch.indexmanagement.indexstatemanagement.model.Conditions @@ -48,10 +51,16 @@ import org.opensearch.jobscheduler.spi.schedule.IntervalSchedule import org.opensearch.jobscheduler.spi.schedule.Schedule import org.opensearch.script.Script import org.opensearch.script.ScriptType +import org.opensearch.test.OpenSearchTestCase.randomAlphaOfLength +import org.opensearch.test.OpenSearchTestCase.randomBoolean +import org.opensearch.test.OpenSearchTestCase.randomDoubleBetween +import org.opensearch.test.OpenSearchTestCase.randomInt +import org.opensearch.test.OpenSearchTestCase.randomList import org.opensearch.test.rest.OpenSearchRestTestCase import java.time.Instant import java.time.ZoneId import java.time.temporal.ChronoUnit +import kotlin.math.abs fun randomPolicy( id: String = OpenSearchRestTestCase.randomAlphaOfLength(10), @@ -131,6 +140,25 @@ fun randomRolloverActionConfig( ) } +@Suppress("ReturnCount") +fun randomShrinkAction( + numNewShards: Int? = null, + maxShardSize: ByteSizeValue? = null, + percentageOfSourceShards: Double? = null, + targetIndexTemplate: Script? = if (randomBoolean()) randomTemplateScript(randomAlphaOfLength(10)) else null, + aliases: List? = if (randomBoolean()) randomList(10) { randomAlias() } else null, + forceUnsafe: Boolean? 
= if (randomBoolean()) randomBoolean() else null +): ShrinkAction { + if (numNewShards == null && maxShardSize == null && percentageOfSourceShards == null) { + when (randomInt(2)) { + 0 -> return ShrinkAction(abs(randomInt()) + 1, null, null, targetIndexTemplate, aliases, forceUnsafe, 0) + 1 -> return ShrinkAction(null, randomByteSizeValue(), null, targetIndexTemplate, aliases, forceUnsafe, 0) + 2 -> return ShrinkAction(null, null, randomDoubleBetween(0.0, 1.0, true), targetIndexTemplate, aliases, forceUnsafe, 0) + } + } + return ShrinkAction(numNewShards, maxShardSize, percentageOfSourceShards, targetIndexTemplate, aliases, forceUnsafe, 0) +} + fun randomReadOnlyActionConfig(): ReadOnlyAction { return ReadOnlyAction(index = 0) } @@ -378,6 +406,11 @@ fun ReadWriteAction.toJsonString(): String { return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() } +fun ShrinkAction.toJsonString(): String { + val builder = XContentFactory.jsonBuilder() + return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() +} + fun ReplicaCountAction.toJsonString(): String { val builder = XContentFactory.jsonBuilder() return this.toXContent(builder, ToXContent.EMPTY_PARAMS).string() diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt new file mode 100644 index 000000000..686a09265 --- /dev/null +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/action/ShrinkActionIT.kt @@ -0,0 +1,562 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.indexmanagement.indexstatemanagement.action + +import org.apache.logging.log4j.LogManager +import org.opensearch.action.admin.indices.alias.Alias +import org.opensearch.cluster.metadata.IndexMetadata +import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.ByteSizeValue +import 
org.opensearch.index.query.QueryBuilders +import org.opensearch.indexmanagement.indexstatemanagement.IndexStateManagementRestTestCase +import org.opensearch.indexmanagement.indexstatemanagement.model.Policy +import org.opensearch.indexmanagement.indexstatemanagement.model.State +import org.opensearch.indexmanagement.indexstatemanagement.randomErrorNotification +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.AttemptShrinkStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForMoveShardsStep +import org.opensearch.indexmanagement.indexstatemanagement.step.shrink.WaitForShrinkStep +import org.opensearch.indexmanagement.spi.indexstatemanagement.Step +import org.opensearch.indexmanagement.waitFor +import org.opensearch.script.Script +import org.opensearch.script.ScriptType +import java.time.Instant +import java.time.temporal.ChronoUnit + +class ShrinkActionIT : IndexStateManagementRestTestCase() { + private val testIndexName = javaClass.simpleName.lowercase() + private val testIndexSuffix = "_shrink_test" + fun `test basic workflow number of shards`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + + val shrinkAction = ShrinkAction( + numNewShards = 1, + maxShardSize = null, + percentageOfSourceShards = null, + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), + aliases = listOf(Alias("test-alias1"), Alias("test-alias2").filter(QueryBuilders.termQuery("foo", "bar")).writeIndex(true)), + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = 
Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Set the index as readonly to check that the setting is preserved after the shrink finishes + updateIndexSetting(indexName, IndexMetadata.SETTING_BLOCKS_WRITE, "true") + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + testIndexSuffix + waitFor(Instant.ofEpochSecond(60)) { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = 
getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + val instant: Instant = Instant.ofEpochSecond(50) + waitFor(instant) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals(Step.StepStatus.COMPLETED, getExplainManagedIndexMetaData(indexName).stepMetaData?.stepStatus) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + assertEquals("Write block setting was not reset after successful shrink", "true", getIndexBlocksWriteSetting(indexName)) + val aliases = getAlias(targetIndexName, "") + assertTrue("Aliases were not added to shrunken index", aliases.containsKey("test-alias1") && aliases.containsKey("test-alias2")) + } + } + + @Suppress("UNCHECKED_CAST") + fun `test basic workflow max shard size`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + val testMaxShardSize: ByteSizeValue = ByteSizeValue.parseBytesSizeValue("1GB", "test") + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = testMaxShardSize, + 
percentageOfSourceShards = null, + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), + aliases = listOf(Alias("max-shard-alias")), + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + testIndexSuffix + waitFor(Instant.ofEpochSecond(60)) { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, 
settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + val indexSettings = getIndexSettings(indexName) as Map>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? 
+ assertNull("Write block setting was not reset after successful shrink", writeBlock) + val aliases = getAlias(targetIndexName, "") + assertTrue("Alias was not added to shrunken index", aliases.containsKey("max-shard-alias")) + } + } + + @Suppress("UNCHECKED_CAST") + fun `test basic workflow percentage to decrease to`() { + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageOfSourceShards = 0.5, + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + testIndexSuffix + waitFor(Instant.ofEpochSecond(60)) { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + assertNotNull("Couldn't find node 
to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + val indexSettings = getIndexSettings(indexName) as Map>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? 
+ assertNull("Write block setting was not reset after successful shrink", writeBlock) + } + } + + @Suppress("UNCHECKED_CAST") + fun `test allocation block picks correct node`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val nodes = getNodes() + if (nodes.size > 1) { + val indexName = "${testIndexName}_index_1" + val policyID = "${testIndexName}_testPolicyName_1" + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageOfSourceShards = 0.5, + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "3", "") + val excludedNode = nodes.iterator().next() + logger.info("Excluded node: $excludedNode") + updateIndexSettings( + indexName, + Settings.builder().put("index.routing.allocation.exclude._name", excludedNode) + ) + insertSampleData(indexName, 3) + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + logger.info("index settings: \n ${getFlatSettings(indexName)}") + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + val targetIndexName = indexName + testIndexSuffix + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + 
targetIndexName, + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName + ) + assertEquals("true", getIndexBlocksWriteSetting(indexName)) + val nodeName = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertNotNull("Couldn't find node to shrink onto.", nodeName) + assertNotEquals(nodeName, excludedNode) + val settings = getFlatSettings(indexName) + val nodeToShrink = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + val nodeToShrink = + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Wait for move should finish before this. 
Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(50)) { + assertTrue("Target index is not created", indexExists(targetIndexName)) + assertEquals( + AttemptShrinkStep.getSuccessMessage(targetIndexName), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + + // starts WaitForShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + // one primary and one replica + assertTrue(getIndexShards(targetIndexName).size == 2) + assertEquals( + WaitForShrinkStep.SUCCESS_MESSAGE, + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + val indexSettings = getIndexSettings(indexName) as Map>> + val writeBlock = indexSettings[indexName]!!["settings"]!![IndexMetadata.SETTING_BLOCKS_WRITE] as String? + assertNull("Write block setting was not reset after successful shrink", writeBlock) + } + } + } + + fun `test no-op with single source index primary shard`() { + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_index_1_shard_noop" + val policyID = "${testIndexName}_testPolicyName_1_shard_noop" + + // Create a Policy with one State that only preforms a force_merge Action + val shrinkAction = ShrinkAction( + numNewShards = null, + maxShardSize = null, + percentageOfSourceShards = 0.5, + targetIndexTemplate = Script(ScriptType.INLINE, Script.DEFAULT_TEMPLATE_LANG, "{{ctx.index}}$testIndexSuffix", mapOf()), + aliases = null, + forceUnsafe = true, + index = 0 + ) + val states = listOf(State("ShrinkState", listOf(shrinkAction), listOf())) + + val policy = Policy( + id = policyID, + description = "$testIndexName description", + schemaVersion = 11L, + lastUpdatedTime = Instant.now().truncatedTo(ChronoUnit.MILLIS), + errorNotification = randomErrorNotification(), + defaultState = states[0].name, + states = states + ) + + createPolicy(policy, policyID) + createIndex(indexName, policyID, null, "0", "1", "") + 
+ insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + // The action should be done after the no-op + waitFor(Instant.ofEpochSecond(60)) { + val metadata = getExplainManagedIndexMetaData(indexName) + assertEquals( + "Did not get the no-op due to single primary shard message", + AttemptMoveShardsStep.ONE_PRIMARY_SHARD_MESSAGE, + metadata.info?.get("message") + ) + assertEquals( + "Was not on the last step after no-op due to single primary shard", + WaitForShrinkStep.name, + metadata.stepMetaData?.name + ) + } + } + + // TODO This test is excessively flaky, disabling for now but it needs to be fixed + private fun `test retries from first step`() { + val testPolicy = """ + {"policy":{"description":"Default policy","default_state":"Shrink","states":[ + {"name":"Shrink","actions":[{"retry":{"count":2,"backoff":"constant","delay":"1s"},"shrink": + {"num_new_shards":1, "target_index_name_template":{"source": "{{ctx.index}}_shrink_test"}, "force_unsafe": "true"}}],"transitions":[]}]}} + """.trimIndent() + val logger = LogManager.getLogger(::ShrinkActionIT) + val indexName = "${testIndexName}_retry" + val policyID = "${testIndexName}_testPolicyName_retry" + createPolicyJson(testPolicy, policyID) + + createIndex(indexName, policyID, null, "0", "3", "") + insertSampleData(indexName, 3) + + // Will change the startTime each execution so that it triggers in 2 seconds + // First execution: Policy is initialized + val managedIndexConfig = getExistingManagedIndexConfig(indexName) + 
updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { assertEquals(policyID, getExplainManagedIndexMetaData(indexName).policyID) } + logger.info("before attempt move shards") + // Starts AttemptMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + + val targetIndexName = indexName + "_shrink_test" + waitFor(Instant.ofEpochSecond(60)) { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + assertTrue("Did not set allocation setting", settings.containsKey("index.routing.allocation.require._name")) + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + val nodeToShrink = getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName + // starts WaitForMoveShardsStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + assertEquals( + WaitForMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + // Create an index with the target index name so the AttemptShrinkStep fails + createIndex(targetIndexName, null) + + // Wait for move should finish before this. 
Starts AttemptShrinkStep + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData + assertEquals("Did not fail due to target index existing step as expected", Step.StepStatus.FAILED, stepMetadata?.stepStatus) + assertEquals(AttemptShrinkStep.name, stepMetadata?.name) + val settings = getFlatSettings(indexName) + assertFalse("Did not clear allocation setting", settings.containsKey("index.routing.allocation.require._name")) + assertFalse("Did not clear index write block setting.", settings.containsKey("index.blocks.writes")) + assertNull( + "Did not clear shrink action properties", + getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties + ) + } + + // wait 5 seconds for the timeout from the retry to pass + Thread.sleep(5000L) + + // Delete that index so it can pass + deleteIndex(targetIndexName) + + updateManagedIndexConfigStartTime(managedIndexConfig) + waitFor(Instant.ofEpochSecond(60)) { + val stepMetadata = getExplainManagedIndexMetaData(indexName).stepMetaData + assertEquals("Shrink action should have started over after failing", stepMetadata?.name, AttemptMoveShardsStep.name) + // The step status should be starting, but in the same execution will be completed. 
Allowing either to avoid flaky failures + val stepStatusDidReset = stepMetadata?.stepStatus == Step.StepStatus.STARTING || stepMetadata?.stepStatus == Step.StepStatus.COMPLETED + assertTrue("Step status should reset", stepStatusDidReset) + } + + waitFor { + assertEquals(targetIndexName, getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.targetIndexName) + assertNotNull("Couldn't find node to shrink onto.", getExplainManagedIndexMetaData(indexName).actionMetaData!!.actionProperties!!.shrinkActionProperties!!.nodeName) + val settings = getFlatSettings(indexName) + assertTrue("Did not set allocation setting", settings.containsKey("index.routing.allocation.require._name")) + assertTrue(settings.containsKey("index.routing.allocation.require._name")) + assertEquals(nodeToShrink, settings["index.routing.allocation.require._name"]) + assertEquals( + AttemptMoveShardsStep.getSuccessMessage(nodeToShrink), + getExplainManagedIndexMetaData(indexName).info?.get("message") + ) + } + } +} diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt index e73e92345..23684072e 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/ActionTests.kt @@ -5,8 +5,11 @@ package org.opensearch.indexmanagement.indexstatemanagement.model +import org.opensearch.cluster.routing.allocation.DiskThresholdSettings import org.opensearch.common.io.stream.InputStreamStreamInput import org.opensearch.common.io.stream.OutputStreamStreamOutput +import org.opensearch.common.settings.ClusterSettings +import org.opensearch.common.settings.Settings import org.opensearch.common.unit.ByteSizeValue import org.opensearch.common.unit.TimeValue import org.opensearch.common.xcontent.LoggingDeprecationHandler 
@@ -15,6 +18,7 @@ import org.opensearch.common.xcontent.XContentType import org.opensearch.indexmanagement.indexstatemanagement.ISMActionsParser import org.opensearch.indexmanagement.indexstatemanagement.action.DeleteAction import org.opensearch.indexmanagement.indexstatemanagement.randomAllocationActionConfig +import org.opensearch.indexmanagement.indexstatemanagement.randomByteSizeValue import org.opensearch.indexmanagement.indexstatemanagement.randomCloseActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomDeleteActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomForceMergeActionConfig @@ -26,8 +30,10 @@ import org.opensearch.indexmanagement.indexstatemanagement.randomReadWriteAction import org.opensearch.indexmanagement.indexstatemanagement.randomReplicaCountActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRolloverActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRollupActionConfig +import org.opensearch.indexmanagement.indexstatemanagement.randomShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.randomSnapshotActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomTimeValueObject +import org.opensearch.indexmanagement.indexstatemanagement.util.getFreeBytesThresholdHigh import org.opensearch.indexmanagement.opensearchapi.convertToMap import org.opensearch.indexmanagement.opensearchapi.string import org.opensearch.indexmanagement.spi.indexstatemanagement.Action @@ -70,6 +76,12 @@ class ActionTests : OpenSearchTestCase() { } } + fun `test shrink action multiple shard options fails`() { + assertFailsWith(IllegalArgumentException::class, "Expected IllegalArgumentException for multiple shard options used") { + randomShrinkAction(3, randomByteSizeValue(), .30) + } + } + fun `test allocation action empty parameters fails`() { assertFailsWith(IllegalArgumentException::class, "Expected IllegalArgumentException for empty 
parameters") { randomAllocationActionConfig() @@ -135,6 +147,10 @@ class ActionTests : OpenSearchTestCase() { roundTripAction(randomDeleteActionConfig()) } + fun `test shrink action round trip`() { + roundTripAction(randomShrinkAction()) + } + fun `test action timeout and retry round trip`() { val builder = XContentFactory.jsonBuilder() .startObject() @@ -155,6 +171,29 @@ class ActionTests : OpenSearchTestCase() { roundTripAction(action) } + fun `test shrink disk threshold percentage settings`() { + val rawPercentage = randomIntBetween(0, 100) + val percentage = "$rawPercentage%" + val settings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.key, percentage) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.key, percentage) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.key, percentage).build() + val clusterSettings = ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS.map { it }.toSet()) + val totalNodeBytes = randomByteSizeValue().bytes + val thresholdBytes = getFreeBytesThresholdHigh(settings, clusterSettings, totalNodeBytes) + val expectedThreshold: Long = ((1 - (rawPercentage.toDouble() / 100.0)) * totalNodeBytes).toLong() + assertEquals("Free bytes threshold not being calculated correctly for percentage setting.", thresholdBytes, expectedThreshold) + } + + fun `test shrink disk threshold byte settings`() { + val byteValue = randomByteSizeValue() + val settings = Settings.builder().put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.key, byteValue) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.key, byteValue) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.key, byteValue).build() + val clusterSettings = ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS.map { it }.toSet()) + val 
thresholdBytes = getFreeBytesThresholdHigh(settings, clusterSettings, randomByteSizeValue().bytes) + assertEquals("Free bytes threshold not being calculated correctly for byte setting.", thresholdBytes, byteValue.bytes) + } + private fun roundTripAction(expectedAction: Action) { val baos = ByteArrayOutputStream() val osso = OutputStreamStreamOutput(baos) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt index 7397d4d40..1fe960408 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/model/XContentTests.kt @@ -28,6 +28,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.randomReadWriteAction import org.opensearch.indexmanagement.indexstatemanagement.randomReplicaCountActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRolloverActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomRollupActionConfig +import org.opensearch.indexmanagement.indexstatemanagement.randomShrinkAction import org.opensearch.indexmanagement.indexstatemanagement.randomSnapshotActionConfig import org.opensearch.indexmanagement.indexstatemanagement.randomState import org.opensearch.indexmanagement.indexstatemanagement.randomTransition @@ -223,6 +224,14 @@ class XContentTests : OpenSearchTestCase() { assertEquals("Round tripping OpenAction doesn't work", openAction.convertToMap(), parsedOpenAction.convertToMap()) } + fun `test shrink action parsing`() { + val shrinkAction = randomShrinkAction() + val shrinkActionString = shrinkAction.toJsonString() + val parsedShrinkAction = ISMActionsParser.instance.parse(parser(shrinkActionString), 0) + + assertEquals("Round tripping ShrinkAction doesn't work", shrinkAction.convertToMap(), parsedShrinkAction.convertToMap()) + } + fun 
`test managed index metadata parsing`() { val metadata = ManagedIndexMetaData( index = randomAlphaOfLength(10), diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt index 11e01c6a5..147268227 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptCloseStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.close.AttemptClo import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -33,6 +34,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test close step sets step status to completed when successful`() { val closeIndexResponse = CloseIndexResponse(true, true, listOf()) @@ -41,7 +43,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, 
client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -55,7 +57,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -69,7 +71,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -83,7 +85,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { 
runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -97,7 +99,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -111,7 +113,7 @@ class AttemptCloseStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptCloseStep = AttemptCloseStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, 
clusterService, client, null, null, scriptService, settings, lockService) attemptCloseStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptCloseStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt index 7b6963dba..d5faa4e48 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptDeleteStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.delete.AttemptDe import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.SnapshotInProgressException import org.opensearch.test.OpenSearchTestCase @@ -31,6 +32,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test delete step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) @@ -39,7 +41,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, 
null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = 
attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -82,7 +84,7 @@ class AttemptDeleteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptDeleteStep = AttemptDeleteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptDeleteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptDeleteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt index 2ed8669e2..fe70954ba 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptOpenStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.open.AttemptOpen import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { private 
val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test open step sets step status to failed when not acknowledged`() { val openIndexResponse = OpenIndexResponse(false, false) @@ -39,7 +41,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptOpenStepTests : OpenSearchTestCase() { runBlocking 
{ val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptOpenStep = AttemptOpenStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptOpenStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptOpenStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt index 796baf8ab..e3248ae4d 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetIndexPriorityStepTests.kt @@ -23,6 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.indexpriority.At import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,6 +33,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: 
LockService = LockService(mock(), clusterService) fun `test set priority step sets step status to completed when successful`() { val acknowledgedResponse = AcknowledgedResponse(true) @@ -41,7 +43,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -56,7 +58,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -71,7 +73,7 @@ class 
AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) @@ -87,7 +89,7 @@ class AttemptSetIndexPriorityStepTests : OpenSearchTestCase() { val indexPriorityAction = IndexPriorityAction(50, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val attemptSetPriorityStep = AttemptSetIndexPriorityStep(indexPriorityAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) attemptSetPriorityStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptSetPriorityStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) logger.info(updatedManagedIndexMetaData) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt index c5e589dd9..4959a46d4 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt +++ 
b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSetReplicaCountStepTests.kt @@ -23,6 +23,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.replicacount.Att import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -32,6 +33,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test replica step sets step status to failed when not acknowledged`() { val replicaCountResponse = AcknowledgedResponse(false) @@ -41,7 +43,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -56,7 +58,7 @@ class 
AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -71,7 +73,7 @@ class AttemptSetReplicaCountStepTests : OpenSearchTestCase() { val replicaCountAction = ReplicaCountAction(2, 0) val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val replicaCountStep = AttemptReplicaCountStep(replicaCountAction) - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) replicaCountStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = replicaCountStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt index bea3c41ae..74d07b449 100644 --- 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptSnapshotStepTests.kt @@ -30,6 +30,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionPrope import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.ingest.TestTemplateService.MockTemplateScript +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript @@ -44,6 +45,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { private val settings: Settings = Settings.EMPTY private val snapshotAction = randomSnapshotActionConfig("repo", "snapshot-name") private val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(AttemptSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) + private val lockService: LockService = LockService(mock(), clusterService) @Before fun settings() { @@ -58,7 +60,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.ACCEPTED) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class AttemptSnapshotStepTests : 
OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.OK) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -76,7 +78,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { whenever(response.status()).doReturn(RestStatus.INTERNAL_SERVER_ERROR) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -88,7 +90,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -101,7 +103,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { 
val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -114,7 +116,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -127,7 +129,7 @@ class AttemptSnapshotStepTests : OpenSearchTestCase() { val client = getClient(getAdminClient(getClusterAdminClient(null, exception))) runBlocking { val step = AttemptSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt index e30ce2e33..aeeedd7c9 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/AttemptTransitionStepTests.kt @@ -37,6 +37,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepMetaData +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.rest.RestStatus import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase @@ -60,6 +61,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock { on { state() } doReturn clusterState } private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) private val docsStats: DocsStats = mock() private val primaries: CommonStats = mock { on { getDocs() } doReturn docsStats } @@ -83,7 +85,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = 
StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -101,7 +103,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -119,7 +121,7 @@ class AttemptTransitionStepTests : OpenSearchTestCase() { val managedIndexMetadata = ManagedIndexMetaData(indexName, indexUUID, "policy_id", null, null, null, null, null, null, null, null, null, null, null) val transitionsAction = TransitionsAction(listOf(Transition("some_state", Conditions(docCount = 5L))), indexMetadataProvider) val attemptTransitionStep = AttemptTransitionStep(transitionsAction) - val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetadata, clusterService, client, null, null, scriptService, settings, lockService) 
attemptTransitionStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = attemptTransitionStep.getUpdatedManagedIndexMetadata(managedIndexMetadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt index cfcc73142..8501f6d24 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadOnlyStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readonly.SetRead import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test read only step sets step status to failed when not acknowledged`() { val setReadOnlyResponse = AcknowledgedResponse(false) @@ -39,7 +41,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = 
StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class SetReadOnlyStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadOnlyStep = SetReadOnlyStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadOnlyStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadOnlyStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", 
Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt index c332a0c90..6c56fadb5 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/SetReadWriteStepTests.kt @@ -22,6 +22,7 @@ import org.opensearch.indexmanagement.indexstatemanagement.step.readwrite.SetRea import org.opensearch.indexmanagement.spi.indexstatemanagement.Step import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import org.opensearch.transport.RemoteTransportException @@ -31,6 +32,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) fun `test read write step sets step status to failed when not acknowledged`() { val setReadWriteResponse = AcknowledgedResponse(false) @@ -39,7 +41,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, 
settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -53,7 +55,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -67,7 +69,7 @@ class SetReadWriteStepTests : OpenSearchTestCase() { runBlocking { val managedIndexMetaData = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, null, null, null, null) val setReadWriteStep = SetReadWriteStep() - val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings) + val context = StepContext(managedIndexMetaData, clusterService, client, null, null, scriptService, settings, lockService) setReadWriteStep.preExecute(logger, context).execute() val updatedManagedIndexMetaData = setReadWriteStep.getUpdatedManagedIndexMetadata(managedIndexMetaData) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt index 83fd61145..b769d42e2 100644 --- a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForRollupCompletionStepTests.kt @@ -18,6 +18,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.test.OpenSearchTestCase import java.time.Instant @@ -41,11 +42,12 @@ class WaitForRollupCompletionStepTests : OpenSearchTestCase() { ) private val client: Client = mock() private val step = WaitForRollupCompletionStep() + private val lockService: LockService = LockService(mock(), clusterService) fun `test wait for rollup when missing rollup id`() { val actionMetadata = metadata.actionMetaData!!.copy(actionProperties = ActionProperties()) val metadata = metadata.copy(actionMetaData = actionMetadata) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) val step = WaitForRollupCompletionStep() runBlocking { diff --git a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt index 8af94805f..14f22918d 100644 --- 
a/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt +++ b/src/test/kotlin/org/opensearch/indexmanagement/indexstatemanagement/step/WaitForSnapshotStepTests.kt @@ -27,6 +27,7 @@ import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionMetaD import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ActionProperties import org.opensearch.indexmanagement.spi.indexstatemanagement.model.ManagedIndexMetaData import org.opensearch.indexmanagement.spi.indexstatemanagement.model.StepContext +import org.opensearch.jobscheduler.spi.utils.LockService import org.opensearch.script.ScriptService import org.opensearch.snapshots.Snapshot import org.opensearch.snapshots.SnapshotId @@ -38,6 +39,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { private val clusterService: ClusterService = mock() private val scriptService: ScriptService = mock() private val settings: Settings = Settings.EMPTY + private val lockService: LockService = LockService(mock(), clusterService) val snapshot = "snapshot-name" fun `test snapshot missing snapshot name in action properties`() { @@ -48,7 +50,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, emptyActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) 
@@ -60,7 +62,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, nullActionProperties), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -80,7 +82,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -92,7 +94,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, 
ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not CONDITION_NOT_MET", Step.StepStatus.CONDITION_NOT_MET, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -104,7 +106,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not COMPLETED", Step.StepStatus.COMPLETED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -116,7 +118,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = 
StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -128,7 +130,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -147,7 +149,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val 
updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -162,7 +164,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) @@ -177,7 +179,7 @@ class WaitForSnapshotStepTests : OpenSearchTestCase() { val snapshotAction = SnapshotAction("repo", snapshot, 0) val metadata = ManagedIndexMetaData("test", "indexUuid", "policy_id", null, null, null, null, null, null, null, ActionMetaData(WaitForSnapshotStep.name, 1, 0, false, 0, null, ActionProperties(snapshotName = "snapshot-name")), null, null, null) val step = WaitForSnapshotStep(snapshotAction) - val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings) + val context = StepContext(metadata, clusterService, client, null, null, scriptService, settings, lockService) step.preExecute(logger, context).execute() val updatedManagedIndexMetaData = step.getUpdatedManagedIndexMetadata(metadata) assertEquals("Step status is not FAILED", Step.StepStatus.FAILED, updatedManagedIndexMetaData.stepMetaData?.stepStatus) diff --git 
a/src/test/resources/mappings/cached-opendistro-ism-config.json b/src/test/resources/mappings/cached-opendistro-ism-config.json index 074257bf8..c83df1a68 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-config.json +++ b/src/test/resources/mappings/cached-opendistro-ism-config.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 13 + "schema_version": 14 }, "dynamic": "strict", "properties": { @@ -430,6 +430,30 @@ } } }, + "shrink": { + "properties": { + "num_new_shards": { + "type": "integer" + }, + "max_shard_size": { + "type": "keyword" + }, + "percentage_of_source_shards": { + "type": "double" + }, + "target_index_name_template": { + "type": "object", + "enabled": false + }, + "aliases": { + "type": "object", + "enabled": false + }, + "force_unsafe": { + "type": "boolean" + } + } + }, "custom": { "enabled": false, "type": "object" @@ -733,6 +757,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } } diff --git a/src/test/resources/mappings/cached-opendistro-ism-history.json b/src/test/resources/mappings/cached-opendistro-ism-history.json index 44c7ab896..ca5a8d8de 100644 --- a/src/test/resources/mappings/cached-opendistro-ism-history.json +++ b/src/test/resources/mappings/cached-opendistro-ism-history.json @@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 4 + "schema_version": 5 }, "dynamic": "strict", "properties": { @@ -108,6 +108,10 @@ }, "has_rollup_failed": { "type": "boolean" + }, + "shrink_action_properties": { + "type": "object", + "enabled": false } } }