Skip to content

Commit

Permalink
Fixed deleting monitor workflow metadata (#882)
Browse files Browse the repository at this point in the history
* Fixed deleting monitor metadata and workflow metadata.

Signed-off-by: Stevan Buzejic <[email protected]>
Signed-off-by: Surya Sashank Nistala <[email protected]>
  • Loading branch information
stevanbz authored and eirsep committed May 25, 2023
1 parent 4e9f860 commit c328bab
Show file tree
Hide file tree
Showing 14 changed files with 704 additions and 228 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction
import org.opensearch.alerting.resthandler.RestSearchEmailGroupAction
import org.opensearch.alerting.resthandler.RestSearchMonitorAction
import org.opensearch.alerting.script.TriggerScript
import org.opensearch.alerting.service.DeleteMonitorService
import org.opensearch.alerting.settings.AlertingSettings
import org.opensearch.alerting.settings.DestinationSettings
import org.opensearch.alerting.settings.LegacyOpenDistroAlertingSettings
Expand Down Expand Up @@ -279,6 +280,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R
settings
)

DeleteMonitorService.initialize(client)

return listOf(sweeper, scheduler, runner, scheduledJobIndices, docLevelMonitorQueries, destinationMigrationCoordinator)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ object MonitorMetadataService :
else null
val runContext = if (monitor.monitorType == Monitor.MonitorType.DOC_LEVEL_MONITOR)
createFullRunContext(monitorIndex, metadata.lastRunContext as MutableMap<String, MutableMap<String, Any>>)
} else null
else null
return if (runContext != null) {
metadata.copy(
lastRunContext = runContext
Expand All @@ -184,12 +184,15 @@ object MonitorMetadataService :
}
}

private suspend fun createNewMetadata(monitor: Monitor, createWithRunContext: Boolean, workflowMetadataId: String? = null): MonitorMetadata {
val monitorIndex = if (monitor.monitorType == Monitor.MonitorType.DOC_LEVEL_MONITOR) {
private suspend fun createNewMetadata(
monitor: Monitor,
createWithRunContext: Boolean,
workflowMetadataId: String? = null,
): MonitorMetadata {
val monitorIndex = if (monitor.monitorType == Monitor.MonitorType.DOC_LEVEL_MONITOR)
(monitor.inputs[0] as DocLevelMonitorInput).indices[0]
else null
val runContext =
if (monitor.monitorType == Monitor.MonitorType.DOC_LEVEL_MONITOR && createWithRunContext)
val runContext = if (monitor.monitorType == Monitor.MonitorType.DOC_LEVEL_MONITOR && createWithRunContext)
createFullRunContext(monitorIndex)
else emptyMap()
return MonitorMetadata(
Expand All @@ -202,8 +205,7 @@ object MonitorMetadataService :
sourceToQueryIndexMapping = mutableMapOf()
)
}

private suspend fun createFullRunContext(
suspend fun createFullRunContext(
index: String?,
existingRunContext: MutableMap<String, MutableMap<String, Any>>? = null
): MutableMap<String, MutableMap<String, Any>> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ object WorkflowMetadataService :
} catch (e: Exception) {
// If the update is set to false and id is set conflict exception will be thrown
if (e is OpenSearchException && e.status() == RestStatus.CONFLICT && !updating) {
log.debug("Metadata with ${metadata.id} for workflow ${metadata.workflowId} already exist. Instead of creating new, updating existing metadata will be performed")
log.debug(
"Metadata with ${metadata.id} for workflow ${metadata.workflowId} already exist." +
" Instead of creating new, updating existing metadata will be performed"
)
return upsertWorkflowMetadata(metadata, true)
}
log.error("Error saving metadata", e)
Expand Down Expand Up @@ -157,6 +160,8 @@ object WorkflowMetadataService :
}

private fun createNewWorkflowMetadata(workflow: Workflow, executionId: String, isTempWorkflow: Boolean): WorkflowMetadata {
// In the case of temp workflow (ie. workflow is in dry-run) use timestampWithUUID-metadata format
// In the case of regular workflow execution, use the workflowId-metadata format
val id = if (isTempWorkflow) "${LocalDateTime.now(ZoneOffset.UTC)}${UUID.randomUUID()}" else workflow.id
return WorkflowMetadata(
id = WorkflowMetadata.getId(id),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,17 @@ data class MonitorMetadata(
return MonitorMetadata(sin)
}

fun getId(monitor: Monitor, workflowId: String? = null): String {
return if (workflowId.isNullOrEmpty()) "${monitor.id}-metadata"
else "${monitor.id}-$workflowId-metadata"
/**
 * Builds the document id under which a monitor's metadata is stored.
 *
 * When the monitor runs on its own, the id is `<monitorId>-metadata`. When it runs as part of a
 * workflow execution, the id is prefixed with [workflowMetadataId] so that the monitor's
 * lastRunContext (relevant for doc level monitors) does not interfere with the metadata of the
 * standalone monitor execution.
 *
 * [workflowMetadataId] is either `workflowId-metadata` (scheduled workflow execution) or
 * `timestampWithUUID-metadata` (workflow executed in dry-run mode). In the dry-run case the id is
 * unique per execution, which ensures the doc level monitor metadata is created from scratch.
 *
 * @param monitor monitor whose metadata id is being built
 * @param workflowMetadataId id of the workflow metadata, or null/empty for a standalone monitor execution
 * @return the monitor metadata document id
 */
fun getId(monitor: Monitor, workflowMetadataId: String? = null): String {
    // Standalone monitor execution: metadata is keyed by the monitor id only.
    if (workflowMetadataId.isNullOrEmpty()) {
        return "${monitor.id}-metadata"
    }
    // The workflow metadata id already carries its own "-metadata" suffix; it is used here as a prefix.
    return "$workflowMetadataId-${monitor.id}-metadata"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.alerting.service

import kotlinx.coroutines.CoroutineName
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.SupervisorJob
import org.apache.logging.log4j.LogManager
import org.apache.lucene.search.join.ScoreMode
import org.opensearch.action.ActionListener
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest
import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest
import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse
import org.opensearch.action.delete.DeleteRequest
import org.opensearch.action.delete.DeleteResponse
import org.opensearch.action.search.SearchRequest
import org.opensearch.action.search.SearchResponse
import org.opensearch.action.support.IndicesOptions
import org.opensearch.action.support.WriteRequest.RefreshPolicy
import org.opensearch.action.support.master.AcknowledgedResponse
import org.opensearch.alerting.MonitorMetadataService
import org.opensearch.alerting.opensearchapi.suspendUntil
import org.opensearch.alerting.transport.TransportDeleteWorkflowAction.Companion.WORKFLOW_DELEGATE_PATH
import org.opensearch.alerting.transport.TransportDeleteWorkflowAction.Companion.WORKFLOW_MONITOR_PATH
import org.opensearch.alerting.util.AlertingException
import org.opensearch.client.Client
import org.opensearch.commons.alerting.action.DeleteMonitorResponse
import org.opensearch.commons.alerting.model.Monitor
import org.opensearch.commons.alerting.model.ScheduledJob
import org.opensearch.index.query.QueryBuilders
import org.opensearch.index.reindex.BulkByScrollResponse
import org.opensearch.index.reindex.DeleteByQueryAction
import org.opensearch.index.reindex.DeleteByQueryRequestBuilder
import org.opensearch.search.builder.SearchSourceBuilder
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine

/**
 * Component used when deleting monitors.
 *
 * Removes the monitor document itself and, on a best-effort basis, the doc level queries the
 * monitor registered in its query indices and the monitor metadata document. [initialize] must be
 * called with a [Client] before any other member is used.
 */
object DeleteMonitorService :
    CoroutineScope by CoroutineScope(SupervisorJob() + Dispatchers.Default + CoroutineName("DeleteMonitorService")) {
    private val log = LogManager.getLogger(this.javaClass)

    private lateinit var client: Client

    /**
     * Stores the client used for all delete and search requests issued by this service.
     *
     * @param client client used to talk to the cluster
     */
    fun initialize(
        client: Client,
    ) {
        DeleteMonitorService.client = client
    }

    /**
     * Deletes the monitor, docLevelQueries and monitor metadata.
     *
     * The monitor document is deleted first; failures of that delete propagate to the caller.
     * Cleanup of the doc level queries and of the metadata is best effort: errors are logged and
     * do not fail the request.
     *
     * @param monitor monitor to be deleted
     * @param refreshPolicy refresh policy applied to the monitor document delete request
     * @return response carrying the id and version of the deleted monitor document
     */
    suspend fun deleteMonitor(monitor: Monitor, refreshPolicy: RefreshPolicy): DeleteMonitorResponse {
        val deleteResponse = deleteMonitor(monitor.id, refreshPolicy)
        // Query cleanup reads the monitor metadata, so it must run before the metadata is deleted.
        deleteDocLevelMonitorQueriesAndIndices(monitor)
        deleteMetadata(monitor)
        return DeleteMonitorResponse(deleteResponse.id, deleteResponse.version)
    }

    /**
     * Deletes the monitor document from the scheduled jobs index.
     *
     * @param monitorId id of the monitor document
     * @param refreshPolicy refresh policy applied to the delete request
     */
    private suspend fun deleteMonitor(monitorId: String, refreshPolicy: RefreshPolicy): DeleteResponse {
        val deleteMonitorRequest = DeleteRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorId)
            .setRefreshPolicy(refreshPolicy)
        return client.suspendUntil { delete(deleteMonitorRequest, it) }
    }

    /**
     * Deletes the `<monitorId>-metadata` document from the scheduled jobs index.
     * Failures are logged and swallowed.
     *
     * @param monitor monitor whose metadata document is deleted
     */
    private suspend fun deleteMetadata(monitor: Monitor) {
        val deleteRequest = DeleteRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, "${monitor.id}-metadata")
            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
        try {
            val deleteResponse: DeleteResponse = client.suspendUntil { delete(deleteRequest, it) }
            log.debug("Monitor metadata: ${deleteResponse.id} deletion result: ${deleteResponse.result}")
        } catch (e: Exception) {
            // we only log the error and don't fail the request because if monitor document has been deleted,
            // we cannot retry based on this failure
            log.error("Failed to delete monitor metadata ${deleteRequest.id()}.", e)
        }
    }

    /**
     * Removes the doc level queries of the monitor from every query index referenced by the
     * monitor metadata. If a query index holds no queries of other monitors, the whole index is
     * deleted instead. Failures are logged and swallowed.
     *
     * @param monitor monitor whose doc level queries are removed
     */
    private suspend fun deleteDocLevelMonitorQueriesAndIndices(monitor: Monitor) {
        try {
            val metadata = MonitorMetadataService.getMetadata(monitor)
            metadata?.sourceToQueryIndexMapping?.forEach { (_, queryIndex) ->

                val indicesExistsResponse: IndicesExistsResponse =
                    client.suspendUntil {
                        client.admin().indices().exists(IndicesExistsRequest(queryIndex), it)
                    }
                if (!indicesExistsResponse.isExists) {
                    // Skip only this query index. A bare `return` would exit the whole function
                    // (forEach is inline) and leave the remaining query indices uncleaned.
                    return@forEach
                }
                // Check if there's any queries from other monitors in this queryIndex,
                // to avoid unnecessary doc deletion, if we could just delete index completely
                val searchResponse: SearchResponse = client.suspendUntil {
                    search(
                        SearchRequest(queryIndex).source(
                            SearchSourceBuilder()
                                .size(0)
                                .query(
                                    QueryBuilders.boolQuery().mustNot(
                                        QueryBuilders.matchQuery("monitor_id", monitor.id)
                                    )
                                )
                        ).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
                        it
                    )
                }
                if (searchResponse.hits.totalHits.value == 0L) {
                    val ack: AcknowledgedResponse = client.suspendUntil {
                        client.admin().indices().delete(
                            DeleteIndexRequest(queryIndex).indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_HIDDEN),
                            it
                        )
                    }
                    if (!ack.isAcknowledged) {
                        log.error("Deletion of concrete queryIndex:$queryIndex is not ack'd!")
                    }
                } else {
                    // Delete all queries added by this monitor
                    val response: BulkByScrollResponse = suspendCoroutine { cont ->
                        DeleteByQueryRequestBuilder(client, DeleteByQueryAction.INSTANCE)
                            .source(queryIndex)
                            .filter(QueryBuilders.matchQuery("monitor_id", monitor.id))
                            .refresh(true)
                            .execute(
                                object : ActionListener<BulkByScrollResponse> {
                                    override fun onResponse(response: BulkByScrollResponse) = cont.resume(response)
                                    override fun onFailure(t: Exception) = cont.resumeWithException(t)
                                }
                            )
                    }
                    // Delete-by-query can complete "successfully" while reporting per-document or
                    // per-shard failures inside the response, so surface them in the log.
                    if (response.bulkFailures.isNotEmpty() || response.searchFailures.isNotEmpty()) {
                        log.error(
                            "Deleting doc level queries of monitor ${monitor.id} from queryIndex:$queryIndex finished with " +
                                "${response.bulkFailures.size} bulk failures and ${response.searchFailures.size} search failures"
                        )
                    }
                }
            }
        } catch (e: Exception) {
            // we only log the error and don't fail the request because if monitor document has been deleted successfully,
            // we cannot retry based on this failure
            log.error("Failed to delete doc level queries from query index.", e)
        }
    }

    /**
     * Checks if the monitor is part of the workflow
     *
     * Runs a nested query over the workflow delegates stored in the scheduled jobs index with the
     * thread context stashed for the duration of the search.
     *
     * @param monitorId id of monitor that is checked if it is a workflow delegate
     * @return false when the search reports zero matching workflows, true otherwise
     * @throws AlertingException wrapping any error raised while searching
     */
    suspend fun monitorIsWorkflowDelegate(monitorId: String): Boolean {
        val queryBuilder = QueryBuilders.nestedQuery(
            WORKFLOW_DELEGATE_PATH,
            QueryBuilders.boolQuery().must(
                QueryBuilders.matchQuery(
                    WORKFLOW_MONITOR_PATH,
                    monitorId
                )
            ),
            ScoreMode.None
        )
        try {
            val searchRequest = SearchRequest()
                .indices(ScheduledJob.SCHEDULED_JOBS_INDEX)
                .source(SearchSourceBuilder().query(queryBuilder))

            // The stored context is restored when `use` closes it, including on early return.
            client.threadPool().threadContext.stashContext().use {
                val searchResponse: SearchResponse = client.suspendUntil { search(searchRequest, it) }
                if (searchResponse.hits.totalHits?.value == 0L) {
                    return false
                }

                val workflowIds = searchResponse.hits.hits.map { it.id }.joinToString()
                log.info("Monitor $monitorId can't be deleted since it belongs to $workflowIds")
                return true
            }
        } catch (ex: Exception) {
            log.error("Error getting the monitor workflows", ex)
            throw AlertingException.wrap(ex)
        }
    }
}
Loading

0 comments on commit c328bab

Please sign in to comment.