From 62674db25965044ae3bf33683d62309cc9e37479 Mon Sep 17 00:00:00 2001 From: Surya Sashank Nistala Date: Tue, 11 Jul 2023 18:59:09 -0700 Subject: [PATCH] Backport 2.x Adds chained alerts (#976) (#1007) * Adds chained alerts (#976) * chained alert triggers Signed-off-by: Surya Sashank Nistala * converge all single node test cases Signed-off-by: Surya Sashank Nistala * add license headers to files Signed-off-by: Surya Sashank Nistala * fix workflow not found issue Signed-off-by: Surya Sashank Nistala * added audit state alerts for doc level monitors Signed-off-by: Surya Sashank Nistala * add audit alerts in query level monitor Signed-off-by: Surya Sashank Nistala * temp: upload custom built common utils jar Signed-off-by: Surya Sashank Nistala * fix get monitor response parsing to include associated_workflows Signed-off-by: Surya Sashank Nistala * add query level monitor audit alerts tests Signed-off-by: Surya Sashank Nistala * add audit alerts in bucket level monitor Signed-off-by: Surya Sashank Nistala * fix workflow tests Signed-off-by: Surya Sashank Nistala * alerting Signed-off-by: Surya Sashank Nistala * verify bucket monitor audit alerts and chained alerts in workflow Signed-off-by: Surya Sashank Nistala * make execution id mandatory Signed-off-by: Surya Sashank Nistala * revert mapping update in run job method Signed-off-by: Surya Sashank Nistala * minor fixes in chained alert trigger result Signed-off-by: Surya Sashank Nistala * fix chained alert triggers tests Signed-off-by: Surya Sashank Nistala * fix acknowledge chained alert bug Signed-off-by: Surya Sashank Nistala * revert get alerts change Signed-off-by: Surya Sashank Nistala * refactor and remove transport actions being invoked in other transport actions Signed-off-by: Surya Sashank Nistala * add license header Signed-off-by: Surya Sashank Nistala * scheduled job mapping schema Signed-off-by: Surya Sashank Nistala * fix ktlint and revert gradle dev set up chanegs Signed-off-by: Surya Sashank Nistala * 
fix post delete method and refactor alert mover to add class level logger Signed-off-by: Surya Sashank Nistala * fix test - pass workflow id in get alerts Signed-off-by: Surya Sashank Nistala * remove monitor empty filter in get alerts api as there is dedicated api for fetching chained alerts - workflow alerts api Signed-off-by: Surya Sashank Nistala * fix check for workflow id is empty or null in get alerts action Signed-off-by: Surya Sashank Nistala * fix alert mover method delegate monitor parsing logic Signed-off-by: Surya Sashank Nistala * remove common utils jar from repo Signed-off-by: Surya Sashank Nistala --------- Signed-off-by: Surya Sashank Nistala * fix imports Signed-off-by: Surya Sashank Nistala --------- Signed-off-by: Surya Sashank Nistala (cherry picked from commit d2d03c6b7e6e6f2f852043ea4caa8f1c10db0d48) --- .../org/opensearch/alerting/AlertService.kt | 119 +- .../org/opensearch/alerting/AlertingPlugin.kt | 33 +- .../alerting/BucketLevelMonitorRunner.kt | 27 +- .../alerting/DocumentLevelMonitorRunner.kt | 28 +- .../org/opensearch/alerting/MonitorRunner.kt | 3 +- .../alerting/MonitorRunnerService.kt | 95 +- .../alerting/QueryLevelMonitorRunner.kt | 20 +- .../org/opensearch/alerting/TriggerService.kt | 38 +- .../opensearch/alerting/WorkflowService.kt | 6 + .../alerting/action/GetMonitorResponse.kt | 65 +- .../opensearch/alerting/alerts/AlertMover.kt | 291 +++- .../parsers/ChainedAlertExpressionParser.kt | 53 + .../ChainedAlertExpressionRPNBaseParser.kt | 114 ++ .../parsers/ExpressionParser.kt | 12 + .../resolvers/ChainedAlertRPNResolver.kt | 110 ++ .../ChainedAlertTriggerExpression.kt | 32 + .../resolvers/ChainedAlertTriggerResolver.kt | 11 + .../tokens/CAExpressionOperator.kt | 20 + .../tokens/CAExpressionToken.kt | 11 + .../tokens/ChainedAlertExpressionConstant.kt | 24 + .../tokens/ExpressionToken.kt | 8 + .../model/ChainedAlertTriggerRunResult.kt | 69 + .../alerting/model/WorkflowRunResult.kt | 57 +- .../RestAcknowledgeChainedAlertsAction.kt | 
82 + .../resthandler/RestExecuteWorkflowAction.kt | 59 + .../RestGetWorkflowAlertsAction.kt | 86 + .../ChainedAlertTriggerExecutionContext.kt | 56 + .../alerting/service/DeleteMonitorService.kt | 4 +- .../TransportAcknowledgeChainedAlertAction.kt | 296 ++++ .../TransportDeleteWorkflowAction.kt | 7 +- .../TransportExecuteWorkflowAction.kt | 7 +- .../transport/TransportGetAlertsAction.kt | 13 +- .../transport/TransportGetMonitorAction.kt | 64 +- .../TransportGetWorkflowAlertsAction.kt | 240 +++ .../alerting/util/ScheduledJobUtils.kt | 70 + .../workflow/CompositeWorkflowRunner.kt | 296 +++- .../alerting/workflow/WorkflowRunContext.kt | 4 +- .../alerting/workflow/WorkflowRunner.kt | 174 ++ .../workflow/WorkflowRunnerService.kt | 252 --- .../opensearch/alerting/AlertServiceTests.kt | 30 +- .../alerting/AlertingRestTestCase.kt | 94 +- .../alerting/MonitorDataSourcesIT.kt | 1472 ++++++++++++++--- .../org/opensearch/alerting/TestHelpers.kt | 30 +- .../action/GetMonitorResponseTests.kt | 4 +- .../alerting/alerts/AlertIndicesIT.kt | 4 +- .../ChainedAlertsExpressionParserTests.kt | 84 + .../ChainedAlertsExpressionResolveTests.kt | 118 ++ .../alerting/resthandler/WorkflowRestApiIT.kt | 104 +- .../transport/AlertingSingleNodeTestCase.kt | 57 +- .../resources/mappings/scheduled-jobs.json | 5 +- 50 files changed, 4172 insertions(+), 786 deletions(-) create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionParser.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionRPNBaseParser.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ExpressionParser.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertRPNResolver.kt create mode 100644 
alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerExpression.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerResolver.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionOperator.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionToken.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ChainedAlertExpressionConstant.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ExpressionToken.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/model/ChainedAlertTriggerRunResult.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestAcknowledgeChainedAlertsAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteWorkflowAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetWorkflowAlertsAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/script/ChainedAlertTriggerExecutionContext.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportAcknowledgeChainedAlertAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetWorkflowAlertsAction.kt create mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/util/ScheduledJobUtils.kt delete mode 100644 alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunnerService.kt create mode 100644 alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionParserTests.kt create mode 100644 alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionResolveTests.kt diff --git 
a/alerting/src/main/kotlin/org/opensearch/alerting/AlertService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertService.kt index c2c5e024b..0e6b4f319 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertService.kt @@ -24,6 +24,7 @@ import org.opensearch.alerting.model.QueryLevelTriggerRunResult import org.opensearch.alerting.opensearchapi.firstFailureOrNull import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.script.ChainedAlertTriggerExecutionContext import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext import org.opensearch.alerting.script.QueryLevelTriggerExecutionContext import org.opensearch.alerting.util.IndexUtils @@ -47,6 +48,7 @@ import org.opensearch.commons.alerting.model.DataSources import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.NoOpTrigger import org.opensearch.commons.alerting.model.Trigger +import org.opensearch.commons.alerting.model.Workflow import org.opensearch.commons.alerting.model.action.AlertCategory import org.opensearch.core.xcontent.NamedXContentRegistry import org.opensearch.core.xcontent.XContentParser @@ -81,10 +83,11 @@ class AlertService( private val logger = LogManager.getLogger(AlertService::class.java) - suspend fun loadCurrentAlertsForQueryLevelMonitor(monitor: Monitor): Map { + suspend fun loadCurrentAlertsForQueryLevelMonitor(monitor: Monitor, workflowRunContext: WorkflowRunContext?): Map { val searchAlertsResponse: SearchResponse = searchAlerts( monitor = monitor, - size = monitor.triggers.size * 2 // We expect there to be only a single in-progress alert so fetch 2 to check + size = monitor.triggers.size * 2, // We expect there to be only a single in-progress alert so fetch 2 to check + workflowRunContext ) val foundAlerts = searchAlertsResponse.hits.map { 
Alert.parse(contentParser(it.sourceRef), it.id, it.version) } @@ -100,11 +103,15 @@ class AlertService( } } - suspend fun loadCurrentAlertsForBucketLevelMonitor(monitor: Monitor): Map> { + suspend fun loadCurrentAlertsForBucketLevelMonitor( + monitor: Monitor, + workflowRunContext: WorkflowRunContext?, + ): Map> { val searchAlertsResponse: SearchResponse = searchAlerts( monitor = monitor, // TODO: This should be limited based on a circuit breaker that limits Alerts - size = MAX_BUCKET_LEVEL_MONITOR_ALERT_SEARCH_COUNT + size = MAX_BUCKET_LEVEL_MONITOR_ALERT_SEARCH_COUNT, + workflowRunContext = workflowRunContext ) val foundAlerts = searchAlertsResponse.hits.map { Alert.parse(contentParser(it.sourceRef), it.id, it.version) } @@ -123,7 +130,9 @@ class AlertService( fun composeQueryLevelAlert( ctx: QueryLevelTriggerExecutionContext, result: QueryLevelTriggerRunResult, - alertError: AlertError? + alertError: AlertError?, + executionId: String, + workflorwRunContext: WorkflowRunContext? ): Alert? 
{ val currentTime = Instant.now() val currentAlert = ctx.alert @@ -181,15 +190,19 @@ class AlertService( errorMessage = alertError?.message, errorHistory = updatedHistory, actionExecutionResults = updatedActionExecutionResults, - schemaVersion = IndexUtils.alertIndexSchemaVersion + schemaVersion = IndexUtils.alertIndexSchemaVersion, ) } else { - val alertState = if (alertError == null) Alert.State.ACTIVE else Alert.State.ERROR + val alertState = if (workflorwRunContext?.auditDelegateMonitorAlerts == true) { + Alert.State.AUDIT + } else if (alertError == null) Alert.State.ACTIVE + else Alert.State.ERROR Alert( monitor = ctx.monitor, trigger = ctx.trigger, startTime = currentTime, lastNotificationTime = currentTime, state = alertState, errorMessage = alertError?.message, errorHistory = updatedHistory, actionExecutionResults = updatedActionExecutionResults, - schemaVersion = IndexUtils.alertIndexSchemaVersion + schemaVersion = IndexUtils.alertIndexSchemaVersion, executionId = executionId, + workflowId = workflorwRunContext?.workflowId ?: "" ) } } @@ -199,15 +212,24 @@ class AlertService( findings: List, relatedDocIds: List, ctx: DocumentLevelTriggerExecutionContext, - alertError: AlertError? + alertError: AlertError?, + executionId: String, + workflorwRunContext: WorkflowRunContext? 
): Alert { val currentTime = Instant.now() - val alertState = if (alertError == null) Alert.State.ACTIVE else Alert.State.ERROR + val alertState = if (workflorwRunContext?.auditDelegateMonitorAlerts == true) { + Alert.State.AUDIT + } else if (alertError == null) { + Alert.State.ACTIVE + } else { + Alert.State.ERROR + } return Alert( id = UUID.randomUUID().toString(), monitor = ctx.monitor, trigger = ctx.trigger, startTime = currentTime, lastNotificationTime = currentTime, state = alertState, errorMessage = alertError?.message, - schemaVersion = IndexUtils.alertIndexSchemaVersion, findingIds = findings, relatedDocIds = relatedDocIds + schemaVersion = IndexUtils.alertIndexSchemaVersion, findingIds = findings, relatedDocIds = relatedDocIds, + executionId = executionId, workflowId = workflorwRunContext?.workflowId ?: "" ) } @@ -219,12 +241,33 @@ class AlertService( workflowRunContext: WorkflowRunContext? ): Alert { val currentTime = Instant.now() + val alertState = if (workflowRunContext?.auditDelegateMonitorAlerts == true) { + Alert.State.AUDIT + } else { + Alert.State.ERROR + } return Alert( id = id, monitor = monitor, trigger = NoOpTrigger(), startTime = currentTime, - lastNotificationTime = currentTime, state = Alert.State.ERROR, errorMessage = alertError?.message, - schemaVersion = IndexUtils.alertIndexSchemaVersion, - workflowId = workflowRunContext?.workflowId ?: "", - executionId = executionId ?: "" + lastNotificationTime = currentTime, state = alertState, errorMessage = alertError?.message, + schemaVersion = IndexUtils.alertIndexSchemaVersion, executionId = executionId, workflowId = workflowRunContext?.workflowId ?: "" + ) + } + + fun composeChainedAlert( + ctx: ChainedAlertTriggerExecutionContext, + executionId: String, + workflow: Workflow, + associatedAlertIds: List + ): Alert { + return Alert( + startTime = Instant.now(), + lastNotificationTime = Instant.now(), + state = Alert.State.ACTIVE, + errorMessage = null, schemaVersion = -1, + chainedAlertTrigger = 
ctx.trigger, + executionId = executionId, + workflow = workflow, + associatedAlertIds = associatedAlertIds ) } @@ -279,7 +322,9 @@ class AlertService( trigger: BucketLevelTrigger, currentAlerts: MutableMap, aggResultBuckets: List, - findings: List + findings: List, + executionId: String, + workflorwRunContext: WorkflowRunContext? ): Map> { val dedupedAlerts = mutableListOf() val newAlerts = mutableListOf() @@ -295,12 +340,15 @@ class AlertService( currentAlerts.remove(aggAlertBucket.getBucketKeysHash()) } else { // New Alert + val alertState = if (workflorwRunContext?.auditDelegateMonitorAlerts == true) { + Alert.State.AUDIT + } else Alert.State.ACTIVE val newAlert = Alert( monitor = monitor, trigger = trigger, startTime = currentTime, - lastNotificationTime = currentTime, state = Alert.State.ACTIVE, errorMessage = null, + lastNotificationTime = currentTime, state = alertState, errorMessage = null, errorHistory = mutableListOf(), actionExecutionResults = mutableListOf(), schemaVersion = IndexUtils.alertIndexSchemaVersion, aggregationResultBucket = aggAlertBucket, - findingIds = findings + findingIds = findings, executionId = executionId, workflowId = workflorwRunContext?.workflowId ?: "" ) newAlerts.add(newAlert) } @@ -528,7 +576,8 @@ class AlertService( dataSources: DataSources, alerts: List, retryPolicy: BackoffPolicy, - allowUpdatingAcknowledgedAlert: Boolean = false + allowUpdatingAcknowledgedAlert: Boolean = false, + routingId: String // routing is mandatory and set as monitor id. 
for workflow chained alerts we pass workflow id as routing ) { val alertsIndex = dataSources.alertsIndex val alertsHistoryIndex = dataSources.alertsHistoryIndex @@ -542,7 +591,7 @@ class AlertService( Alert.State.ACTIVE, Alert.State.ERROR -> { listOf>( IndexRequest(alertsIndex) - .routing(alert.monitorId) + .routing(routingId) .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) .id(if (alert.id != Alert.NO_ID) alert.id else null) ) @@ -553,7 +602,7 @@ class AlertService( if (allowUpdatingAcknowledgedAlert) { listOf>( IndexRequest(alertsIndex) - .routing(alert.monitorId) + .routing(routingId) .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) .id(if (alert.id != Alert.NO_ID) alert.id else null) ) @@ -562,9 +611,12 @@ class AlertService( } } Alert.State.AUDIT -> { + val index = if (alertIndices.isAlertHistoryEnabled()) { + dataSources.alertsHistoryIndex + } else dataSources.alertsIndex listOf>( - IndexRequest(alertsIndex) - .routing(alert.monitorId) + IndexRequest(index) + .routing(routingId) .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) .id(if (alert.id != Alert.NO_ID) alert.id else null) ) @@ -575,11 +627,11 @@ class AlertService( Alert.State.COMPLETED -> { listOfNotNull>( DeleteRequest(alertsIndex, alert.id) - .routing(alert.monitorId), + .routing(routingId), // Only add completed alert to history index if history is enabled if (alertIndices.isAlertHistoryEnabled()) { IndexRequest(alertsHistoryIndex) - .routing(alert.monitorId) + .routing(routingId) .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) .id(alert.id) } else null @@ -591,7 +643,7 @@ class AlertService( if (requestsToRetry.isEmpty()) return // Retry Bulk requests if there was any 429 response retryPolicy.retry(logger, listOf(RestStatus.TOO_MANY_REQUESTS)) { - val bulkRequest = BulkRequest().add(requestsToRetry) + val bulkRequest = BulkRequest().add(requestsToRetry).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) val bulkResponse: 
BulkResponse = client.suspendUntil { client.bulk(bulkRequest, it) } val failedResponses = (bulkResponse.items ?: arrayOf()).filter { it.isFailed } requestsToRetry = failedResponses.filter { it.status() == RestStatus.TOO_MANY_REQUESTS } @@ -616,13 +668,16 @@ class AlertService( val savedAlerts = mutableListOf() var alertsBeingIndexed = alerts var requestsToRetry: MutableList = alerts.map { alert -> - if (alert.state != Alert.State.ACTIVE) { + if (alert.state != Alert.State.ACTIVE && alert.state != Alert.State.AUDIT) { throw IllegalStateException("Unexpected attempt to save new alert [$alert] with state [${alert.state}]") } if (alert.id != Alert.NO_ID) { throw IllegalStateException("Unexpected attempt to save new alert [$alert] with an existing alert ID [${alert.id}]") } - IndexRequest(dataSources.alertsIndex) + val alertIndex = if (alert.state == Alert.State.AUDIT && alertIndices.isAlertHistoryEnabled()) { + dataSources.alertsHistoryIndex + } else dataSources.alertsIndex + IndexRequest(alertIndex) .routing(alert.monitorId) .source(alert.toXContentWithUser(XContentFactory.jsonBuilder())) }.toMutableList() @@ -683,13 +738,15 @@ class AlertService( * @param monitorId The Monitor to get Alerts for * @param size The number of search hits (Alerts) to return */ - private suspend fun searchAlerts(monitor: Monitor, size: Int): SearchResponse { + private suspend fun searchAlerts(monitor: Monitor, size: Int, workflowRunContext: WorkflowRunContext?): SearchResponse { val monitorId = monitor.id val alertIndex = monitor.dataSources.alertsIndex val queryBuilder = QueryBuilders.boolQuery() - .filter(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId)) - + .must(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId)) + if (workflowRunContext != null) { + queryBuilder.must(QueryBuilders.termQuery(Alert.WORKFLOW_ID_FIELD, workflowRunContext.workflowId)) + } val searchSourceBuilder = SearchSourceBuilder() .size(size) .query(queryBuilder) diff --git 
a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index 03dbfbac3..42d30add5 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -26,9 +26,11 @@ import org.opensearch.alerting.core.schedule.JobScheduler import org.opensearch.alerting.core.settings.LegacyOpenDistroScheduledJobSettings import org.opensearch.alerting.core.settings.ScheduledJobSettings import org.opensearch.alerting.resthandler.RestAcknowledgeAlertAction +import org.opensearch.alerting.resthandler.RestAcknowledgeChainedAlertAction import org.opensearch.alerting.resthandler.RestDeleteMonitorAction import org.opensearch.alerting.resthandler.RestDeleteWorkflowAction import org.opensearch.alerting.resthandler.RestExecuteMonitorAction +import org.opensearch.alerting.resthandler.RestExecuteWorkflowAction import org.opensearch.alerting.resthandler.RestGetAlertsAction import org.opensearch.alerting.resthandler.RestGetDestinationsAction import org.opensearch.alerting.resthandler.RestGetEmailAccountAction @@ -36,6 +38,7 @@ import org.opensearch.alerting.resthandler.RestGetEmailGroupAction import org.opensearch.alerting.resthandler.RestGetFindingsAction import org.opensearch.alerting.resthandler.RestGetMonitorAction import org.opensearch.alerting.resthandler.RestGetWorkflowAction +import org.opensearch.alerting.resthandler.RestGetWorkflowAlertsAction import org.opensearch.alerting.resthandler.RestIndexMonitorAction import org.opensearch.alerting.resthandler.RestIndexWorkflowAction import org.opensearch.alerting.resthandler.RestSearchEmailAccountAction @@ -48,6 +51,7 @@ import org.opensearch.alerting.settings.DestinationSettings import org.opensearch.alerting.settings.LegacyOpenDistroAlertingSettings import org.opensearch.alerting.settings.LegacyOpenDistroDestinationSettings import 
org.opensearch.alerting.transport.TransportAcknowledgeAlertAction +import org.opensearch.alerting.transport.TransportAcknowledgeChainedAlertAction import org.opensearch.alerting.transport.TransportDeleteMonitorAction import org.opensearch.alerting.transport.TransportDeleteWorkflowAction import org.opensearch.alerting.transport.TransportExecuteMonitorAction @@ -59,6 +63,7 @@ import org.opensearch.alerting.transport.TransportGetEmailGroupAction import org.opensearch.alerting.transport.TransportGetFindingsSearchAction import org.opensearch.alerting.transport.TransportGetMonitorAction import org.opensearch.alerting.transport.TransportGetWorkflowAction +import org.opensearch.alerting.transport.TransportGetWorkflowAlertsAction import org.opensearch.alerting.transport.TransportIndexMonitorAction import org.opensearch.alerting.transport.TransportIndexWorkflowAction import org.opensearch.alerting.transport.TransportSearchEmailAccountAction @@ -66,7 +71,6 @@ import org.opensearch.alerting.transport.TransportSearchEmailGroupAction import org.opensearch.alerting.transport.TransportSearchMonitorAction import org.opensearch.alerting.util.DocLevelMonitorQueries import org.opensearch.alerting.util.destinationmigration.DestinationMigrationCoordinator -import org.opensearch.alerting.workflow.WorkflowRunnerService import org.opensearch.client.Client import org.opensearch.cluster.metadata.IndexNameExpressionResolver import org.opensearch.cluster.node.DiscoveryNodes @@ -81,6 +85,7 @@ import org.opensearch.common.settings.SettingsFilter import org.opensearch.commons.alerting.action.AlertingActions import org.opensearch.commons.alerting.aggregation.bucketselectorext.BucketSelectorExtAggregationBuilder import org.opensearch.commons.alerting.model.BucketLevelTrigger +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.ClusterMetricsInput import org.opensearch.commons.alerting.model.DocLevelMonitorInput import 
org.opensearch.commons.alerting.model.DocumentLevelTrigger @@ -147,11 +152,10 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R @JvmField val FINDING_BASE_URI = "/_plugins/_alerting/findings" - @JvmField val ALERTING_JOB_TYPES = listOf("monitor") + @JvmField val ALERTING_JOB_TYPES = listOf("monitor", "workflow") } lateinit var runner: MonitorRunnerService - lateinit var workflowRunner: WorkflowRunnerService lateinit var scheduler: JobScheduler lateinit var sweeper: JobSweeper lateinit var scheduledJobIndices: ScheduledJobIndices @@ -177,7 +181,9 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestIndexWorkflowAction(), RestSearchMonitorAction(settings, clusterService), RestExecuteMonitorAction(), + RestExecuteWorkflowAction(), RestAcknowledgeAlertAction(), + RestAcknowledgeChainedAlertAction(), RestScheduledJobStatsHandler("_alerting"), RestSearchEmailAccountAction(), RestGetEmailAccountAction(), @@ -185,6 +191,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R RestGetEmailGroupAction(), RestGetDestinationsAction(), RestGetAlertsAction(), + RestGetWorkflowAlertsAction(), RestGetFindingsAction(), RestGetWorkflowAction(), RestDeleteWorkflowAction() @@ -200,12 +207,16 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(SearchMonitorAction.INSTANCE, TransportSearchMonitorAction::class.java), ActionPlugin.ActionHandler(AlertingActions.DELETE_MONITOR_ACTION_TYPE, TransportDeleteMonitorAction::class.java), ActionPlugin.ActionHandler(AlertingActions.ACKNOWLEDGE_ALERTS_ACTION_TYPE, TransportAcknowledgeAlertAction::class.java), + ActionPlugin.ActionHandler( + AlertingActions.ACKNOWLEDGE_CHAINED_ALERTS_ACTION_TYPE, TransportAcknowledgeChainedAlertAction::class.java + ), ActionPlugin.ActionHandler(GetEmailAccountAction.INSTANCE, TransportGetEmailAccountAction::class.java), 
ActionPlugin.ActionHandler(SearchEmailAccountAction.INSTANCE, TransportSearchEmailAccountAction::class.java), ActionPlugin.ActionHandler(GetEmailGroupAction.INSTANCE, TransportGetEmailGroupAction::class.java), ActionPlugin.ActionHandler(SearchEmailGroupAction.INSTANCE, TransportSearchEmailGroupAction::class.java), ActionPlugin.ActionHandler(GetDestinationsAction.INSTANCE, TransportGetDestinationsAction::class.java), ActionPlugin.ActionHandler(AlertingActions.GET_ALERTS_ACTION_TYPE, TransportGetAlertsAction::class.java), + ActionPlugin.ActionHandler(AlertingActions.GET_WORKFLOW_ALERTS_ACTION_TYPE, TransportGetWorkflowAlertsAction::class.java), ActionPlugin.ActionHandler(AlertingActions.GET_FINDINGS_ACTION_TYPE, TransportGetFindingsSearchAction::class.java), ActionPlugin.ActionHandler(AlertingActions.INDEX_WORKFLOW_ACTION_TYPE, TransportIndexWorkflowAction::class.java), ActionPlugin.ActionHandler(AlertingActions.GET_WORKFLOW_ACTION_TYPE, TransportGetWorkflowAction::class.java), @@ -223,6 +234,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R BucketLevelTrigger.XCONTENT_REGISTRY, ClusterMetricsInput.XCONTENT_REGISTRY, DocumentLevelTrigger.XCONTENT_REGISTRY, + ChainedAlertTrigger.XCONTENT_REGISTRY, Workflow.XCONTENT_REGISTRY ) } @@ -256,21 +268,6 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R .registerTriggerService(TriggerService(scriptService)) .registerAlertService(AlertService(client, xContentRegistry, alertIndices)) .registerDocLevelMonitorQueries(DocLevelMonitorQueries(client, clusterService)) - .registerConsumers() - .registerDestinationSettings() - workflowRunner = WorkflowRunnerService - .registerClusterService(clusterService) - .registerClient(client) - .registerNamedXContentRegistry(xContentRegistry) - .registerScriptService(scriptService) - .registerIndexNameExpressionResolver(indexNameExpressionResolver) - .registerSettings(settings) - .registerThreadPool(threadPool) - 
.registerAlertIndices(alertIndices) - .registerInputService(InputService(client, scriptService, namedWriteableRegistry, xContentRegistry, clusterService, settings)) - .registerTriggerService(TriggerService(scriptService)) - .registerAlertService(AlertService(client, xContentRegistry, alertIndices)) - .registerDocLevelMonitorQueries(DocLevelMonitorQueries(client, clusterService)) .registerWorkflowService(WorkflowService(client, xContentRegistry)) .registerConsumers() .registerDestinationSettings() diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt index 93989c7aa..e7a6909a9 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt @@ -61,7 +61,8 @@ object BucketLevelMonitorRunner : MonitorRunner() { periodStart: Instant, periodEnd: Instant, dryrun: Boolean, - workflowRunContext: WorkflowRunContext? 
+ workflowRunContext: WorkflowRunContext?, + executionId: String ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") @@ -77,7 +78,7 @@ object BucketLevelMonitorRunner : MonitorRunner() { if (monitor.dataSources.findingsEnabled == true) { monitorCtx.alertIndices!!.createOrUpdateInitialFindingHistoryIndex(monitor.dataSources) } - monitorCtx.alertService!!.loadCurrentAlertsForBucketLevelMonitor(monitor) + monitorCtx.alertService!!.loadCurrentAlertsForBucketLevelMonitor(monitor, workflowRunContext) } catch (e: Exception) { // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts val id = if (monitor.id.trim().isEmpty()) "_na_" else monitor.id @@ -158,7 +159,7 @@ object BucketLevelMonitorRunner : MonitorRunner() { periodStart, periodEnd, !dryrun && monitor.id != Monitor.NO_ID, - workflowRunContext + executionId ) } else { emptyList() @@ -170,7 +171,9 @@ object BucketLevelMonitorRunner : MonitorRunner() { trigger, currentAlertsForTrigger, triggerResult.aggregationResultBuckets.values.toList(), - findings + findings, + executionId, + workflowRunContext ).toMutableMap() val dedupedAlerts = categorizedAlerts.getOrDefault(AlertCategory.DEDUPED, emptyList()) var newAlerts = categorizedAlerts.getOrDefault(AlertCategory.NEW, emptyList()) @@ -189,7 +192,8 @@ object BucketLevelMonitorRunner : MonitorRunner() { monitor.dataSources, dedupedAlerts, monitorCtx.retryPolicy!!, - allowUpdatingAcknowledgedAlert = true + allowUpdatingAcknowledgedAlert = true, + monitor.id ) newAlerts = monitorCtx.alertService!!.saveNewAlerts(monitor.dataSources, newAlerts, monitorCtx.retryPolicy!!) } @@ -330,17 +334,16 @@ object BucketLevelMonitorRunner : MonitorRunner() { // ACKNOWLEDGED Alerts should not be saved here since actions are not executed for them. 
if (!dryrun && monitor.id != Monitor.NO_ID) { monitorCtx.alertService!!.saveAlerts( - monitor.dataSources, - updatedAlerts, - monitorCtx.retryPolicy!!, - allowUpdatingAcknowledgedAlert = false + monitor.dataSources, updatedAlerts, monitorCtx.retryPolicy!!, allowUpdatingAcknowledgedAlert = false, + routingId = monitor.id ) // Save any COMPLETED Alerts that were not covered in updatedAlerts monitorCtx.alertService!!.saveAlerts( monitor.dataSources, completedAlertsToUpdate.toList(), monitorCtx.retryPolicy!!, - allowUpdatingAcknowledgedAlert = false + allowUpdatingAcknowledgedAlert = false, + monitor.id ) } } @@ -355,7 +358,7 @@ object BucketLevelMonitorRunner : MonitorRunner() { periodStart: Instant, periodEnd: Instant, shouldCreateFinding: Boolean, - workflowRunContext: WorkflowRunContext? = null + executionId: String, ): List { monitor.inputs.forEach { input -> if (input is SearchInput) { @@ -414,7 +417,7 @@ object BucketLevelMonitorRunner : MonitorRunner() { sr.source().query(queryBuilder) } val searchResponse: SearchResponse = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.search(sr, it) } - return createFindingPerIndex(searchResponse, monitor, monitorCtx, shouldCreateFinding, workflowRunContext?.executionId) + return createFindingPerIndex(searchResponse, monitor, monitorCtx, shouldCreateFinding, executionId) } else { logger.error("Couldn't resolve groupBy field. 
Not generating bucket level monitor findings for monitor %${monitor.id}") } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt index 9c3cfe0be..4e3fd25b0 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt @@ -72,7 +72,8 @@ object DocumentLevelMonitorRunner : MonitorRunner() { periodStart: Instant, periodEnd: Instant, dryrun: Boolean, - workflowRunContext: WorkflowRunContext? + workflowRunContext: WorkflowRunContext?, + executionId: String ): MonitorRunResult { logger.debug("Document-level-monitor is running ...") val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID @@ -239,7 +240,8 @@ object DocumentLevelMonitorRunner : MonitorRunner() { docsToQueries, queryToDocIds, dryrun, - workflowRunContext?.executionId + executionId = executionId, + workflowRunContext = workflowRunContext ) } } @@ -251,7 +253,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() { monitorCtx.alertService!!.upsertMonitorErrorAlert( monitor = monitor, errorMessage = errorMessage, - executionId = workflowRunContext?.executionId, + executionId = executionId, workflowRunContext ) } else { @@ -268,7 +270,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() { return monitorResult.copy(triggerResults = triggerResults) } catch (e: Exception) { val errorMessage = ExceptionsHelper.detailedMessage(e) - monitorCtx.alertService!!.upsertMonitorErrorAlert(monitor, errorMessage, workflowRunContext?.executionId, workflowRunContext) + monitorCtx.alertService!!.upsertMonitorErrorAlert(monitor, errorMessage, executionId, workflowRunContext) logger.error("Failed running Document-level-monitor ${monitor.name}", e) val alertingException = AlertingException( errorMessage, @@ -317,7 +319,8 @@ object DocumentLevelMonitorRunner : MonitorRunner() { docsToQueries: 
Map>, queryToDocIds: Map>, dryrun: Boolean, - workflowExecutionId: String? = null + workflowRunContext: WorkflowRunContext?, + executionId: String ): DocumentLevelTriggerRunResult { val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger) val triggerResult = monitorCtx.triggerService!!.runDocLevelTrigger(monitor, trigger, queryToDocIds) @@ -334,7 +337,7 @@ object DocumentLevelMonitorRunner : MonitorRunner() { triggeredQueries, it.key, !dryrun && monitor.id != Monitor.NO_ID, - workflowExecutionId + executionId ) findings.add(findingId) @@ -355,7 +358,9 @@ object DocumentLevelMonitorRunner : MonitorRunner() { listOf(it.first), listOf(it.second), triggerCtx, - monitorResult.alertError() ?: triggerResult.alertError() + monitorResult.alertError() ?: triggerResult.alertError(), + executionId = executionId, + workflorwRunContext = workflowRunContext ) alerts.add(alert) } @@ -395,7 +400,14 @@ object DocumentLevelMonitorRunner : MonitorRunner() { alert.copy(actionExecutionResults = actionExecutionResults) } - monitorCtx.retryPolicy?.let { monitorCtx.alertService!!.saveAlerts(monitor.dataSources, updatedAlerts, it) } + monitorCtx.retryPolicy?.let { + monitorCtx.alertService!!.saveAlerts( + monitor.dataSources, + updatedAlerts, + it, + routingId = monitor.id + ) + } } return triggerResult } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt index 65901fc69..3340cac43 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt @@ -41,7 +41,8 @@ abstract class MonitorRunner { periodStart: Instant, periodEnd: Instant, dryRun: Boolean, - workflowRunContext: WorkflowRunContext? = null + workflowRunContext: WorkflowRunContext? 
= null, + executionId: String ): MonitorRunResult<*> suspend fun runAction( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index e7b2f007f..1200f101b 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -15,10 +15,11 @@ import org.opensearch.action.ActionListener import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.alerting.alerts.AlertIndices -import org.opensearch.alerting.alerts.moveAlerts +import org.opensearch.alerting.alerts.AlertMover.Companion.moveAlerts import org.opensearch.alerting.core.JobRunner import org.opensearch.alerting.core.ScheduledJobIndices import org.opensearch.alerting.model.MonitorRunResult +import org.opensearch.alerting.model.WorkflowRunResult import org.opensearch.alerting.model.destination.DestinationContextFactory import org.opensearch.alerting.opensearchapi.retry import org.opensearch.alerting.script.TriggerExecutionContext @@ -34,6 +35,7 @@ import org.opensearch.alerting.settings.DestinationSettings.Companion.loadDestin import org.opensearch.alerting.util.DocLevelMonitorQueries import org.opensearch.alerting.util.IndexUtils import org.opensearch.alerting.util.isDocLevelMonitor +import org.opensearch.alerting.workflow.CompositeWorkflowRunner import org.opensearch.client.Client import org.opensearch.cluster.metadata.IndexNameExpressionResolver import org.opensearch.cluster.service.ClusterService @@ -42,6 +44,7 @@ import org.opensearch.common.settings.Settings import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.Workflow import org.opensearch.commons.alerting.model.action.Action 
import org.opensearch.commons.alerting.util.isBucketLevelMonitor import org.opensearch.core.xcontent.NamedXContentRegistry @@ -50,6 +53,9 @@ import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript import org.opensearch.threadpool.ThreadPool import java.time.Instant +import java.time.LocalDateTime +import java.time.ZoneOffset +import java.util.UUID import kotlin.coroutines.CoroutineContext object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { @@ -121,6 +127,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon return this } + fun registerWorkflowService(workflowService: WorkflowService): MonitorRunnerService { + this.monitorCtx.workflowService = workflowService + return this + } + // Must be called after registerClusterService and registerSettings in AlertingPlugin fun registerConsumers(): MonitorRunnerService { monitorCtx.retryPolicy = BackoffPolicy.constantBackoff( @@ -136,8 +147,10 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon MOVE_ALERTS_BACKOFF_MILLIS.get(monitorCtx.settings), MOVE_ALERTS_BACKOFF_COUNT.get(monitorCtx.settings) ) - monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(MOVE_ALERTS_BACKOFF_MILLIS, MOVE_ALERTS_BACKOFF_COUNT) { - millis, count -> + monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer( + MOVE_ALERTS_BACKOFF_MILLIS, + MOVE_ALERTS_BACKOFF_COUNT + ) { millis, count -> monitorCtx.moveAlertsRetryPolicy = BackoffPolicy.exponentialBackoff(millis, count) } @@ -183,28 +196,45 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon runnerSupervisor.cancel() } - override fun doClose() { } + override fun doClose() {} override fun postIndex(job: ScheduledJob) { - if (job !is Monitor) { + if (job is Monitor) { + launch { + try { + monitorCtx.moveAlertsRetryPolicy!!.retry(logger) { + if (monitorCtx.alertIndices!!.isAlertInitialized(job.dataSources)) { + 
moveAlerts(monitorCtx.client!!, job.id, job) + } + } + } catch (e: Exception) { + logger.error("Failed to move active alerts for monitor [${job.id}].", e) + } + } + } else if (job is Workflow) { + launch { + try { + monitorCtx.moveAlertsRetryPolicy!!.retry(logger) { + moveAlerts(monitorCtx.client!!, job.id, job, monitorCtx) + } + } catch (e: Exception) { + logger.error("Failed to move active alerts for monitor [${job.id}].", e) + } + } + } else { throw IllegalArgumentException("Invalid job type") } + } + override fun postDelete(jobId: String) { launch { try { monitorCtx.moveAlertsRetryPolicy!!.retry(logger) { - if (monitorCtx.alertIndices!!.isAlertInitialized(job.dataSources)) { - moveAlerts(monitorCtx.client!!, job.id, job) - } + moveAlerts(monitorCtx.client!!, jobId, null, monitorCtx) } } catch (e: Exception) { - logger.error("Failed to move active alerts for monitor [${job.id}].", e) + logger.error("Failed to move active alerts for workflow [$jobId]. Could be a monitor", e) } - } - } - - override fun postDelete(jobId: String) { - launch { try { monitorCtx.moveAlertsRetryPolicy!!.retry(logger) { if (monitorCtx.alertIndices!!.isAlertInitialized()) { @@ -218,16 +248,28 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } override fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant) { - if (job !is Monitor) { - throw IllegalArgumentException("Invalid job type") - } - launch { - runJob(job, periodStart, periodEnd, false) + when (job) { + is Workflow -> { + launch { + runJob(job, periodStart, periodEnd, false) + } + } + is Monitor -> { + launch { + runJob(job, periodStart, periodEnd, false) + } + } + else -> { + throw IllegalArgumentException("Invalid job type") + } } } - suspend fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): MonitorRunResult<*> { + suspend fun runJob(workflow: Workflow, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): WorkflowRunResult { + return 
CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun) + } + suspend fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): MonitorRunResult<*> { // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade the the schema mapping // has not been updated. if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { @@ -237,6 +279,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon object : ActionListener { override fun onResponse(response: AcknowledgedResponse) { } + override fun onFailure(t: Exception) { logger.error("Failed to update config index schema", t) } @@ -244,13 +287,17 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon ) } + if (job is Workflow) { + CompositeWorkflowRunner.runWorkflow(workflow = job, monitorCtx, periodStart, periodEnd, dryrun) + } val monitor = job as Monitor + val executionId = "${monitor.id}_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" val runResult = if (monitor.isBucketLevelMonitor()) { - BucketLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun) + BucketLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) } else if (monitor.isDocLevelMonitor()) { - DocumentLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun) + DocumentLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) } else { - QueryLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun) + QueryLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) } return runResult } @@ -279,6 +326,8 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon internal fun 
currentTime() = Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) internal fun isActionActionable(action: Action, alert: Alert?): Boolean { + if (alert != null && alert.state == Alert.State.AUDIT) + return false if (alert == null || action.throttle == null) { return true } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt index 884ad372d..691071517 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt @@ -27,7 +27,8 @@ object QueryLevelMonitorRunner : MonitorRunner() { periodStart: Instant, periodEnd: Instant, dryrun: Boolean, - workflowRunContext: WorkflowRunContext? + workflowRunContext: WorkflowRunContext?, + executionId: String ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") @@ -40,7 +41,7 @@ object QueryLevelMonitorRunner : MonitorRunner() { val currentAlerts = try { monitorCtx.alertIndices!!.createOrUpdateAlertIndex(monitor.dataSources) monitorCtx.alertIndices!!.createOrUpdateInitialAlertHistoryIndex(monitor.dataSources) - monitorCtx.alertService!!.loadCurrentAlertsForQueryLevelMonitor(monitor) + monitorCtx.alertService!!.loadCurrentAlertsForQueryLevelMonitor(monitor, workflowRunContext) } catch (e: Exception) { // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts val id = if (monitor.id.trim().isEmpty()) "_na_" else monitor.id @@ -67,7 +68,7 @@ object QueryLevelMonitorRunner : MonitorRunner() { val triggerResult = monitorCtx.triggerService!!.runQueryLevelTrigger(monitor, trigger, triggerCtx) triggerResults[trigger.id] = triggerResult - if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult)) 
{ + if (monitorCtx.triggerService!!.isQueryLevelTriggerActionable(triggerCtx, triggerResult, workflowRunContext)) { val actionCtx = triggerCtx.copy(error = monitorResult.error ?: triggerResult.error) for (action in trigger.actions) { triggerResult.actionResults[action.id] = this.runAction(action, actionCtx, monitorCtx, monitor, dryrun) @@ -77,14 +78,23 @@ object QueryLevelMonitorRunner : MonitorRunner() { val updatedAlert = monitorCtx.alertService!!.composeQueryLevelAlert( triggerCtx, triggerResult, - monitorResult.alertError() ?: triggerResult.alertError() + monitorResult.alertError() ?: triggerResult.alertError(), + executionId, + workflowRunContext ) if (updatedAlert != null) updatedAlerts += updatedAlert } // Don't save alerts if this is a test monitor if (!dryrun && monitor.id != Monitor.NO_ID) { - monitorCtx.retryPolicy?.let { monitorCtx.alertService!!.saveAlerts(monitor.dataSources, updatedAlerts, it) } + monitorCtx.retryPolicy?.let { + monitorCtx.alertService!!.saveAlerts( + monitor.dataSources, + updatedAlerts, + it, + routingId = monitor.id + ) + } } return monitorResult.copy(triggerResults = triggerResults) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/TriggerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/TriggerService.kt index 85e5108d8..8c64e43be 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/TriggerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/TriggerService.kt @@ -6,7 +6,9 @@ package org.opensearch.alerting import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.chainedAlertCondition.parsers.ChainedAlertExpressionParser import org.opensearch.alerting.model.BucketLevelTriggerRunResult +import org.opensearch.alerting.model.ChainedAlertTriggerRunResult import org.opensearch.alerting.model.DocumentLevelTriggerRunResult import org.opensearch.alerting.model.QueryLevelTriggerRunResult import org.opensearch.alerting.script.BucketLevelTriggerExecutionContext @@ -14,15 
+16,18 @@ import org.opensearch.alerting.script.QueryLevelTriggerExecutionContext import org.opensearch.alerting.script.TriggerScript import org.opensearch.alerting.triggercondition.parsers.TriggerExpressionParser import org.opensearch.alerting.util.getBucketKeysHash +import org.opensearch.alerting.workflow.WorkflowRunContext import org.opensearch.commons.alerting.aggregation.bucketselectorext.BucketSelectorIndices.Fields.BUCKET_INDICES import org.opensearch.commons.alerting.aggregation.bucketselectorext.BucketSelectorIndices.Fields.PARENT_BUCKET_PATH import org.opensearch.commons.alerting.model.AggregationResultBucket import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.BucketLevelTrigger +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.DocLevelQuery import org.opensearch.commons.alerting.model.DocumentLevelTrigger import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.QueryLevelTrigger +import org.opensearch.commons.alerting.model.Workflow import org.opensearch.script.Script import org.opensearch.script.ScriptService import org.opensearch.search.aggregations.Aggregation @@ -36,7 +41,12 @@ class TriggerService(val scriptService: ScriptService) { private val ALWAYS_RUN = Script("return true") private val NEVER_RUN = Script("return false") - fun isQueryLevelTriggerActionable(ctx: QueryLevelTriggerExecutionContext, result: QueryLevelTriggerRunResult): Boolean { + fun isQueryLevelTriggerActionable( + ctx: QueryLevelTriggerExecutionContext, + result: QueryLevelTriggerRunResult, + workflowRunContext: WorkflowRunContext?, + ): Boolean { + if (workflowRunContext?.auditDelegateMonitorAlerts == true) return false // Suppress actions if the current alert is acknowledged and there are no errors. 
val suppress = ctx.alert?.state == Alert.State.ACKNOWLEDGED && result.error == null && ctx.error == null return result.triggered && !suppress @@ -85,6 +95,32 @@ class TriggerService(val scriptService: ScriptService) { } } + fun runChainedAlertTrigger( + workflow: Workflow, + trigger: ChainedAlertTrigger, + alertGeneratingMonitors: Set, + monitorIdToAlertIdsMap: Map>, + ): ChainedAlertTriggerRunResult { + val associatedAlertIds = mutableSetOf() + return try { + val parsedTriggerCondition = ChainedAlertExpressionParser(trigger.condition.idOrCode).parse() + val evaluate = parsedTriggerCondition.evaluate(alertGeneratingMonitors) + if (evaluate) { + val monitorIdsInTriggerCondition = parsedTriggerCondition.getMonitorIds(parsedTriggerCondition) + monitorIdsInTriggerCondition.forEach { associatedAlertIds.addAll(monitorIdToAlertIdsMap.getOrDefault(it, emptySet())) } + } + ChainedAlertTriggerRunResult(trigger.name, triggered = evaluate, null, associatedAlertIds = associatedAlertIds) + } catch (e: Exception) { + logger.error("Error running chained alert trigger script for workflow ${workflow.id}, trigger: ${trigger.id}", e) + ChainedAlertTriggerRunResult( + triggerName = trigger.name, + triggered = false, + error = e, + associatedAlertIds = emptySet() + ) + } + } + @Suppress("UNCHECKED_CAST") fun runBucketLevelTrigger( monitor: Monitor, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/WorkflowService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/WorkflowService.kt index 0fa93587f..a2c60ce74 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/WorkflowService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/WorkflowService.kt @@ -7,6 +7,8 @@ package org.opensearch.alerting import org.apache.logging.log4j.LogManager import org.opensearch.OpenSearchException +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsRequest +import org.opensearch.action.admin.indices.exists.indices.IndicesExistsResponse import 
org.opensearch.action.search.SearchRequest import org.opensearch.action.search.SearchResponse import org.opensearch.alerting.opensearchapi.suspendUntil @@ -42,6 +44,10 @@ class WorkflowService( */ suspend fun getFindingDocIdsByExecutionId(chainedMonitor: Monitor, workflowExecutionId: String): Map> { try { + val existsResponse: IndicesExistsResponse = client.admin().indices().suspendUntil { + exists(IndicesExistsRequest(chainedMonitor.dataSources.findingsIndex).local(true), it) + } + if (existsResponse.isExists == false) return emptyMap() // Search findings index per monitor and workflow execution id val bqb = QueryBuilders.boolQuery().filter(QueryBuilders.termQuery(Finding.MONITOR_ID_FIELD, chainedMonitor.id)) .filter(QueryBuilders.termQuery(Finding.EXECUTION_ID_FIELD, workflowExecutionId)) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/GetMonitorResponse.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/GetMonitorResponse.kt index 863dcc815..085c05d1f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/GetMonitorResponse.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/GetMonitorResponse.kt @@ -14,6 +14,7 @@ import org.opensearch.commons.alerting.util.IndexUtils.Companion._PRIMARY_TERM import org.opensearch.commons.alerting.util.IndexUtils.Companion._SEQ_NO import org.opensearch.commons.alerting.util.IndexUtils.Companion._VERSION import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentFragment import org.opensearch.core.xcontent.ToXContentObject import org.opensearch.core.xcontent.XContentBuilder import org.opensearch.rest.RestStatus @@ -26,6 +27,7 @@ class GetMonitorResponse : ActionResponse, ToXContentObject { var primaryTerm: Long var status: RestStatus var monitor: Monitor? + var associatedWorkflows: List? 
constructor( id: String, @@ -33,7 +35,8 @@ class GetMonitorResponse : ActionResponse, ToXContentObject { seqNo: Long, primaryTerm: Long, status: RestStatus, - monitor: Monitor? + monitor: Monitor?, + associatedCompositeMonitors: List?, ) : super() { this.id = id this.version = version @@ -41,18 +44,20 @@ class GetMonitorResponse : ActionResponse, ToXContentObject { this.primaryTerm = primaryTerm this.status = status this.monitor = monitor + this.associatedWorkflows = associatedCompositeMonitors ?: emptyList() } @Throws(IOException::class) constructor(sin: StreamInput) : this( - sin.readString(), // id - sin.readLong(), // version - sin.readLong(), // seqNo - sin.readLong(), // primaryTerm - sin.readEnum(RestStatus::class.java), // RestStatus - if (sin.readBoolean()) { + id = sin.readString(), // id + version = sin.readLong(), // version + seqNo = sin.readLong(), // seqNo + primaryTerm = sin.readLong(), // primaryTerm + status = sin.readEnum(RestStatus::class.java), // RestStatus + monitor = if (sin.readBoolean()) { Monitor.readFrom(sin) // monitor - } else null + } else null, + associatedCompositeMonitors = sin.readList((AssociatedWorkflow)::readFrom), ) @Throws(IOException::class) @@ -68,6 +73,9 @@ class GetMonitorResponse : ActionResponse, ToXContentObject { } else { out.writeBoolean(false) } + associatedWorkflows?.forEach { + it.writeTo(out) + } } @Throws(IOException::class) @@ -80,7 +88,46 @@ class GetMonitorResponse : ActionResponse, ToXContentObject { if (monitor != null) { builder.field("monitor", monitor) } - + if (associatedWorkflows != null) { + builder.field("associated_workflows", associatedWorkflows!!.toTypedArray()) + } return builder.endObject() } + + class AssociatedWorkflow : ToXContentFragment { + val id: String + val name: String + + constructor(id: String, name: String) { + this.id = id + this.name = name + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + builder.startObject() + 
.field("id", id) + .field("name", name) + .endObject() + return builder + } + + fun writeTo(out: StreamOutput) { + out.writeString(id) + out.writeString(name) + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + sin.readString(), + sin.readString() + ) + + companion object { + @JvmStatic + @Throws(IOException::class) + fun readFrom(sin: StreamInput): AssociatedWorkflow { + return AssociatedWorkflow(sin) + } + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/alerts/AlertMover.kt b/alerting/src/main/kotlin/org/opensearch/alerting/alerts/AlertMover.kt index a28740516..c7742d1c7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/alerts/AlertMover.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/alerts/AlertMover.kt @@ -5,15 +5,20 @@ package org.opensearch.alerting.alerts +import org.apache.logging.log4j.LogManager import org.opensearch.action.bulk.BulkRequest import org.opensearch.action.bulk.BulkResponse import org.opensearch.action.delete.DeleteRequest +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse import org.opensearch.action.index.IndexRequest import org.opensearch.action.search.SearchRequest import org.opensearch.action.search.SearchResponse +import org.opensearch.alerting.MonitorRunnerExecutionContext import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_HISTORY_WRITE_INDEX import org.opensearch.alerting.alerts.AlertIndices.Companion.ALERT_INDEX import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.util.ScheduledJobUtils import org.opensearch.client.Client import org.opensearch.common.bytes.BytesReference import org.opensearch.common.xcontent.LoggingDeprecationHandler @@ -22,7 +27,10 @@ import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentParserUtils import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.model.Alert +import 
org.opensearch.commons.alerting.model.CompositeInput import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.Workflow import org.opensearch.core.xcontent.NamedXContentRegistry import org.opensearch.core.xcontent.XContentParser import org.opensearch.index.VersionType @@ -30,91 +38,210 @@ import org.opensearch.index.query.QueryBuilders import org.opensearch.rest.RestStatus import org.opensearch.search.builder.SearchSourceBuilder -/** - * Moves defunct active alerts to the alert history index when the corresponding monitor or trigger is deleted. - * - * The logic for moving alerts consists of: - * 1. Find active alerts: - * a. matching monitorId if no monitor is provided (postDelete) - * b. matching monitorId and no triggerIds if monitor is provided (postIndex) - * 2. Move alerts over to DataSources.alertsHistoryIndex as DELETED - * 3. Delete alerts from monitor's DataSources.alertsIndex - * 4. Schedule a retry if there were any failures - */ -suspend fun moveAlerts(client: Client, monitorId: String, monitor: Monitor?) { - var alertIndex = monitor?.dataSources?.alertsIndex ?: ALERT_INDEX - var alertHistoryIndex = monitor?.dataSources?.alertsHistoryIndex ?: ALERT_HISTORY_WRITE_INDEX +private val log = LogManager.getLogger(AlertMover::class.java) - val boolQuery = QueryBuilders.boolQuery() - .filter(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId)) +class AlertMover { + companion object { + /** + * Moves defunct active alerts to the alert history index when the corresponding monitor or trigger is deleted. + * + * The logic for moving alerts consists of: + * 1. Find active alerts: + * a. matching monitorId if no monitor is provided (postDelete) + * b. matching monitorId and no triggerIds if monitor is provided (postIndex) + * 2. Move alerts over to DataSources.alertsHistoryIndex as DELETED + * 3. Delete alerts from monitor's DataSources.alertsIndex + * 4. 
Schedule a retry if there were any failures + */ + suspend fun moveAlerts(client: Client, monitorId: String, monitor: Monitor?) { + var alertIndex = monitor?.dataSources?.alertsIndex ?: ALERT_INDEX + var alertHistoryIndex = monitor?.dataSources?.alertsHistoryIndex ?: ALERT_HISTORY_WRITE_INDEX - if (monitor != null) { - boolQuery.mustNot(QueryBuilders.termsQuery(Alert.TRIGGER_ID_FIELD, monitor.triggers.map { it.id })) - } + val boolQuery = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery(Alert.MONITOR_ID_FIELD, monitorId)) + + if (monitor != null) { + boolQuery.mustNot(QueryBuilders.termsQuery(Alert.TRIGGER_ID_FIELD, monitor.triggers.map { it.id })) + } + + val activeAlertsQuery = SearchSourceBuilder.searchSource() + .query(boolQuery) + .version(true) + + val activeAlertsRequest = SearchRequest(alertIndex) + .routing(monitorId) + .source(activeAlertsQuery) + val response: SearchResponse = client.suspendUntil { search(activeAlertsRequest, it) } - val activeAlertsQuery = SearchSourceBuilder.searchSource() - .query(boolQuery) - .version(true) - - val activeAlertsRequest = SearchRequest(alertIndex) - .routing(monitorId) - .source(activeAlertsQuery) - val response: SearchResponse = client.suspendUntil { search(activeAlertsRequest, it) } - - // If no alerts are found, simply return - if (response.hits.totalHits?.value == 0L) return - val indexRequests = response.hits.map { hit -> - IndexRequest(alertHistoryIndex) - .routing(monitorId) - .source( - Alert.parse(alertContentParser(hit.sourceRef), hit.id, hit.version) - .copy(state = Alert.State.DELETED) - .toXContentWithUser(XContentFactory.jsonBuilder()) + // If no alerts are found, simply return + if (response.hits.totalHits?.value == 0L) return + val indexRequests = response.hits.map { hit -> + IndexRequest(alertHistoryIndex) + .routing(monitorId) + .source( + Alert.parse(alertContentParser(hit.sourceRef), hit.id, hit.version) + .copy(state = Alert.State.DELETED) + 
.toXContentWithUser(XContentFactory.jsonBuilder()) + ) + .version(hit.version) + .versionType(VersionType.EXTERNAL_GTE) + .id(hit.id) + } + val copyRequest = BulkRequest().add(indexRequests) + val copyResponse: BulkResponse = client.suspendUntil { bulk(copyRequest, it) } + + val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map { + DeleteRequest(alertIndex, it.id) + .routing(monitorId) + .version(it.version) + .versionType(VersionType.EXTERNAL_GTE) + } + val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + + if (copyResponse.hasFailures()) { + val retryCause = copyResponse.items.filter { it.isFailed } + .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } + ?.failure?.cause + throw RuntimeException( + "Failed to copy alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " + + copyResponse.buildFailureMessage(), + retryCause + ) + } + if (deleteResponse.hasFailures()) { + val retryCause = deleteResponse.items.filter { it.isFailed } + .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } + ?.failure?.cause + throw RuntimeException( + "Failed to delete alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " + + deleteResponse.buildFailureMessage(), + retryCause + ) + } + } + + private fun alertContentParser(bytesReference: BytesReference): XContentParser { + val xcp = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, + bytesReference, XContentType.JSON ) - .version(hit.version) - .versionType(VersionType.EXTERNAL_GTE) - .id(hit.id) - } - val copyRequest = BulkRequest().add(indexRequests) - val copyResponse: BulkResponse = client.suspendUntil { bulk(copyRequest, it) } - - val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map { - DeleteRequest(alertIndex, it.id) - .routing(monitorId) - .version(it.version) - .versionType(VersionType.EXTERNAL_GTE) - } - val deleteResponse: BulkResponse = client.suspendUntil { 
bulk(BulkRequest().add(deleteRequests), it) } - - if (copyResponse.hasFailures()) { - val retryCause = copyResponse.items.filter { it.isFailed } - .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } - ?.failure?.cause - throw RuntimeException( - "Failed to copy alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " + - copyResponse.buildFailureMessage(), - retryCause - ) - } - if (deleteResponse.hasFailures()) { - val retryCause = deleteResponse.items.filter { it.isFailed } - .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } - ?.failure?.cause - throw RuntimeException( - "Failed to delete alerts for [$monitorId, ${monitor?.triggers?.map { it.id }}]: " + - deleteResponse.buildFailureMessage(), - retryCause - ) - } -} + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + return xcp + } + + /** + * Moves defunct active alerts to the alert history index when the corresponding workflow or trigger is deleted. + * + * The logic for moving alerts consists of: + * 1. Find active alerts: + * a. matching workflowId if no workflow is provided (postDelete) + * b. matching workflowid and chained alert triggerIds if monitor is provided (postIndex) + * 2. Move alerts over to DataSources.alertsHistoryIndex as DELETED + * 3. Delete alerts from monitor's DataSources.alertsIndex + * 4. Schedule a retry if there were any failures + */ + suspend fun moveAlerts(client: Client, workflowId: String, workflow: Workflow?, monitorCtx: MonitorRunnerExecutionContext) { + var alertIndex = ALERT_INDEX + var alertHistoryIndex = ALERT_HISTORY_WRITE_INDEX + if (workflow != null) { + if ( + workflow.inputs.isNotEmpty() && workflow.inputs[0] is CompositeInput && + (workflow.inputs[0] as CompositeInput).sequence.delegates.isNotEmpty() + ) { + var i = 0 + val delegates = (workflow.inputs[0] as CompositeInput).sequence.delegates + try { + var getResponse: GetResponse? 
= null + while (i < delegates.size && (getResponse == null || getResponse.isExists == false)) { + getResponse = + client.suspendUntil { + client.get( + GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, delegates[i].monitorId), + it + ) + } + if (getResponse!!.isExists) { + val monitor = + ScheduledJobUtils.parseMonitorFromScheduledJobDocSource( + monitorCtx.xContentRegistry!!, + response = getResponse + ) + /** check if alert index is initialized **/ + if (monitorCtx.alertIndices!!.isAlertInitialized(monitor.dataSources) == false) + return + alertIndex = monitor.dataSources.alertsIndex + alertHistoryIndex = + if (monitor.dataSources.alertsHistoryIndex == null) alertHistoryIndex + else monitor.dataSources.alertsHistoryIndex!! + } + i++ + } + } catch (e: Exception) { + log.error("Failed to get delegate monitor for workflow $workflowId. Assuming default alert indices", e) + } + } + } + val boolQuery = QueryBuilders.boolQuery() + .filter(QueryBuilders.termQuery(Alert.WORKFLOW_ID_FIELD, workflowId)) + + if (workflow != null) { + boolQuery.mustNot(QueryBuilders.termsQuery(Alert.TRIGGER_ID_FIELD, workflow.triggers.map { it.id })) + } -private fun alertContentParser(bytesReference: BytesReference): XContentParser { - val xcp = XContentHelper.createParser( - NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, - bytesReference, - XContentType.JSON - ) - XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) - return xcp + val activeAlertsQuery = SearchSourceBuilder.searchSource() + .query(boolQuery) + .version(true) + + val activeAlertsRequest = SearchRequest(alertIndex) + .routing(workflowId) + .source(activeAlertsQuery) + val response: SearchResponse = client.suspendUntil { search(activeAlertsRequest, it) } + + // If no alerts are found, simply return + if (response.hits.totalHits?.value == 0L) return + val indexRequests = response.hits.map { hit -> + IndexRequest(alertHistoryIndex) + .routing(workflowId) + .source( + 
Alert.parse(alertContentParser(hit.sourceRef), hit.id, hit.version) + .copy(state = Alert.State.DELETED) + .toXContentWithUser(XContentFactory.jsonBuilder()) + ) + .version(hit.version) + .versionType(VersionType.EXTERNAL_GTE) + .id(hit.id) + } + val copyRequest = BulkRequest().add(indexRequests) + val copyResponse: BulkResponse = client.suspendUntil { bulk(copyRequest, it) } + + val deleteRequests = copyResponse.items.filterNot { it.isFailed }.map { + DeleteRequest(alertIndex, it.id) + .routing(workflowId) + .version(it.version) + .versionType(VersionType.EXTERNAL_GTE) + } + val deleteResponse: BulkResponse = client.suspendUntil { bulk(BulkRequest().add(deleteRequests), it) } + + if (copyResponse.hasFailures()) { + val retryCause = copyResponse.items.filter { it.isFailed } + .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } + ?.failure?.cause + throw RuntimeException( + "Failed to copy alerts for [$workflowId, ${workflow?.triggers?.map { it.id }}]: " + + copyResponse.buildFailureMessage(), + retryCause + ) + } + if (deleteResponse.hasFailures()) { + val retryCause = deleteResponse.items.filter { it.isFailed } + .firstOrNull { it.status() == RestStatus.TOO_MANY_REQUESTS } + ?.failure?.cause + throw RuntimeException( + "Failed to delete alerts for [$workflowId, ${workflow?.triggers?.map { it.id }}]: " + + deleteResponse.buildFailureMessage(), + retryCause + ) + } + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionParser.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionParser.kt new file mode 100644 index 000000000..999b9a977 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionParser.kt @@ -0,0 +1,53 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.parsers + 
+import org.opensearch.alerting.chainedAlertCondition.resolvers.ChainedAlertRPNResolver +import org.opensearch.alerting.chainedAlertCondition.tokens.CAExpressionOperator + +/** + * The postfix (Reverse Polish Notation) parser. + * Uses the Shunting-yard algorithm to parse a mathematical expression + * @param triggerExpression String containing the trigger expression for the monitor + */ +class ChainedAlertExpressionParser( + triggerExpression: String +) : ChainedAlertExpressionRPNBaseParser(triggerExpression) { + + override fun parse(): ChainedAlertRPNResolver { + val expression = expressionToParse.replace(" ", "") + + val splitters = ArrayList() + CAExpressionOperator.values().forEach { splitters.add(it.value) } + + val breaks = ArrayList().apply { add(expression) } + for (s in splitters) { + val a = ArrayList() + for (ind in 0 until breaks.size) { + breaks[ind].let { + if (it.length > 1) { + a.addAll(breakString(breaks[ind], s)) + } else a.add(it) + } + } + breaks.clear() + breaks.addAll(a) + } + + return ChainedAlertRPNResolver(convertInfixToPostfix(breaks)) + } + + private fun breakString(input: String, delimeter: String): ArrayList { + val tokens = input.split(delimeter) + val array = ArrayList() + for (t in tokens) { + array.add(t) + array.add(delimeter) + } + array.removeAt(array.size - 1) + return array + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionRPNBaseParser.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionRPNBaseParser.kt new file mode 100644 index 000000000..ff3c29db7 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ChainedAlertExpressionRPNBaseParser.kt @@ -0,0 +1,114 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.parsers + +import 
org.opensearch.alerting.chainedAlertCondition.tokens.CAExpressionOperator +import org.opensearch.alerting.chainedAlertCondition.tokens.CAExpressionToken +import org.opensearch.alerting.chainedAlertCondition.tokens.ChainedAlertExpressionConstant +import org.opensearch.alerting.chainedAlertCondition.tokens.ExpressionToken +import java.util.Stack + +/** + * This is the abstract base class which holds the trigger expression parsing logic; + * using the Infix to Postfix a.k.a. Reverse Polish Notation (RPN) parser. + * It also uses the Shunting-Yard algorithm to parse the given trigger expression. + * + * @param expressionToParse Complete string containing the trigger expression + */ +abstract class ChainedAlertExpressionRPNBaseParser( + protected val expressionToParse: String +) : ExpressionParser { + /** + * To perform the Infix-to-postfix conversion of the trigger expression + */ + protected fun convertInfixToPostfix(expTokens: List): ArrayList { + val expTokenStack = Stack() + val outputExpTokens = ArrayList() + + for (tokenString in expTokens) { + if (tokenString.isEmpty()) continue + when (val expToken = assignToken(tokenString)) { + is CAExpressionToken -> outputExpTokens.add(expToken) + is CAExpressionOperator -> { + when (expToken) { + CAExpressionOperator.PAR_LEFT -> expTokenStack.push(expToken) + CAExpressionOperator.PAR_RIGHT -> { + var topExpToken = expTokenStack.popExpTokenOrNull() + while (topExpToken != null && topExpToken != CAExpressionOperator.PAR_LEFT) { + outputExpTokens.add(topExpToken) + topExpToken = expTokenStack.popExpTokenOrNull() + } + if (topExpToken != CAExpressionOperator.PAR_LEFT) + throw java.lang.IllegalArgumentException("No matching left parenthesis.") + } + else -> { + var op2 = expTokenStack.peekExpTokenOrNull() + while (op2 != null) { + val c = expToken.precedence.compareTo(op2.precedence) + if (c < 0 || !expToken.rightAssociative && c <= 0) { + outputExpTokens.add(expTokenStack.pop()) + } else { + break + } + op2 = 
expTokenStack.peekExpTokenOrNull() + } + expTokenStack.push(expToken) + } + } + } + } + + while (!expTokenStack.isEmpty()) { + expTokenStack.peekExpTokenOrNull()?.let { + if (it == CAExpressionOperator.PAR_LEFT) + throw java.lang.IllegalArgumentException("No matching right parenthesis.") + } + val top = expTokenStack.pop() + outputExpTokens.add(top) + } + + return outputExpTokens + } + + /** + * Looks up and maps the expression token that matches the string version of that expression unit + */ + private fun assignToken(tokenString: String): ExpressionToken { + + // Check "monitor" token in trigger expression such as in 'monitor[id="abc"]' + if (tokenString.startsWith(ChainedAlertExpressionConstant.ConstantType.MONITOR.ident)) + return CAExpressionToken(tokenString) + + // Check operators in trigger expression such as in [&&, ||, !] + for (op in CAExpressionOperator.values()) { + if (op.value == tokenString) return op + } + + // Check any constants in trigger expression such as in ["id", "[", "]", "="] + for (con in ChainedAlertExpressionConstant.ConstantType.values()) { + if (tokenString == con.ident) return ChainedAlertExpressionConstant(con) + } + + throw IllegalArgumentException("Error while processing the trigger expression '$tokenString'") + } + + private inline fun Stack.popExpTokenOrNull(): T? { + return try { + pop() as T + } catch (e: java.lang.Exception) { + null + } + } + + private inline fun Stack.peekExpTokenOrNull(): T? 
{ + return try { + peek() as T + } catch (e: java.lang.Exception) { + null + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ExpressionParser.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ExpressionParser.kt new file mode 100644 index 000000000..e2ece9d40 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/parsers/ExpressionParser.kt @@ -0,0 +1,12 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.parsers + +import org.opensearch.alerting.chainedAlertCondition.resolvers.ChainedAlertTriggerResolver + +interface ExpressionParser { + fun parse(): ChainedAlertTriggerResolver +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertRPNResolver.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertRPNResolver.kt new file mode 100644 index 000000000..dfec9614f --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertRPNResolver.kt @@ -0,0 +1,110 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.resolvers + +import org.opensearch.alerting.chainedAlertCondition.tokens.CAExpressionOperator +import org.opensearch.alerting.chainedAlertCondition.tokens.CAExpressionToken +import org.opensearch.alerting.chainedAlertCondition.tokens.ChainedAlertExpressionConstant +import org.opensearch.alerting.chainedAlertCondition.tokens.ExpressionToken +import java.util.Stack + +/** + * Solves the Trigger Expression using the Reverse Polish Notation (RPN) based solver + * @param polishNotation an array of expression tokens organized in the RPN order + */ +class ChainedAlertRPNResolver( + private val polishNotation: ArrayList, +) 
: ChainedAlertTriggerResolver { + + private val eqString by lazy { + val stringBuilder = StringBuilder() + for (expToken in polishNotation) { + when (expToken) { + is CAExpressionToken -> stringBuilder.append(expToken.value) + is CAExpressionOperator -> stringBuilder.append(expToken.value) + is ChainedAlertExpressionConstant -> stringBuilder.append(expToken.type.ident) + else -> throw Exception() + } + stringBuilder.append(" ") + } + stringBuilder.toString() + } + + override fun toString(): String = eqString + + /** + * Evaluates the trigger expression expressed provided in form of the RPN token array. + * @param queryToDocIds Map to hold the resultant document id per query id + * @return evaluates the final set of document id + */ + override fun evaluate(alertGeneratingMonitors: Set): Boolean { + val tokenStack = Stack() + val res = true + for (expToken in polishNotation) { + when (expToken) { + is CAExpressionToken -> tokenStack.push(resolveMonitorExpression(expToken.value, alertGeneratingMonitors)) + is CAExpressionOperator -> { + val right = tokenStack.pop() + val expr = when (expToken) { + CAExpressionOperator.AND -> ChainedAlertTriggerExpression.And(tokenStack.pop(), right) + CAExpressionOperator.OR -> ChainedAlertTriggerExpression.Or(tokenStack.pop(), right) + CAExpressionOperator.NOT -> ChainedAlertTriggerExpression.Not(res, right) + else -> throw IllegalArgumentException("No matching operator.") + } + tokenStack.push(expr.resolve()) + } + } + } + return tokenStack.pop() + } + + override fun getMonitorIds(parsedTriggerCondition: ChainedAlertRPNResolver): Set { + val monitorIds = mutableSetOf() + for (expToken in polishNotation) { + when (expToken) { + is CAExpressionToken -> { + val monitorExpString = expToken.value + if (!monitorExpString.startsWith(ChainedAlertExpressionConstant.ConstantType.MONITOR.ident)) + continue + val token = monitorExpString.substringAfter(ChainedAlertExpressionConstant.ConstantType.BRACKET_LEFT.ident) + 
.substringBefore(ChainedAlertExpressionConstant.ConstantType.BRACKET_RIGHT.ident) + if (token.isEmpty()) continue + val tokens = token.split(ChainedAlertExpressionConstant.ConstantType.EQUALS.ident) + if (tokens.isEmpty() || tokens.size != 2) continue + val identifier = tokens[0] + val value = tokens[1] + when (identifier) { + ChainedAlertExpressionConstant.ConstantType.ID.ident -> { + monitorIds.add(value) + } + } + } + is CAExpressionOperator -> { + continue + } + } + } + return monitorIds + } + + private fun resolveMonitorExpression(monitorExpString: String, alertGeneratingMonitors: Set): Boolean { + if (!monitorExpString.startsWith(ChainedAlertExpressionConstant.ConstantType.MONITOR.ident)) return false + val token = monitorExpString.substringAfter(ChainedAlertExpressionConstant.ConstantType.BRACKET_LEFT.ident) + .substringBefore(ChainedAlertExpressionConstant.ConstantType.BRACKET_RIGHT.ident) + if (token.isEmpty()) return false + + val tokens = token.split(ChainedAlertExpressionConstant.ConstantType.EQUALS.ident) + if (tokens.isEmpty() || tokens.size != 2) return false + + val identifier = tokens[0] + val value = tokens[1] + + return when (identifier) { + ChainedAlertExpressionConstant.ConstantType.ID.ident -> alertGeneratingMonitors.contains(value) + else -> false + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerExpression.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerExpression.kt new file mode 100644 index 000000000..4b373d853 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerExpression.kt @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.resolvers + +sealed class ChainedAlertTriggerExpression { + + fun resolve(): Boolean = when (this) { + is And 
-> resolveAnd(boolean1, boolean2) + is Or -> resolveOr(boolean1, boolean2) + is Not -> resolveNot(result, boolean2) + } + + private fun resolveAnd(boolean1: Boolean, boolean2: Boolean): Boolean { + return boolean1 && boolean2 + } + + private fun resolveOr(boolean1: Boolean, boolean2: Boolean): Boolean { + return boolean1 || boolean2 + } + + private fun resolveNot(result: Boolean, boolean2: Boolean): Boolean { + return result && !boolean2 + } + + // Operators implemented as operator functions + class And(val boolean1: Boolean, val boolean2: Boolean) : ChainedAlertTriggerExpression() + class Or(val boolean1: Boolean, val boolean2: Boolean) : ChainedAlertTriggerExpression() + class Not(val result: Boolean, val boolean2: Boolean) : ChainedAlertTriggerExpression() +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerResolver.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerResolver.kt new file mode 100644 index 000000000..6f2ff2de0 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/resolvers/ChainedAlertTriggerResolver.kt @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.resolvers + +interface ChainedAlertTriggerResolver { + fun getMonitorIds(parsedTriggerCondition: ChainedAlertRPNResolver): Set + fun evaluate(alertGeneratingMonitors: Set): Boolean +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionOperator.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionOperator.kt new file mode 100644 index 000000000..084b6aa70 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionOperator.kt @@ -0,0 +1,20 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.tokens + +/** + * To define all the operators used in the trigger expression + */ +enum class CAExpressionOperator(val value: String, val precedence: Int, val rightAssociative: Boolean) : ExpressionToken { + + AND("&&", 2, false), + OR("||", 2, false), + + NOT("!", 3, true), + + PAR_LEFT("(", 1, false), + PAR_RIGHT(")", 1, false) +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionToken.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionToken.kt new file mode 100644 index 000000000..ddf439d3f --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/CAExpressionToken.kt @@ -0,0 +1,11 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.tokens + +/** + * To define the tokens in Trigger expression such as monitor[id="id1"] or monitor[id="id2"] and monitor[id="id3"] + */ +internal data class CAExpressionToken(val value: String) : ExpressionToken diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ChainedAlertExpressionConstant.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ChainedAlertExpressionConstant.kt new file mode 100644 index 000000000..4b35bc4a8 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ChainedAlertExpressionConstant.kt @@ -0,0 +1,24 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.tokens + +/** + * To define all the tokens which could be part of an expression constant such as monitor[id=new_id], + * i.e. "monitor", "id", "[", "]", "=" + */ +class ChainedAlertExpressionConstant(val type: ConstantType) : 
ExpressionToken { + + enum class ConstantType(val ident: String) { + MONITOR("monitor"), + + ID("id"), + + BRACKET_LEFT("["), + BRACKET_RIGHT("]"), + + EQUALS("=") + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ExpressionToken.kt b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ExpressionToken.kt new file mode 100644 index 000000000..38efed313 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/chainedAlertCondition/tokens/ExpressionToken.kt @@ -0,0 +1,8 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition.tokens + +interface ExpressionToken diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/ChainedAlertTriggerRunResult.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/ChainedAlertTriggerRunResult.kt new file mode 100644 index 000000000..960338531 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/ChainedAlertTriggerRunResult.kt @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.model + +import org.opensearch.common.io.stream.StreamInput +import org.opensearch.common.io.stream.StreamOutput +import org.opensearch.commons.alerting.alerts.AlertError +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import org.opensearch.script.ScriptException +import java.io.IOException +import java.time.Instant + +data class ChainedAlertTriggerRunResult( + override var triggerName: String, + var triggered: Boolean, + override var error: Exception?, + var actionResults: MutableMap = mutableMapOf(), + val associatedAlertIds: Set, +) : TriggerRunResult(triggerName, error) { + + @Throws(IOException::class) + @Suppress("UNCHECKED_CAST") + constructor(sin: StreamInput) : this( + triggerName = sin.readString(), + 
error = sin.readException(), + triggered = sin.readBoolean(), + actionResults = sin.readMap() as MutableMap, + associatedAlertIds = sin.readStringList().toSet() + ) + + override fun alertError(): AlertError? { + if (error != null) { + return AlertError(Instant.now(), "Failed evaluating trigger:\n${error!!.userErrorMessage()}") + } + for (actionResult in actionResults.values) { + if (actionResult.error != null) { + return AlertError(Instant.now(), "Failed running action:\n${actionResult.error.userErrorMessage()}") + } + } + return null + } + + override fun internalXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + if (error is ScriptException) error = Exception((error as ScriptException).toJsonString(), error) + return builder + .field("triggered", triggered) + .field("action_results", actionResults as Map) + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeBoolean(triggered) + out.writeMap(actionResults as Map) + out.writeStringCollection(associatedAlertIds) + } + + companion object { + @JvmStatic + @Throws(IOException::class) + fun readFrom(sin: StreamInput): TriggerRunResult { + return ChainedAlertTriggerRunResult(sin) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/WorkflowRunResult.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/WorkflowRunResult.kt index 9ef7a3536..ee8885173 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/model/WorkflowRunResult.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/WorkflowRunResult.kt @@ -15,39 +15,68 @@ import java.lang.Exception import java.time.Instant data class WorkflowRunResult( - val workflowRunResult: List> = mutableListOf(), + val workflowId: String, + val workflowName: String, + val monitorRunResults: List> = mutableListOf(), val executionStartTime: Instant, - val executionEndTime: Instant? = null, + var executionEndTime: Instant? 
= null, val executionId: String, - val error: Exception? = null + val error: Exception? = null, + val triggerResults: Map = mapOf(), ) : Writeable, ToXContent { @Throws(IOException::class) @Suppress("UNCHECKED_CAST") constructor(sin: StreamInput) : this( - sin.readList> { s: StreamInput -> MonitorRunResult.readFrom(s) }, - sin.readInstant(), - sin.readInstant(), - sin.readString(), - sin.readException() + workflowId = sin.readString(), + workflowName = sin.readString(), + monitorRunResults = sin.readList> { s: StreamInput -> MonitorRunResult.readFrom(s) }, + executionStartTime = sin.readInstant(), + executionEndTime = sin.readOptionalInstant(), + executionId = sin.readString(), + error = sin.readException(), + triggerResults = suppressWarning(sin.readMap()) as Map ) override fun writeTo(out: StreamOutput) { - out.writeList(workflowRunResult) + out.writeString(workflowId) + out.writeString(workflowName) + out.writeList(monitorRunResults) out.writeInstant(executionStartTime) - out.writeInstant(executionEndTime) + out.writeOptionalInstant(executionEndTime) out.writeString(executionId) out.writeException(error) + out.writeMap(triggerResults) } override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { - builder.startObject().startArray("workflow_run_result") - for (monitorResult in workflowRunResult) { + builder.startObject() + builder.field("execution_id", executionId) + builder.field("workflow_name", workflowName) + builder.field("workflow_id", workflowId) + builder.field("trigger_results", triggerResults) + builder.startArray("monitor_run_results") + for (monitorResult in monitorRunResults) { monitorResult.toXContent(builder, ToXContent.EMPTY_PARAMS) } - builder.endArray().field("execution_start_time", executionStartTime) + builder.endArray() + .field("execution_start_time", executionStartTime) .field("execution_end_time", executionEndTime) - .field("error", error?.message).endObject() + .field("error", error?.message) + 
.endObject() return builder } + + companion object { + @JvmStatic + @Throws(IOException::class) + fun readFrom(sin: StreamInput): WorkflowRunResult { + return WorkflowRunResult(sin) + } + + @Suppress("UNCHECKED_CAST") + fun suppressWarning(map: MutableMap?): Map { + return map as Map + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestAcknowledgeChainedAlertsAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestAcknowledgeChainedAlertsAction.kt new file mode 100644 index 000000000..4402ca455 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestAcknowledgeChainedAlertsAction.kt @@ -0,0 +1,82 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.apache.logging.log4j.Logger +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.client.node.NodeClient +import org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken +import org.opensearch.commons.alerting.action.AcknowledgeChainedAlertRequest +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.BaseRestHandler.RestChannelConsumer +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.POST +import org.opensearch.rest.action.RestToXContentListener +import java.io.IOException + +private val log: Logger = LogManager.getLogger(RestAcknowledgeAlertAction::class.java) + +/** + * This class consists of the REST handler to acknowledge chained alerts. + * The user provides the workflowID to which these alerts pertain and in the content of the request provides + * the ids to the chained alerts user would like to acknowledge. 
+ */ +class RestAcknowledgeChainedAlertAction : BaseRestHandler() { + + override fun getName(): String { + return "acknowledge_chained_alert_action" + } + + override fun routes(): List { + // Acknowledge alerts + return mutableListOf( + Route( + POST, + "${AlertingPlugin.WORKFLOW_BASE_URI}/{workflowID}/_acknowledge/alerts" + ) + ) + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.WORKFLOW_BASE_URI}/{workflowID}/_acknowledge/alerts") + + val workflowId = request.param("workflowID") + require(!workflowId.isNullOrEmpty()) { "Missing workflow id." } + val alertIds = getAlertIds(request.contentParser()) + require(alertIds.isNotEmpty()) { "You must provide at least one alert id." } + + val acknowledgeAlertRequest = AcknowledgeChainedAlertRequest(workflowId, alertIds) + return RestChannelConsumer { channel -> + client.execute(AlertingActions.ACKNOWLEDGE_CHAINED_ALERTS_ACTION_TYPE, acknowledgeAlertRequest, RestToXContentListener(channel)) + } + } + + /** + * Parse the request content and return a list of the alert ids to acknowledge + */ + private fun getAlertIds(xcp: XContentParser): List { + val ids = mutableListOf() + ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + when (fieldName) { + "alerts" -> { + ensureExpectedToken(XContentParser.Token.START_ARRAY, xcp.currentToken(), xcp) + while (xcp.nextToken() != XContentParser.Token.END_ARRAY) { + ids.add(xcp.text()) + } + } + } + } + return ids + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteWorkflowAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteWorkflowAction.kt new file mode 100644 index 000000000..4787fdbdb --- /dev/null +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestExecuteWorkflowAction.kt @@ -0,0 +1,59 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.alerting.action.ExecuteWorkflowAction +import org.opensearch.alerting.action.ExecuteWorkflowRequest +import org.opensearch.client.node.NodeClient +import org.opensearch.common.unit.TimeValue +import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.commons.alerting.model.Workflow +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.RestHandler +import org.opensearch.rest.RestRequest +import org.opensearch.rest.action.RestToXContentListener +import java.time.Instant + +private val log = LogManager.getLogger(RestExecuteWorkflowAction::class.java) + +class RestExecuteWorkflowAction : BaseRestHandler() { + + override fun getName(): String = "execute_workflow_action" + + override fun routes(): List { + return listOf( + RestHandler.Route(RestRequest.Method.POST, "${AlertingPlugin.WORKFLOW_BASE_URI}/{workflowID}/_execute") + ) + } + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.WORKFLOW_BASE_URI}/_execute") + + return RestChannelConsumer { channel -> + val dryrun = request.paramAsBoolean("dryrun", false) + val requestEnd = request.paramAsTime("period_end", TimeValue(Instant.now().toEpochMilli())) + + if (request.hasParam("workflowID")) { + val workflowId = request.param("workflowID") + val execWorkflowRequest = ExecuteWorkflowRequest(dryrun, requestEnd, workflowId, null) + client.execute(ExecuteWorkflowAction.INSTANCE, execWorkflowRequest, RestToXContentListener(channel)) + } else { + val xcp = request.contentParser() + 
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val workflow = Workflow.parse(xcp, Workflow.NO_ID, Workflow.NO_VERSION) + val execWorkflowRequest = ExecuteWorkflowRequest(dryrun, requestEnd, null, workflow) + client.execute(ExecuteWorkflowAction.INSTANCE, execWorkflowRequest, RestToXContentListener(channel)) + } + } + } + + override fun responseParams(): Set { + return setOf("dryrun", "period_end", "workflowID") + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetWorkflowAlertsAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetWorkflowAlertsAction.kt new file mode 100644 index 000000000..5fb7d8ffc --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/resthandler/RestGetWorkflowAlertsAction.kt @@ -0,0 +1,86 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.resthandler + +import org.apache.logging.log4j.LogManager +import org.opensearch.alerting.AlertingPlugin +import org.opensearch.client.node.NodeClient +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.GetWorkflowAlertsRequest +import org.opensearch.commons.alerting.model.Table +import org.opensearch.rest.BaseRestHandler +import org.opensearch.rest.BaseRestHandler.RestChannelConsumer +import org.opensearch.rest.RestHandler.ReplacedRoute +import org.opensearch.rest.RestHandler.Route +import org.opensearch.rest.RestRequest +import org.opensearch.rest.RestRequest.Method.GET +import org.opensearch.rest.action.RestToXContentListener + +/** + * This class consists of the REST handler to retrieve chained alerts by workflow id. 
+ */ +class RestGetWorkflowAlertsAction : BaseRestHandler() { + + private val log = LogManager.getLogger(RestGetWorkflowAlertsAction::class.java) + + override fun getName(): String { + return "get_workflow_alerts_action" + } + + override fun routes(): List { + return mutableListOf( + Route( + GET, + "${AlertingPlugin.WORKFLOW_BASE_URI}/alerts" + ) + ) + } + + override fun replacedRoutes(): MutableList { + return mutableListOf() + } + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + log.debug("${request.method()} ${AlertingPlugin.WORKFLOW_BASE_URI}/alerts") + + val sortString = request.param("sortString", "monitor_name.keyword") + val sortOrder = request.param("sortOrder", "asc") + val missing: String? = request.param("missing") + val size = request.paramAsInt("size", 20) + val startIndex = request.paramAsInt("startIndex", 0) + val searchString = request.param("searchString", "") + val severityLevel = request.param("severityLevel", "ALL") + val alertState = request.param("alertState", "ALL") + val workflowId: String? 
= request.param("workflowIds") + val getAssociatedAlerts: Boolean = request.param("getAssociatedAlerts", "false").toBoolean() + val workflowIds = mutableListOf() + if (workflowId.isNullOrEmpty() == false) { + workflowIds.add(workflowId) + } + val table = Table( + sortOrder, + sortString, + missing, + size, + startIndex, + searchString + ) + + val getWorkflowAlertsRequest = GetWorkflowAlertsRequest( + table, + severityLevel, + alertState, + alertIndex = null, + associatedAlertsIndex = null, + workflowIds = workflowIds, + monitorIds = emptyList(), + getAssociatedAlerts = getAssociatedAlerts + ) + return RestChannelConsumer { channel -> + client.execute(AlertingActions.GET_WORKFLOW_ALERTS_ACTION_TYPE, getWorkflowAlertsRequest, RestToXContentListener(channel)) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/script/ChainedAlertTriggerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/script/ChainedAlertTriggerExecutionContext.kt new file mode 100644 index 000000000..a626c7667 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/script/ChainedAlertTriggerExecutionContext.kt @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.script + +import org.opensearch.alerting.model.WorkflowRunResult +import org.opensearch.commons.alerting.model.ChainedAlertTrigger +import org.opensearch.commons.alerting.model.Workflow +import java.time.Instant + +data class ChainedAlertTriggerExecutionContext( + val workflow: Workflow, + val workflowRunResult: WorkflowRunResult, + val periodStart: Instant, + val periodEnd: Instant?, + val error: Exception? 
= null, + val trigger: ChainedAlertTrigger, + val alertGeneratingMonitors: Set, + val monitorIdToAlertIdsMap: Map> +) { + + constructor( + workflow: Workflow, + workflowRunResult: WorkflowRunResult, + trigger: ChainedAlertTrigger, + alertGeneratingMonitors: Set, + monitorIdToAlertIdsMap: Map> + ) : + this( + workflow, + workflowRunResult, + workflowRunResult.executionStartTime, + workflowRunResult.executionEndTime, + workflowRunResult.error, + trigger, + alertGeneratingMonitors, + monitorIdToAlertIdsMap + ) + + /** + * Mustache templates need special permissions to reflectively introspect field names. To avoid doing this we + * translate the context to a Map of Strings to primitive types, which can be accessed without reflection. + */ + open fun asTemplateArg(): Map { + return mapOf( + "monitor" to workflow.asTemplateArg(), + "results" to workflowRunResult, + "periodStart" to periodStart, + "error" to error, + "alertGeneratingMonitors" to alertGeneratingMonitors, + "monitorIdToAlertIdsMap" to monitorIdToAlertIdsMap + ) + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt index a21ec379e..8c96b1b4a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/service/DeleteMonitorService.kt @@ -24,9 +24,9 @@ import org.opensearch.action.support.WriteRequest.RefreshPolicy import org.opensearch.action.support.master.AcknowledgedResponse import org.opensearch.alerting.MonitorMetadataService import org.opensearch.alerting.opensearchapi.suspendUntil -import org.opensearch.alerting.transport.TransportDeleteWorkflowAction.Companion.WORKFLOW_DELEGATE_PATH -import org.opensearch.alerting.transport.TransportDeleteWorkflowAction.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.alerting.util.AlertingException +import 
org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGATE_PATH +import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.client.Client import org.opensearch.commons.alerting.action.DeleteMonitorResponse import org.opensearch.commons.alerting.model.Monitor diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportAcknowledgeChainedAlertAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportAcknowledgeChainedAlertAction.kt new file mode 100644 index 000000000..4278bd278 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportAcknowledgeChainedAlertAction.kt @@ -0,0 +1,296 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchStatusException +import org.opensearch.ResourceNotFoundException +import org.opensearch.action.ActionListener +import org.opensearch.action.ActionRequest +import org.opensearch.action.bulk.BulkRequest +import org.opensearch.action.bulk.BulkResponse +import org.opensearch.action.delete.DeleteRequest +import org.opensearch.action.get.GetRequest +import org.opensearch.action.get.GetResponse +import org.opensearch.action.index.IndexRequest +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.action.support.WriteRequest +import org.opensearch.action.update.UpdateRequest +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.util.AlertingException +import 
org.opensearch.alerting.util.ScheduledJobUtils +import org.opensearch.client.Client +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.action.AcknowledgeAlertResponse +import org.opensearch.commons.alerting.action.AcknowledgeChainedAlertRequest +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.CompositeInput +import org.opensearch.commons.alerting.model.DataSources +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.Workflow +import org.opensearch.commons.alerting.util.optionalTimeField +import org.opensearch.commons.utils.recreateObject +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.index.query.QueryBuilders +import org.opensearch.rest.RestStatus +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import java.time.Instant +import java.util.Locale + +private val log = LogManager.getLogger(TransportAcknowledgeChainedAlertAction::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportAcknowledgeChainedAlertAction @Inject constructor( + transportService: TransportService, + val client: Client, + clusterService: ClusterService, + actionFilters: ActionFilters, + val settings: Settings, + val xContentRegistry: NamedXContentRegistry, +) : HandledTransportAction( + 
AlertingActions.ACKNOWLEDGE_CHAINED_ALERTS_ACTION_NAME, + transportService, + actionFilters, + ::AcknowledgeChainedAlertRequest +) { + @Volatile + private var isAlertHistoryEnabled = AlertingSettings.ALERT_HISTORY_ENABLED.get(settings) + + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.ALERT_HISTORY_ENABLED) { isAlertHistoryEnabled = it } + } + + override fun doExecute( + task: Task, + AcknowledgeChainedAlertRequest: ActionRequest, + actionListener: ActionListener, + ) { + val request = AcknowledgeChainedAlertRequest as? AcknowledgeChainedAlertRequest + ?: recreateObject(AcknowledgeChainedAlertRequest) { AcknowledgeChainedAlertRequest(it) } + client.threadPool().threadContext.stashContext().use { + scope.launch { + try { + val getResponse = getWorkflow(request.workflowId) + if (getResponse.isExists == false) { + actionListener.onFailure( + AlertingException.wrap( + ResourceNotFoundException( + String.format( + Locale.ROOT, + "No workflow found with id [%s]", + request.workflowId + ) + ) + ) + ) + } else { + val workflow = ScheduledJobUtils.parseWorkflowFromScheduledJobDocSource(xContentRegistry, getResponse) + AcknowledgeHandler(client, actionListener, request).start(workflow = workflow) + } + } catch (e: Exception) { + log.error("Failed to acknowledge chained alerts from request $request", e) + actionListener.onFailure(AlertingException.wrap(e)) + } + } + } + } + + private suspend fun getWorkflow(workflowId: String): GetResponse { + return client.suspendUntil { client.get(GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, workflowId), it) } + } + + inner class AcknowledgeHandler( + private val client: Client, + private val actionListener: ActionListener, + private val request: AcknowledgeChainedAlertRequest, + ) { + val alerts = mutableMapOf() + + suspend fun start(workflow: Workflow) = findActiveAlerts(workflow) + + private suspend fun findActiveAlerts(workflow: Workflow) { + try { + val queryBuilder = QueryBuilders.boolQuery() + 
.must( + QueryBuilders.wildcardQuery("workflow_id", request.workflowId) + ) + .must(QueryBuilders.termsQuery("_id", request.alertIds)) + if (workflow.inputs.isEmpty() || (workflow.inputs[0] is CompositeInput) == false) { + actionListener.onFailure( + OpenSearchStatusException("Workflow ${workflow.id} is invalid", RestStatus.INTERNAL_SERVER_ERROR) + ) + return + } + val compositeInput = workflow.inputs[0] as CompositeInput + val workflowId = compositeInput.sequence.delegates[0].monitorId + val dataSources: DataSources = getDataSources(workflowId) + val searchRequest = SearchRequest() + .indices(dataSources.alertsIndex) + .routing(request.workflowId) + .source( + SearchSourceBuilder() + .query(queryBuilder) + .version(true) + .seqNoAndPrimaryTerm(true) + .size(request.alertIds.size) + ) + + val searchResponse: SearchResponse = client.suspendUntil { client.search(searchRequest, it) } + onSearchResponse(searchResponse, workflow, dataSources) + } catch (t: Exception) { + log.error("Failed to acknowledge chained alert ${request.alertIds} for workflow ${request.workflowId}", t) + actionListener.onFailure(AlertingException.wrap(t)) + } + } + + private suspend fun getDataSources(monitorId: String): DataSources { + val getResponse: GetResponse = client.suspendUntil { client.get(GetRequest(ScheduledJob.SCHEDULED_JOBS_INDEX, monitorId), it) } + return ScheduledJobUtils.parseMonitorFromScheduledJobDocSource(xContentRegistry, getResponse).dataSources + } + + private suspend fun onSearchResponse(response: SearchResponse, workflow: Workflow, dataSources: DataSources) { + val alertsHistoryIndex = dataSources.alertsHistoryIndex + val updateRequests = mutableListOf() + val copyRequests = mutableListOf() + response.hits.forEach { hit -> + val xcp = XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, + XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alert = 
Alert.parse(xcp, hit.id, hit.version) + alerts[alert.id] = alert + + if (alert.state == Alert.State.ACTIVE) { + if ( + alert.findingIds.isEmpty() || + !isAlertHistoryEnabled + ) { + val updateRequest = UpdateRequest(dataSources.alertsIndex, alert.id) + .routing(request.workflowId) + .setIfSeqNo(hit.seqNo) + .setIfPrimaryTerm(hit.primaryTerm) + .doc( + XContentFactory.jsonBuilder().startObject() + .field(Alert.STATE_FIELD, Alert.State.ACKNOWLEDGED.toString()) + .optionalTimeField(Alert.ACKNOWLEDGED_TIME_FIELD, Instant.now()) + .endObject() + ) + updateRequests.add(updateRequest) + } else { + val copyRequest = IndexRequest(alertsHistoryIndex) + .routing(request.workflowId) + .id(alert.id) + .source( + alert.copy(state = Alert.State.ACKNOWLEDGED, acknowledgedTime = Instant.now()) + .toXContentWithUser(XContentFactory.jsonBuilder()) + ) + copyRequests.add(copyRequest) + } + } + } + + try { + val updateResponse: BulkResponse? = if (updateRequests.isNotEmpty()) { + client.suspendUntil { + client.bulk(BulkRequest().add(updateRequests).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE), it) + } + } else null + val copyResponse: BulkResponse? 
= if (copyRequests.isNotEmpty()) { + client.suspendUntil { + client.bulk(BulkRequest().add(copyRequests).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE), it) + } + } else null + onBulkResponse(updateResponse, copyResponse, dataSources) + } catch (t: Exception) { + log.error("Failed to acknowledge chained alert ${request.alertIds} for workflow ${request.workflowId}", t) + actionListener.onFailure(AlertingException.wrap(t)) + } + } + + private suspend fun onBulkResponse(updateResponse: BulkResponse?, copyResponse: BulkResponse?, dataSources: DataSources) { + val deleteRequests = mutableListOf() + val acknowledged = mutableListOf() + val missing = request.alertIds.toMutableSet() + val failed = mutableListOf() + + alerts.values.forEach { + if (it.state != Alert.State.ACTIVE) { + missing.remove(it.id) + failed.add(it) + } + } + + updateResponse?.items?.forEach { item -> + missing.remove(item.id) + if (item.isFailed) { + failed.add(alerts[item.id]!!) + } else { + acknowledged.add(alerts[item.id]!!) + } + } + + copyResponse?.items?.forEach { item -> + log.info("got a copyResponse: $item") + missing.remove(item.id) + if (item.isFailed) { + log.info("got a failureResponse: ${item.failureMessage}") + failed.add(alerts[item.id]!!) + } else { + val deleteRequest = DeleteRequest(dataSources.alertsIndex, item.id) + .routing(request.workflowId) + deleteRequests.add(deleteRequest) + } + } + + if (deleteRequests.isNotEmpty()) { + try { + val deleteResponse: BulkResponse = client.suspendUntil { + client.bulk(BulkRequest().add(deleteRequests).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE), it) + } + deleteResponse.items.forEach { item -> + missing.remove(item.id) + if (item.isFailed) { + failed.add(alerts[item.id]!!) + } else { + acknowledged.add(alerts[item.id]!!) 
+ } + } + } catch (t: Exception) { + actionListener.onFailure(AlertingException.wrap(t)) + return + } + } + actionListener.onResponse( + AcknowledgeAlertResponse( + acknowledged.toList(), + failed.toList(), + missing.toList() + ) + ) + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteWorkflowAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteWorkflowAction.kt index 79fd817ca..b43ec3c3f 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteWorkflowAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDeleteWorkflowAction.kt @@ -31,6 +31,8 @@ import org.opensearch.alerting.opensearchapi.suspendUntil import org.opensearch.alerting.service.DeleteMonitorService import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.util.AlertingException +import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGATE_PATH +import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.client.Client import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -322,9 +324,4 @@ class TransportDeleteWorkflowAction @Inject constructor( val deleteResponse: DeleteResponse = client.suspendUntil { delete(deleteRequest, it) } } } - - companion object { - const val WORKFLOW_DELEGATE_PATH = "workflow.inputs.composite_input.sequence.delegates" - const val WORKFLOW_MONITOR_PATH = "workflow.inputs.composite_input.sequence.delegates.monitor_id" - } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt index 45c2a5cd1..332cbae92 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt @@ -20,7 +20,6 @@ import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.ExecuteWorkflowRequest import org.opensearch.alerting.action.ExecuteWorkflowResponse import org.opensearch.alerting.util.AlertingException -import org.opensearch.alerting.workflow.WorkflowRunnerService import org.opensearch.client.Client import org.opensearch.common.inject.Inject import org.opensearch.common.xcontent.LoggingDeprecationHandler @@ -63,14 +62,14 @@ class TransportExecuteWorkflowAction @Inject constructor( workflow.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execWorkflowRequest.requestEnd.millis)) try { val workflowRunResult = - WorkflowRunnerService.runJob(workflow, periodStart, periodEnd, execWorkflowRequest.dryrun) - withContext(Dispatchers.IO) { + MonitorRunnerService.runJob(workflow, periodStart, periodEnd, execWorkflowRequest.dryrun) + withContext(Dispatchers.IO, { actionListener.onResponse( ExecuteWorkflowResponse( workflowRunResult ) ) - } + }) } catch (e: Exception) { log.error("Unexpected error running workflow", e) withContext(Dispatchers.IO) { diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsAction.kt index 4a266fa99..0d6e050b2 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetAlertsAction.kt @@ -110,6 +110,14 @@ class TransportGetAlertsAction @Inject constructor( } else if (getAlertsRequest.monitorIds.isNullOrEmpty() == false) { queryBuilder.filter(QueryBuilders.termsQuery("monitor_id", getAlertsRequest.monitorIds)) } + if (getAlertsRequest.workflowIds.isNullOrEmpty() == false) { + queryBuilder.must(QueryBuilders.termsQuery("workflow_id", getAlertsRequest.workflowIds)) + } 
else { + val noWorklfowIdQuery = QueryBuilders.boolQuery() + .should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(Alert.WORKFLOW_ID_FIELD))) + .should(QueryBuilders.termsQuery(Alert.WORKFLOW_ID_FIELD, "")) + queryBuilder.must(noWorklfowIdQuery) + } if (!tableProp.searchString.isNullOrBlank()) { queryBuilder .must( @@ -152,9 +160,9 @@ class TransportGetAlertsAction @Inject constructor( */ suspend fun resolveAlertsIndexName(getAlertsRequest: GetAlertsRequest): String { var alertIndex = AlertIndices.ALL_ALERT_INDEX_PATTERN - if (!getAlertsRequest.alertIndex.isNullOrEmpty()) { + if (getAlertsRequest.alertIndex.isNullOrEmpty() == false) { alertIndex = getAlertsRequest.alertIndex!! - } else if (!getAlertsRequest.monitorId.isNullOrEmpty()) { + } else if (getAlertsRequest.monitorId.isNullOrEmpty() == false) { val retrievedMonitor = getMonitor(getAlertsRequest) if (retrievedMonitor != null) { alertIndex = retrievedMonitor.dataSources.alertsIndex @@ -179,6 +187,7 @@ class TransportGetAlertsAction @Inject constructor( ) return ScheduledJob.parse(xcp, getResponse.id, getResponse.version) as Monitor } catch (t: Exception) { + log.error("Failure in fetching monitor ${getAlertsRequest.monitorId} to resolve alert index in get alerts action", t) return null } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt index 63f81954a..470f990fd 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetMonitorAction.kt @@ -6,17 +6,24 @@ package org.opensearch.alerting.transport import org.apache.logging.log4j.LogManager +import org.apache.lucene.search.join.ScoreMode import org.opensearch.OpenSearchStatusException import org.opensearch.action.ActionListener import org.opensearch.action.get.GetRequest import 
org.opensearch.action.get.GetResponse +import org.opensearch.action.search.SearchAction +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse import org.opensearch.action.support.ActionFilters import org.opensearch.action.support.HandledTransportAction import org.opensearch.alerting.action.GetMonitorAction import org.opensearch.alerting.action.GetMonitorRequest import org.opensearch.alerting.action.GetMonitorResponse +import org.opensearch.alerting.action.GetMonitorResponse.AssociatedWorkflow import org.opensearch.alerting.settings.AlertingSettings import org.opensearch.alerting.util.AlertingException +import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_DELEGATE_PATH +import org.opensearch.alerting.util.ScheduledJobUtils.Companion.WORKFLOW_MONITOR_PATH import org.opensearch.client.Client import org.opensearch.cluster.service.ClusterService import org.opensearch.common.inject.Inject @@ -26,8 +33,11 @@ import org.opensearch.common.xcontent.XContentHelper import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.Workflow import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.index.query.QueryBuilders import org.opensearch.rest.RestStatus +import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.tasks.Task import org.opensearch.transport.TransportService @@ -39,8 +49,8 @@ class TransportGetMonitorAction @Inject constructor( actionFilters: ActionFilters, val xContentRegistry: NamedXContentRegistry, val clusterService: ClusterService, - settings: Settings -) : HandledTransportAction ( + settings: Settings, +) : HandledTransportAction( GetMonitorAction.NAME, transportService, actionFilters, @@ -48,7 +58,8 @@ class TransportGetMonitorAction @Inject constructor( ), SecureTransportAction { - @Volatile override 
var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) init { listenFilterBySettingChange(clusterService) @@ -108,7 +119,15 @@ class TransportGetMonitorAction @Inject constructor( } actionListener.onResponse( - GetMonitorResponse(response.id, response.version, response.seqNo, response.primaryTerm, RestStatus.OK, monitor) + GetMonitorResponse( + response.id, + response.version, + response.seqNo, + response.primaryTerm, + RestStatus.OK, + monitor, + getAssociatedWorkflows(response.id) + ) ) } @@ -119,4 +138,41 @@ class TransportGetMonitorAction @Inject constructor( ) } } + + private fun getAssociatedWorkflows(id: String): List { + try { + val associatedWorkflows = mutableListOf() + val queryBuilder = QueryBuilders.nestedQuery( + WORKFLOW_DELEGATE_PATH, + QueryBuilders.boolQuery().must( + QueryBuilders.matchQuery( + WORKFLOW_MONITOR_PATH, + id + ) + ), + ScoreMode.None + ) + val searchRequest = SearchRequest() + .indices(ScheduledJob.SCHEDULED_JOBS_INDEX) + .source(SearchSourceBuilder().query(queryBuilder).fetchField("_id")) + val response: SearchResponse = client.execute(SearchAction.INSTANCE, searchRequest).get() + + for (hit in response.hits) { + XContentType.JSON.xContent().createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + hit.sourceAsString + ).use { hitsParser -> + val workflow = ScheduledJob.parse(hitsParser, hit.id, hit.version) + if (workflow is Workflow) { + associatedWorkflows.add(AssociatedWorkflow(hit.id, workflow.name)) + } + } + } + return associatedWorkflows + } catch (e: java.lang.Exception) { + log.error("failed to fetch associated workflows for monitor $id", e) + return emptyList() + } + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetWorkflowAlertsAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetWorkflowAlertsAction.kt new file mode 
100644 index 000000000..7ef24de11 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportGetWorkflowAlertsAction.kt @@ -0,0 +1,240 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.action.ActionListener +import org.opensearch.action.ActionRequest +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import org.opensearch.alerting.alerts.AlertIndices +import org.opensearch.alerting.opensearchapi.addFilter +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.util.AlertingException +import org.opensearch.client.Client +import org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.action.AlertingActions +import org.opensearch.commons.alerting.action.GetAlertsRequest +import org.opensearch.commons.alerting.action.GetWorkflowAlertsRequest +import org.opensearch.commons.alerting.action.GetWorkflowAlertsResponse +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.authuser.User +import org.opensearch.commons.utils.recreateObject +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.XContentParser +import 
org.opensearch.index.query.Operator +import org.opensearch.index.query.QueryBuilders +import org.opensearch.rest.RestStatus +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.search.sort.SortBuilders +import org.opensearch.search.sort.SortOrder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import java.io.IOException + +private val log = LogManager.getLogger(TransportGetAlertsAction::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportGetWorkflowAlertsAction @Inject constructor( + transportService: TransportService, + val client: Client, + clusterService: ClusterService, + actionFilters: ActionFilters, + val settings: Settings, + val xContentRegistry: NamedXContentRegistry, +) : HandledTransportAction( + AlertingActions.GET_WORKFLOW_ALERTS_ACTION_NAME, + transportService, + actionFilters, + ::GetAlertsRequest +), + SecureTransportAction { + + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + @Volatile + private var isAlertHistoryEnabled = AlertingSettings.ALERT_HISTORY_ENABLED.get(settings) + + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.ALERT_HISTORY_ENABLED) { isAlertHistoryEnabled = it } + listenFilterBySettingChange(clusterService) + } + + override fun doExecute( + task: Task, + request: ActionRequest, + actionListener: ActionListener, + ) { + val getWorkflowAlertsRequest = request as? 
GetWorkflowAlertsRequest + ?: recreateObject(request) { GetWorkflowAlertsRequest(it) } + val user = readUserFromThreadContext(client) + + val tableProp = getWorkflowAlertsRequest.table + val sortBuilder = SortBuilders.fieldSort(tableProp.sortString) + .order(SortOrder.fromString(tableProp.sortOrder)) + if (!tableProp.missing.isNullOrBlank()) { + sortBuilder.missing(tableProp.missing) + } + + val queryBuilder = QueryBuilders.boolQuery() + + if (getWorkflowAlertsRequest.severityLevel != "ALL") { + queryBuilder.filter(QueryBuilders.termQuery("severity", getWorkflowAlertsRequest.severityLevel)) + } + + if (getWorkflowAlertsRequest.alertState != "ALL") { + queryBuilder.filter(QueryBuilders.termQuery("state", getWorkflowAlertsRequest.alertState)) + } + + if (getWorkflowAlertsRequest.alertIds.isNullOrEmpty() == false) { + queryBuilder.filter(QueryBuilders.termsQuery("_id", getWorkflowAlertsRequest.alertIds)) + } + + if (getWorkflowAlertsRequest.monitorIds.isNullOrEmpty() == false) { + queryBuilder.filter(QueryBuilders.termsQuery("monitor_id", getWorkflowAlertsRequest.monitorIds)) + } + if (getWorkflowAlertsRequest.workflowIds.isNullOrEmpty() == false) { + queryBuilder.must(QueryBuilders.termsQuery("workflow_id", getWorkflowAlertsRequest.workflowIds)) + queryBuilder.must(QueryBuilders.termQuery("monitor_id", "")) + } + if (!tableProp.searchString.isNullOrBlank()) { + queryBuilder + .must( + QueryBuilders.queryStringQuery(tableProp.searchString) + .defaultOperator(Operator.AND) + .field("monitor_name") + .field("trigger_name") + ) + } + val searchSourceBuilder = SearchSourceBuilder() + .version(true) + .seqNoAndPrimaryTerm(true) + .query(queryBuilder) + .sort(sortBuilder) + .size(tableProp.size) + .from(tableProp.startIndex) + + client.threadPool().threadContext.stashContext().use { + scope.launch { + try { + val alertIndex = resolveAlertsIndexName(getWorkflowAlertsRequest) + getAlerts(getWorkflowAlertsRequest, alertIndex, searchSourceBuilder, actionListener, user) + } 
catch (t: Exception) { + log.error("Failed to get alerts", t) + if (t is AlertingException) { + actionListener.onFailure(t) + } else { + actionListener.onFailure(AlertingException.wrap(t)) + } + } + } + } + } + + fun resolveAlertsIndexName(getAlertsRequest: GetWorkflowAlertsRequest): String { + return if (getAlertsRequest.alertIndex.isNullOrEmpty()) AlertIndices.ALL_ALERT_INDEX_PATTERN + else getAlertsRequest.alertIndex!! + } + + fun resolveAssociatedAlertsIndexName(getAlertsRequest: GetWorkflowAlertsRequest): String { + return if (getAlertsRequest.alertIndex.isNullOrEmpty()) AlertIndices.ALL_ALERT_INDEX_PATTERN + else getAlertsRequest.associatedAlertsIndex!! + } + + suspend fun getAlerts( + getWorkflowAlertsRequest: GetWorkflowAlertsRequest, + alertIndex: String, + searchSourceBuilder: SearchSourceBuilder, + actionListener: ActionListener, + user: User?, + ) { + // user is null when: 1/ security is disabled. 2/when user is super-admin. + if (user == null) { + // no backend-role filtering is applied in this case. + search(getWorkflowAlertsRequest, alertIndex, searchSourceBuilder, actionListener) + } else if (!doFilterForUser(user)) { + // security is enabled and filterby is disabled. + search(getWorkflowAlertsRequest, alertIndex, searchSourceBuilder, actionListener) + } else { + // security is enabled and filterby is enabled. 
+ try { + log.info("Filtering result by: ${user.backendRoles}") + addFilter(user, searchSourceBuilder, "monitor_user.backend_roles.keyword") + search(getWorkflowAlertsRequest, alertIndex, searchSourceBuilder, actionListener) + } catch (ex: IOException) { + actionListener.onFailure(AlertingException.wrap(ex)) + } + } + } + + suspend fun search( + getWorkflowAlertsRequest: GetWorkflowAlertsRequest, + alertIndex: String, + searchSourceBuilder: SearchSourceBuilder, + actionListener: ActionListener, + ) { + try { + val searchRequest = SearchRequest() + .indices(alertIndex) + .source(searchSourceBuilder) + val alerts = mutableListOf() + val associatedAlerts = mutableListOf() + + val response: SearchResponse = client.suspendUntil { search(searchRequest, it) } + val totalAlertCount = response.hits.totalHits?.value?.toInt() + alerts.addAll( + parseAlertsFromSearchResponse(response) + ) + if (alerts.isNotEmpty() && getWorkflowAlertsRequest.getAssociatedAlerts == true) + getAssociatedAlerts(associatedAlerts, alerts, resolveAssociatedAlertsIndexName(getWorkflowAlertsRequest)) + actionListener.onResponse(GetWorkflowAlertsResponse(alerts, associatedAlerts, totalAlertCount)) + } catch (e: Exception) { + actionListener.onFailure(AlertingException("Failed to get alerts", RestStatus.INTERNAL_SERVER_ERROR, e)) + } + } + + private suspend fun getAssociatedAlerts(associatedAlerts: MutableList, alerts: MutableList, alertIndex: String) { + try { + val associatedAlertIds = mutableSetOf() + alerts.forEach { associatedAlertIds.addAll(it.associatedAlertIds) } + if (associatedAlertIds.isEmpty()) return + val queryBuilder = QueryBuilders.boolQuery() + queryBuilder.must(QueryBuilders.termsQuery("_id", associatedAlertIds)) + queryBuilder.must(QueryBuilders.termQuery(Alert.STATE_FIELD, Alert.State.AUDIT)) + val searchRequest = SearchRequest(alertIndex) + searchRequest.source().query(queryBuilder) + val response: SearchResponse = client.suspendUntil { search(searchRequest, it) } + 
associatedAlerts.addAll(parseAlertsFromSearchResponse(response)) + } catch (e: Exception) { + log.error("Failed to get associated alerts in get workflow alerts action", e) + } + } + + private fun parseAlertsFromSearchResponse(response: SearchResponse) = response.hits.map { hit -> + val xcp = XContentHelper.createParser( + xContentRegistry, + LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, + XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alert = Alert.parse(xcp, hit.id, hit.version) + alert + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/util/ScheduledJobUtils.kt b/alerting/src/main/kotlin/org/opensearch/alerting/util/ScheduledJobUtils.kt new file mode 100644 index 000000000..8e3a946e8 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/util/ScheduledJobUtils.kt @@ -0,0 +1,70 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.util + +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.get.GetResponse +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.ScheduledJob +import org.opensearch.commons.alerting.model.Workflow +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.rest.RestStatus + +private val log = LogManager.getLogger(ScheduledJobUtils::class.java) + +class ScheduledJobUtils { + companion object { + const val WORKFLOW_DELEGATE_PATH = "workflow.inputs.composite_input.sequence.delegates" + const val WORKFLOW_MONITOR_PATH = "workflow.inputs.composite_input.sequence.delegates.monitor_id" + fun parseWorkflowFromScheduledJobDocSource(xContentRegistry: 
NamedXContentRegistry, response: GetResponse): Workflow { + XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + response.sourceAsBytesRef, XContentType.JSON + ).use { xcp -> + try { + val workflow = ScheduledJob.parse(xcp, response.id, response.version) + if (workflow is Workflow) { + return workflow + } else { + log.error("Unable to parse workflow from ${response.source}") + throw OpenSearchStatusException( + "Unable to parse workflow from ${response.source}", + RestStatus.INTERNAL_SERVER_ERROR + ) + } + } catch (e: java.lang.Exception) { + throw AlertingException("Unable to parse workflow from ${response.source}", RestStatus.INTERNAL_SERVER_ERROR, e) + } + } + } + + fun parseMonitorFromScheduledJobDocSource(xContentRegistry: NamedXContentRegistry, response: GetResponse): Monitor { + XContentHelper.createParser( + xContentRegistry, LoggingDeprecationHandler.INSTANCE, + response.sourceAsBytesRef, XContentType.JSON + ).use { xcp -> + try { + val monitor = ScheduledJob.parse(xcp, response.id, response.version) + if (monitor is Monitor) { + return monitor + } else { + log.error("Unable to parse monitor from ${response.source}") + throw OpenSearchStatusException( + "Unable to parse monitor from ${response.source}", + RestStatus.INTERNAL_SERVER_ERROR + ) + } + } catch (e: java.lang.Exception) { + throw AlertingException("Unable to parse monitor from ${response.source}", RestStatus.INTERNAL_SERVER_ERROR, e) + } + } + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt index 013ee3056..ce74e6ec3 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt @@ -6,28 +6,45 @@ package org.opensearch.alerting.workflow import org.apache.logging.log4j.LogManager -import 
org.opensearch.ExceptionsHelper +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse import org.opensearch.alerting.BucketLevelMonitorRunner import org.opensearch.alerting.DocumentLevelMonitorRunner import org.opensearch.alerting.MonitorRunnerExecutionContext import org.opensearch.alerting.QueryLevelMonitorRunner import org.opensearch.alerting.WorkflowMetadataService +import org.opensearch.alerting.model.ChainedAlertTriggerRunResult import org.opensearch.alerting.model.MonitorRunResult import org.opensearch.alerting.model.WorkflowRunResult +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.script.ChainedAlertTriggerExecutionContext import org.opensearch.alerting.util.AlertingException import org.opensearch.alerting.util.isDocLevelMonitor import org.opensearch.alerting.util.isQueryLevelMonitor +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentParserUtils +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.CompositeInput +import org.opensearch.commons.alerting.model.DataSources import org.opensearch.commons.alerting.model.Delegate import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.Workflow import org.opensearch.commons.alerting.util.isBucketLevelMonitor +import org.opensearch.core.xcontent.XContentParser +import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.query.QueryBuilders.boolQuery +import org.opensearch.index.query.QueryBuilders.existsQuery +import org.opensearch.index.query.QueryBuilders.termsQuery import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset import java.util.UUID object 
CompositeWorkflowRunner : WorkflowRunner() { + private val logger = LogManager.getLogger(javaClass) override suspend fun runWorkflow( @@ -35,7 +52,7 @@ object CompositeWorkflowRunner : WorkflowRunner() { monitorCtx: MonitorRunnerExecutionContext, periodStart: Instant, periodEnd: Instant, - dryRun: Boolean + dryRun: Boolean, ): WorkflowRunResult { val workflowExecutionStartTime = Instant.now() @@ -48,9 +65,7 @@ object CompositeWorkflowRunner : WorkflowRunner() { skipIndex = isTempWorkflow, executionId = executionId ) - - var workflowResult = WorkflowRunResult(mutableListOf(), workflowExecutionStartTime, null, executionId) - + var dataSources: DataSources? = null logger.debug("Workflow ${workflow.id} in $executionId execution is running") val delegates = (workflow.inputs[0] as CompositeInput).sequence.delegates.sortedBy { it.order } var monitors: List @@ -59,11 +74,18 @@ object CompositeWorkflowRunner : WorkflowRunner() { monitors = monitorCtx.workflowService!!.getMonitorsById(delegates.map { it.monitorId }, delegates.size) } catch (e: Exception) { logger.error("Failed getting workflow delegates. Error: ${e.message}", e) - return workflowResult.copy(error = AlertingException.wrap(e)) + return WorkflowRunResult( + workflow.id, + workflow.name, + emptyList(), + workflowExecutionStartTime, + Instant.now(), + executionId, + AlertingException.wrap(e) + ) } // Validate the monitors size validateMonitorSize(delegates, monitors, workflow) - val monitorsById = monitors.associateBy { it.id } val resultList = mutableListOf>() var lastErrorDelegateRun: Exception? 
= null @@ -80,71 +102,36 @@ object CompositeWorkflowRunner : WorkflowRunner() { ?: throw AlertingException.wrap( IllegalStateException("Chained finding monitor not found ${delegate.monitorId} for the workflow $workflow.id") ) - try { indexToDocIds = monitorCtx.workflowService!!.getFindingDocIdsByExecutionId(chainedMonitor, executionId) } catch (e: Exception) { - val unwrappedException = ExceptionsHelper.unwrapCause(e) as Exception - // If it is not IndexNotFound exception return the result - if (unwrappedException.message?.contains("Configured indices are not found") == false) { - logger.error("Failed to execute workflow. Error: ${e.message}", e) - return workflowResult.copy(error = AlertingException.wrap(e)) - } - // Log that finding index is not found and proceed with the execution - logger.error("Finding index ${chainedMonitor.dataSources.findingsIndex} doesn't exist") + logger.error("Failed to execute workflow due to failure in chained findings. Error: ${e.message}", e) + return WorkflowRunResult( + workflow.id, workflow.name, emptyList(), workflowExecutionStartTime, Instant.now(), executionId, + AlertingException.wrap(e) + ) } } - val workflowRunContext = WorkflowRunContext( workflowId = workflowMetadata.workflowId, workflowMetadataId = workflowMetadata.id, chainedMonitorId = delegate.chainedMonitorFindings?.monitorId, - executionId = executionId, - matchingDocIdsPerIndex = indexToDocIds + matchingDocIdsPerIndex = indexToDocIds, + auditDelegateMonitorAlerts = if (workflow.auditDelegateMonitorAlerts == null) true + else workflow.auditDelegateMonitorAlerts!! ) - - var delegateRunResult: MonitorRunResult<*>? 
try { - delegateRunResult = if (delegateMonitor.isBucketLevelMonitor()) { - BucketLevelMonitorRunner.runMonitor( - delegateMonitor, - monitorCtx, - periodStart, - periodEnd, - dryRun, - workflowRunContext - ) - } else if (delegateMonitor.isDocLevelMonitor()) { - DocumentLevelMonitorRunner.runMonitor( - delegateMonitor, - monitorCtx, - periodStart, - periodEnd, - dryRun, - workflowRunContext - ) - } else if (delegateMonitor.isQueryLevelMonitor()) { - QueryLevelMonitorRunner.runMonitor( - delegateMonitor, - monitorCtx, - periodStart, - periodEnd, - dryRun, - workflowRunContext - ) - } else { - throw AlertingException.wrap( - IllegalStateException("Unsupported monitor type") - ) - } + dataSources = delegateMonitor.dataSources + val delegateRunResult = + runDelegateMonitor(delegateMonitor, monitorCtx, periodStart, periodEnd, dryRun, workflowRunContext, executionId) + resultList.add(delegateRunResult!!) } catch (ex: Exception) { - logger.error("Error executing workflow delegate. Error: ${ex.message}", ex) + logger.error("Error executing workflow delegate monitor ${delegate.monitorId}", ex) lastErrorDelegateRun = AlertingException.wrap(ex) - continue + break } - if (delegateRunResult != null) resultList.add(delegateRunResult) } - logger.debug("Workflow ${workflow.id} in $executionId finished") + logger.debug("Workflow ${workflow.id} delegate monitors in execution $executionId completed") // Update metadata only if the workflow is not temp if (!isTempWorkflow) { WorkflowMetadataService.upsertWorkflowMetadata( @@ -152,15 +139,109 @@ object CompositeWorkflowRunner : WorkflowRunner() { true ) } + val triggerResults = mutableMapOf() + val workflowRunResult = WorkflowRunResult( + workflowId = workflow.id, + workflowName = workflow.name, + monitorRunResults = resultList, + executionStartTime = workflowExecutionStartTime, + executionEndTime = null, + executionId = executionId, + error = lastErrorDelegateRun, + triggerResults = triggerResults + ) + if (dataSources != null) { + 
try { + monitorCtx.alertIndices!!.createOrUpdateAlertIndex(dataSources) + val monitorIdToAlertIdsMap = fetchAlertsGeneratedInCurrentExecution(dataSources, executionId, monitorCtx, workflow) + for (trigger in workflow.triggers) { + val caTrigger = trigger as ChainedAlertTrigger + val triggerCtx = ChainedAlertTriggerExecutionContext( + workflow = workflow, + workflowRunResult = workflowRunResult, + trigger = caTrigger, + alertGeneratingMonitors = monitorIdToAlertIdsMap.keys, + monitorIdToAlertIdsMap = monitorIdToAlertIdsMap + ) + runChainedAlertTrigger(dataSources, monitorCtx, workflow, trigger, executionId, triggerCtx, dryRun, triggerResults) + } + } catch (e: Exception) { + // We can't save ERROR alerts to the index here as we don't know if there are existing ACTIVE alerts + val id = if (workflow.id.trim().isEmpty()) "_na_" else workflow.id + logger.error("Error loading current chained alerts for workflow: $id", e) + return WorkflowRunResult( + workflowId = workflow.id, + workflowName = workflow.name, + monitorRunResults = emptyList(), + executionStartTime = workflowExecutionStartTime, + executionEndTime = Instant.now(), + executionId = executionId, + error = AlertingException.wrap(e), + triggerResults = emptyMap() + ) + } + } + workflowRunResult.executionEndTime = Instant.now() - return workflowResult.copy(workflowRunResult = resultList, executionEndTime = Instant.now(), error = lastErrorDelegateRun) + val sr = SearchRequest(dataSources!!.alertsIndex) + sr.source().query(QueryBuilders.matchAllQuery()).size(10) + val searchResponse: SearchResponse = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.search(sr, it) } + searchResponse.hits + return workflowRunResult + } + + private suspend fun runDelegateMonitor( + delegateMonitor: Monitor, + monitorCtx: MonitorRunnerExecutionContext, + periodStart: Instant, + periodEnd: Instant, + dryRun: Boolean, + workflowRunContext: WorkflowRunContext, + executionId: String, + ): MonitorRunResult<*>? 
{ + + if (delegateMonitor.isBucketLevelMonitor()) { + return BucketLevelMonitorRunner.runMonitor( + delegateMonitor, + monitorCtx, + periodStart, + periodEnd, + dryRun, + workflowRunContext, + executionId + ) + } else if (delegateMonitor.isDocLevelMonitor()) { + return DocumentLevelMonitorRunner.runMonitor( + delegateMonitor, + monitorCtx, + periodStart, + periodEnd, + dryRun, + workflowRunContext, + executionId + ) + } else if (delegateMonitor.isQueryLevelMonitor()) { + return QueryLevelMonitorRunner.runMonitor( + delegateMonitor, + monitorCtx, + periodStart, + periodEnd, + dryRun, + workflowRunContext, + executionId + ) + } else { + throw AlertingException.wrap( + IllegalStateException("Unsupported monitor type ${delegateMonitor.monitorType}") + ) + } } private fun generateExecutionId( isTempWorkflow: Boolean, workflow: Workflow, ): String { - val randomPart = "${LocalDateTime.now(ZoneOffset.UTC)}${UUID.randomUUID()}" + val randomPart = "_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" return if (isTempWorkflow) randomPart else workflow.id.plus(randomPart) } @@ -177,4 +258,101 @@ object CompositeWorkflowRunner : WorkflowRunner() { ) } } + + private suspend fun runChainedAlertTrigger( + dataSources: DataSources, + monitorCtx: MonitorRunnerExecutionContext, + workflow: Workflow, + trigger: ChainedAlertTrigger, + executionId: String, + triggerCtx: ChainedAlertTriggerExecutionContext, + dryRun: Boolean, + triggerResults: MutableMap, + ) { + val triggerRunResult = monitorCtx.triggerService!!.runChainedAlertTrigger( + workflow, trigger, triggerCtx.alertGeneratingMonitors, triggerCtx.monitorIdToAlertIdsMap + ) + triggerResults[trigger.id] = triggerRunResult + if (triggerRunResult.triggered) { + val actionCtx = triggerCtx + for (action in trigger.actions) { + triggerRunResult.actionResults[action.id] = this.runAction(action, actionCtx, monitorCtx, workflow, dryRun) + } + val alert = monitorCtx.alertService!!.composeChainedAlert( + triggerCtx, executionId, 
workflow, triggerRunResult.associatedAlertIds.toList() + ) + if (!dryRun && workflow.id != Workflow.NO_ID) { + monitorCtx.retryPolicy?.let { + monitorCtx.alertService!!.saveAlerts( + dataSources, + listOf(alert), + it, + routingId = workflow.id + ) + } + } + } + } + + private suspend fun fetchAlertsGeneratedInCurrentExecution( + dataSources: DataSources, + executionId: String, + monitorCtx: MonitorRunnerExecutionContext, + workflow: Workflow, + ): MutableMap> { + try { + val searchRequest = + SearchRequest(getDelegateMonitorAlertIndex(dataSources, workflow, monitorCtx.alertIndices!!.isAlertHistoryEnabled())) + val queryBuilder = boolQuery() + queryBuilder.must(QueryBuilders.termQuery("execution_id", executionId)) + queryBuilder.must(QueryBuilders.termQuery("state", getDelegateMonitorAlertState(workflow))) + val noErrorQuery = boolQuery() + .should(boolQuery().mustNot(existsQuery(Alert.ERROR_MESSAGE_FIELD))) + .should(termsQuery(Alert.ERROR_MESSAGE_FIELD, "")) + queryBuilder.must(noErrorQuery) + searchRequest.source().query(queryBuilder) + val searchResponse: SearchResponse = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.search(searchRequest, it) } + val alerts = searchResponse.hits.map { hit -> + val xcp = XContentHelper.createParser( + monitorCtx.xContentRegistry, LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alert = Alert.parse(xcp, hit.id, hit.version) + alert + } + val map = mutableMapOf>() + for (alert in alerts) { + if (map.containsKey(alert.monitorId)) { + map[alert.monitorId]!!.add(alert.id) + } else { + map[alert.monitorId] = mutableSetOf(alert.id) + } + } + return map + } catch (e: Exception) { + logger.error("failed to get alerts generated by delegate monitors in current execution $executionId", e) + return mutableMapOf() + } + } + + fun getDelegateMonitorAlertIndex( + dataSources: DataSources, + workflow: 
Workflow, + isAlertHistoryEnabled: Boolean, + ): String { + return if (workflow.triggers.isNotEmpty()) { + if (isAlertHistoryEnabled) { + dataSources.alertsHistoryIndex!! + } else dataSources.alertsIndex + } else dataSources.alertsIndex + } + + fun getDelegateMonitorAlertState( + workflow: Workflow, + ): Alert.State { + return if (workflow.triggers.isNotEmpty()) { + Alert.State.AUDIT + } else Alert.State.ACTIVE + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt index 60285e70b..14488a16a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt @@ -10,6 +10,6 @@ data class WorkflowRunContext( val workflowId: String, val workflowMetadataId: String, val chainedMonitorId: String?, - val executionId: String, - val matchingDocIdsPerIndex: Map> + val matchingDocIdsPerIndex: Map>, + val auditDelegateMonitorAlerts: Boolean ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt index a7272a3dc..60afd5f6a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt @@ -5,9 +5,33 @@ package org.opensearch.alerting.workflow +import org.opensearch.OpenSearchSecurityException import org.opensearch.alerting.MonitorRunnerExecutionContext +import org.opensearch.alerting.MonitorRunnerService +import org.opensearch.alerting.action.GetDestinationsAction +import org.opensearch.alerting.action.GetDestinationsRequest +import org.opensearch.alerting.action.GetDestinationsResponse +import org.opensearch.alerting.model.ActionRunResult import org.opensearch.alerting.model.WorkflowRunResult +import 
org.opensearch.alerting.model.destination.Destination +import org.opensearch.alerting.opensearchapi.InjectorContextElement +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.opensearchapi.withClosableContext +import org.opensearch.alerting.script.ChainedAlertTriggerExecutionContext +import org.opensearch.alerting.util.destinationmigration.NotificationActionConfigs +import org.opensearch.alerting.util.destinationmigration.NotificationApiUtils +import org.opensearch.alerting.util.destinationmigration.getTitle +import org.opensearch.alerting.util.destinationmigration.publishLegacyNotification +import org.opensearch.alerting.util.destinationmigration.sendNotification +import org.opensearch.alerting.util.isAllowed +import org.opensearch.alerting.util.isTestAction +import org.opensearch.client.node.NodeClient +import org.opensearch.commons.alerting.model.Table import org.opensearch.commons.alerting.model.Workflow +import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.notifications.model.NotificationConfigInfo +import org.opensearch.script.Script +import org.opensearch.script.TemplateScript import java.time.Instant abstract class WorkflowRunner { @@ -18,4 +42,154 @@ abstract class WorkflowRunner { periodEnd: Instant, dryRun: Boolean ): WorkflowRunResult + + suspend fun runAction( + action: Action, + ctx: ChainedAlertTriggerExecutionContext, + monitorCtx: MonitorRunnerExecutionContext, + workflow: Workflow, + dryrun: Boolean + ): ActionRunResult { + return try { + val actionOutput = mutableMapOf() + actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) { + compileTemplate(action.subjectTemplate!!, ctx) + } else "" + actionOutput[Action.MESSAGE] = compileTemplate(action.messageTemplate, ctx) + if (actionOutput[Action.MESSAGE].isNullOrEmpty()) { + throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") + } + if (!dryrun) { + val client = 
monitorCtx.client + client!!.threadPool().threadContext.stashContext().use { + withClosableContext( + InjectorContextElement( + workflow.id, + monitorCtx.settings!!, + monitorCtx.threadPool!!.threadContext, + workflow.user?.roles, + workflow.user + ) + ) { + actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( + action, + monitorCtx, + actionOutput[Action.SUBJECT], + actionOutput[Action.MESSAGE]!! + ) + } + } + } + ActionRunResult(action.id, action.name, actionOutput, false, MonitorRunnerService.currentTime(), null) + } catch (e: Exception) { + ActionRunResult(action.id, action.name, mapOf(), false, MonitorRunnerService.currentTime(), e) + } + } + + protected suspend fun getConfigAndSendNotification( + action: Action, + monitorCtx: MonitorRunnerExecutionContext, + subject: String?, + message: String + ): String { + val config = getConfigForNotificationAction(action, monitorCtx) + if (config.destination == null && config.channel == null) { + throw IllegalStateException("Unable to find a Notification Channel or Destination config with id [${action.destinationId}]") + } + + // Adding a check on TEST_ACTION Destination type here to avoid supporting it as a LegacyBaseMessage type + // just for Alerting integration tests + if (config.destination?.isTestAction() == true) { + return "test action" + } + + if (config.destination?.isAllowed(monitorCtx.allowList) == false) { + throw IllegalStateException( + "Monitor contains a Destination type that is not allowed: ${config.destination.type}" + ) + } + + var actionResponseContent = "" + actionResponseContent = config.channel + ?.sendNotification( + monitorCtx.client!!, + config.channel.getTitle(subject), + message + ) ?: actionResponseContent + + actionResponseContent = config.destination + ?.buildLegacyBaseMessage(subject, message, monitorCtx.destinationContextFactory!!.getDestinationContext(config.destination)) + ?.publishLegacyNotification(monitorCtx.client!!) 
+ ?: actionResponseContent + + return actionResponseContent + } + + /** + * The "destination" ID referenced in a Monitor Action could either be a Notification config or a Destination config + * depending on whether the background migration process has already migrated it from a Destination to a Notification config. + * + * To cover both of these cases, the Notification config will take precedence and if it is not found, the Destination will be retrieved. + */ + private suspend fun getConfigForNotificationAction( + action: Action, + monitorCtx: MonitorRunnerExecutionContext + ): NotificationActionConfigs { + var destination: Destination? = null + var notificationPermissionException: Exception? = null + + var channel: NotificationConfigInfo? = null + try { + channel = NotificationApiUtils.getNotificationConfigInfo(monitorCtx.client as NodeClient, action.destinationId) + } catch (e: OpenSearchSecurityException) { + notificationPermissionException = e + } + + // If the channel was not found, try to retrieve the Destination + if (channel == null) { + destination = try { + val table = Table( + "asc", + "destination.name.keyword", + null, + 1, + 0, + null + ) + val getDestinationsRequest = GetDestinationsRequest( + action.destinationId, + 0L, + null, + table, + "ALL" + ) + + val getDestinationsResponse: GetDestinationsResponse = monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute(GetDestinationsAction.INSTANCE, getDestinationsRequest, it) + } + getDestinationsResponse.destinations.firstOrNull() + } catch (e: IllegalStateException) { + // Catching the exception thrown when the Destination was not found so the NotificationActionConfigs object can be returned + null + } catch (e: OpenSearchSecurityException) { + if (notificationPermissionException != null) { + throw notificationPermissionException + } else { + throw e + } + } + + if (destination == null && notificationPermissionException != null) { + throw notificationPermissionException + } + } + + return 
NotificationActionConfigs(destination, channel) + } + + internal fun compileTemplate(template: Script, ctx: ChainedAlertTriggerExecutionContext): String { + return MonitorRunnerService.monitorCtx.scriptService!!.compile(template, TemplateScript.CONTEXT) + .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) + .execute() + } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunnerService.kt deleted file mode 100644 index 5ea979119..000000000 --- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunnerService.kt +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.alerting.workflow - -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.Job -import kotlinx.coroutines.SupervisorJob -import kotlinx.coroutines.launch -import org.apache.logging.log4j.LogManager -import org.opensearch.action.bulk.BackoffPolicy -import org.opensearch.alerting.AlertService -import org.opensearch.alerting.InputService -import org.opensearch.alerting.MonitorRunnerExecutionContext -import org.opensearch.alerting.TriggerService -import org.opensearch.alerting.WorkflowService -import org.opensearch.alerting.alerts.AlertIndices -import org.opensearch.alerting.core.JobRunner -import org.opensearch.alerting.model.WorkflowRunResult -import org.opensearch.alerting.model.destination.DestinationContextFactory -import org.opensearch.alerting.script.TriggerExecutionContext -import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_COUNT -import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_MILLIS -import org.opensearch.alerting.settings.AlertingSettings.Companion.INDEX_TIMEOUT -import 
org.opensearch.alerting.settings.AlertingSettings.Companion.MAX_ACTIONABLE_ALERT_COUNT -import org.opensearch.alerting.settings.AlertingSettings.Companion.MOVE_ALERTS_BACKOFF_COUNT -import org.opensearch.alerting.settings.AlertingSettings.Companion.MOVE_ALERTS_BACKOFF_MILLIS -import org.opensearch.alerting.settings.DestinationSettings.Companion.ALLOW_LIST -import org.opensearch.alerting.settings.DestinationSettings.Companion.HOST_DENY_LIST -import org.opensearch.alerting.settings.DestinationSettings.Companion.loadDestinationSettings -import org.opensearch.alerting.util.DocLevelMonitorQueries -import org.opensearch.client.Client -import org.opensearch.cluster.metadata.IndexNameExpressionResolver -import org.opensearch.cluster.service.ClusterService -import org.opensearch.common.component.AbstractLifecycleComponent -import org.opensearch.common.settings.Settings -import org.opensearch.commons.alerting.model.Alert -import org.opensearch.commons.alerting.model.Monitor -import org.opensearch.commons.alerting.model.ScheduledJob -import org.opensearch.commons.alerting.model.Workflow -import org.opensearch.commons.alerting.model.action.Action -import org.opensearch.core.xcontent.NamedXContentRegistry -import org.opensearch.script.Script -import org.opensearch.script.ScriptService -import org.opensearch.script.TemplateScript -import org.opensearch.threadpool.ThreadPool -import java.time.Instant -import kotlin.coroutines.CoroutineContext - -object WorkflowRunnerService : JobRunner, CoroutineScope, AbstractLifecycleComponent() { - - private val logger = LogManager.getLogger(javaClass) - - var monitorCtx: MonitorRunnerExecutionContext = MonitorRunnerExecutionContext() - private lateinit var runnerSupervisor: Job - override val coroutineContext: CoroutineContext - get() = Dispatchers.Default + runnerSupervisor - - fun registerClusterService(clusterService: ClusterService): WorkflowRunnerService { - monitorCtx.clusterService = clusterService - return this - } - - fun 
registerClient(client: Client): WorkflowRunnerService { - monitorCtx.client = client - return this - } - - fun registerNamedXContentRegistry(xContentRegistry: NamedXContentRegistry): WorkflowRunnerService { - monitorCtx.xContentRegistry = xContentRegistry - return this - } - - fun registerScriptService(scriptService: ScriptService): WorkflowRunnerService { - monitorCtx.scriptService = scriptService - return this - } - - fun registerIndexNameExpressionResolver(indexNameExpressionResolver: IndexNameExpressionResolver): WorkflowRunnerService { - monitorCtx.indexNameExpressionResolver = indexNameExpressionResolver - return this - } - - fun registerSettings(settings: Settings): WorkflowRunnerService { - monitorCtx.settings = settings - return this - } - - fun registerThreadPool(threadPool: ThreadPool): WorkflowRunnerService { - monitorCtx.threadPool = threadPool - return this - } - - fun registerAlertIndices(alertIndices: AlertIndices): WorkflowRunnerService { - monitorCtx.alertIndices = alertIndices - return this - } - - fun registerInputService(inputService: InputService): WorkflowRunnerService { - monitorCtx.inputService = inputService - return this - } - - fun registerWorkflowService(workflowService: WorkflowService): WorkflowRunnerService { - monitorCtx.workflowService = workflowService - return this - } - - fun registerTriggerService(triggerService: TriggerService): WorkflowRunnerService { - monitorCtx.triggerService = triggerService - return this - } - - fun registerAlertService(alertService: AlertService): WorkflowRunnerService { - monitorCtx.alertService = alertService - return this - } - - fun registerDocLevelMonitorQueries(docLevelMonitorQueries: DocLevelMonitorQueries): WorkflowRunnerService { - monitorCtx.docLevelMonitorQueries = docLevelMonitorQueries - return this - } - - // Must be called after registerClusterService and registerSettings in AlertingPlugin - fun registerConsumers(): WorkflowRunnerService { - monitorCtx.retryPolicy = 
BackoffPolicy.constantBackoff( - ALERT_BACKOFF_MILLIS.get(monitorCtx.settings), - ALERT_BACKOFF_COUNT.get(monitorCtx.settings) - ) - monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(ALERT_BACKOFF_MILLIS, ALERT_BACKOFF_COUNT) { millis, count -> - monitorCtx.retryPolicy = BackoffPolicy.constantBackoff(millis, count) - } - - monitorCtx.moveAlertsRetryPolicy = - BackoffPolicy.exponentialBackoff( - MOVE_ALERTS_BACKOFF_MILLIS.get(monitorCtx.settings), - MOVE_ALERTS_BACKOFF_COUNT.get(monitorCtx.settings) - ) - monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(MOVE_ALERTS_BACKOFF_MILLIS, MOVE_ALERTS_BACKOFF_COUNT) { - millis, count -> - monitorCtx.moveAlertsRetryPolicy = BackoffPolicy.exponentialBackoff(millis, count) - } - - monitorCtx.allowList = ALLOW_LIST.get(monitorCtx.settings) - monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(ALLOW_LIST) { - monitorCtx.allowList = it - } - - // Host deny list is not a dynamic setting so no consumer is registered but the variable is set here - monitorCtx.hostDenyList = HOST_DENY_LIST.get(monitorCtx.settings) - - monitorCtx.maxActionableAlertCount = MAX_ACTIONABLE_ALERT_COUNT.get(monitorCtx.settings) - monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(MAX_ACTIONABLE_ALERT_COUNT) { - monitorCtx.maxActionableAlertCount = it - } - - monitorCtx.indexTimeout = INDEX_TIMEOUT.get(monitorCtx.settings) - - return this - } - - // To be safe, call this last as it depends on a number of other components being registered beforehand (client, settings, etc.) - fun registerDestinationSettings(): WorkflowRunnerService { - monitorCtx.destinationSettings = loadDestinationSettings(monitorCtx.settings!!) - monitorCtx.destinationContextFactory = - DestinationContextFactory(monitorCtx.client!!, monitorCtx.xContentRegistry!!, monitorCtx.destinationSettings!!) 
- return this - } - - // Updates destination settings when the reload API is called so that new keystore values are visible - fun reloadDestinationSettings(settings: Settings) { - monitorCtx.destinationSettings = loadDestinationSettings(settings) - - // Update destinationContextFactory as well since destinationSettings has been updated - monitorCtx.destinationContextFactory!!.updateDestinationSettings(monitorCtx.destinationSettings!!) - } - - override fun doStart() { - runnerSupervisor = SupervisorJob() - } - - override fun doStop() { - runnerSupervisor.cancel() - } - - override fun doClose() { } - - override fun postIndex(job: ScheduledJob) { - } - - override fun postDelete(jobId: String) { - } - - override fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant) { - if (job !is Workflow) { - throw IllegalArgumentException("Invalid job type") - } - launch { - runJob(job, periodStart, periodEnd, false) - } - } - - suspend fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): WorkflowRunResult { - val workflow = job as Workflow - return CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun) - } - - // TODO: See if we can move below methods (or few of these) to a common utils - internal fun getRolesForMonitor(monitor: Monitor): List { - /* - * We need to handle 3 cases: - * 1. Monitors created by older versions and never updated. These monitors wont have User details in the - * monitor object. `monitor.user` will be null. Insert `all_access, AmazonES_all_access` role. - * 2. Monitors are created when security plugin is disabled, these will have empty User object. - * (`monitor.user.name`, `monitor.user.roles` are empty ) - * 3. Monitors are created when security plugin is enabled, these will have an User object. - */ - return if (monitor.user == null) { - // fixme: discuss and remove hardcoded to settings? - // TODO: Remove "AmazonES_all_access" role? 
- monitorCtx.settings!!.getAsList("", listOf("all_access", "AmazonES_all_access")) - } else { - monitor.user!!.roles - } - } - - // TODO: Can this be updated to just use 'Instant.now()'? - // 'threadPool.absoluteTimeInMillis()' is referring to a cached value of System.currentTimeMillis() that by default updates every 200ms - internal fun currentTime() = Instant.ofEpochMilli(monitorCtx.threadPool!!.absoluteTimeInMillis()) - - internal fun isActionActionable(action: Action, alert: Alert?): Boolean { - if (alert == null || action.throttle == null) { - return true - } - if (action.throttleEnabled) { - val result = alert.actionExecutionResults.firstOrNull { r -> r.actionId == action.id } - val lastExecutionTime: Instant? = result?.lastExecutionTime - val throttledTimeBound = currentTime().minus(action.throttle!!.value.toLong(), action.throttle!!.unit) - return (lastExecutionTime == null || lastExecutionTime.isBefore(throttledTimeBound)) - } - return true - } - - internal fun compileTemplate(template: Script, ctx: TriggerExecutionContext): String { - return monitorCtx.scriptService!!.compile(template, TemplateScript.CONTEXT) - .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) - .execute() - } -} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/AlertServiceTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/AlertServiceTests.kt index fd4b26ef1..8583ae0db 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/AlertServiceTests.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/AlertServiceTests.kt @@ -84,11 +84,7 @@ class AlertServiceTests : OpenSearchTestCase() { ) val categorizedAlerts = alertService.getCategorizedAlertsForBucketLevelMonitor( - monitor, - trigger, - currentAlerts, - aggResultBuckets, - emptyList() + monitor, trigger, currentAlerts, aggResultBuckets, emptyList(), "", null ) // Completed Alerts are what remains in currentAlerts after categorization val completedAlerts = currentAlerts.values.toList() @@ 
-123,11 +119,7 @@ class AlertServiceTests : OpenSearchTestCase() { ) val categorizedAlerts = alertService.getCategorizedAlertsForBucketLevelMonitor( - monitor, - trigger, - currentAlerts, - aggResultBuckets, - emptyList() + monitor, trigger, currentAlerts, aggResultBuckets, emptyList(), "", null ) // Completed Alerts are what remains in currentAlerts after categorization val completedAlerts = currentAlerts.values.toList() @@ -157,11 +149,7 @@ class AlertServiceTests : OpenSearchTestCase() { val aggResultBuckets = listOf() val categorizedAlerts = alertService.getCategorizedAlertsForBucketLevelMonitor( - monitor, - trigger, - currentAlerts, - aggResultBuckets, - emptyList() + monitor, trigger, currentAlerts, aggResultBuckets, emptyList(), "", null ) // Completed Alerts are what remains in currentAlerts after categorization val completedAlerts = currentAlerts.values.toList() @@ -196,11 +184,7 @@ class AlertServiceTests : OpenSearchTestCase() { ) val categorizedAlerts = alertService.getCategorizedAlertsForBucketLevelMonitor( - monitor, - trigger, - currentAlerts, - aggResultBuckets, - emptyList() + monitor, trigger, currentAlerts, aggResultBuckets, emptyList(), "", null ) // Completed Alerts are what remains in currentAlerts after categorization val completedAlerts = currentAlerts.values.toList() @@ -227,11 +211,7 @@ class AlertServiceTests : OpenSearchTestCase() { ) val categorizedAlerts = alertService.getCategorizedAlertsForBucketLevelMonitor( - monitor, - trigger, - currentAlerts, - aggResultBuckets, - emptyList() + monitor, trigger, currentAlerts, aggResultBuckets, emptyList(), "", null ) // Completed Alerts are what remains in currentAlerts after categorization val completedAlerts = currentAlerts.values.toList() diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt index 3c79ad683..dccb67536 100644 --- 
a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt @@ -46,6 +46,7 @@ import org.opensearch.common.xcontent.json.JsonXContent.jsonXContent import org.opensearch.commons.alerting.action.GetFindingsResponse import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.BucketLevelTrigger +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocLevelQuery import org.opensearch.commons.alerting.model.DocumentLevelTrigger @@ -100,7 +101,9 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { DocLevelMonitorInput.XCONTENT_REGISTRY, QueryLevelTrigger.XCONTENT_REGISTRY, BucketLevelTrigger.XCONTENT_REGISTRY, - DocumentLevelTrigger.XCONTENT_REGISTRY + DocumentLevelTrigger.XCONTENT_REGISTRY, + Workflow.XCONTENT_REGISTRY, + ChainedAlertTrigger.XCONTENT_REGISTRY ) + SearchModule(Settings.EMPTY, emptyList()).namedXContents ) } @@ -124,7 +127,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client: RestClient, monitor: Monitor, rbacRoles: List? 
= null, - refresh: Boolean = true + refresh: Boolean = true, ): Monitor { val response = client.makeRequest( "POST", @@ -175,7 +178,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client: RestClient, workflow: Workflow, deleteDelegates: Boolean = false, - refresh: Boolean = true + refresh: Boolean = true, ): Response { val response = client.makeRequest( "DELETE", @@ -247,7 +250,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun getEmailAccount( emailAccountID: String, - header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json") + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), ): EmailAccount { val response = client().makeRequest("GET", "$EMAIL_ACCOUNT_BASE_URI/$emailAccountID", null, header) assertEquals("Unable to get email account $emailAccountID", RestStatus.OK, response.restStatus()) @@ -308,7 +311,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun getEmailGroup( emailGroupID: String, - header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json") + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), ): EmailGroup { val response = client().makeRequest("GET", "$EMAIL_GROUP_BASE_URI/$emailGroupID", null, header) assertEquals("Unable to get email group $emailGroupID", RestStatus.OK, response.restStatus()) @@ -392,7 +395,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun getDestinations( client: RestClient, dataMap: Map = emptyMap(), - header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json") + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), ): List> { var baseEndpoint = "$DESTINATION_BASE_URI?" 
for (entry in dataMap.entries) { @@ -578,7 +581,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client: RestClient, monitor: Monitor, rbacRoles: List = emptyList(), - refresh: Boolean = true + refresh: Boolean = true, ): Monitor { val response = client.makeRequest( "PUT", @@ -595,7 +598,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client: RestClient, workflow: Workflow, rbacRoles: List = emptyList(), - refresh: Boolean = true + refresh: Boolean = true, ): Workflow { val response = client.makeRequest( "PUT", @@ -626,6 +629,16 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { "_id" -> id = parser.text() "_version" -> version = parser.longValue() "monitor" -> monitor = Monitor.parse(parser) + "associated_workflows" -> { + XContentParserUtils.ensureExpectedToken( + XContentParser.Token.START_ARRAY, + parser.currentToken(), + parser + ) + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + // do nothing + } + } } } @@ -637,7 +650,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun searchAlertsWithFilter( monitor: Monitor, indices: String = AlertIndices.ALERT_INDEX, - refresh: Boolean = true + refresh: Boolean = true, ): List { if (refresh) refreshIndex(indices) @@ -661,7 +674,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { monitorName: String = "NO_NAME", index: String = "testIndex", docLevelQueries: List = listOf(DocLevelQuery(query = "test_field:\"us-west-2\"", name = "testQuery")), - matchingDocIds: List + matchingDocIds: List, ): String { val finding = Finding( id = UUID.randomUUID().toString(), @@ -682,7 +695,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun searchFindings( monitor: Monitor, indices: String = AlertIndices.ALL_FINDING_INDEX_PATTERN, - refresh: Boolean = true + refresh: Boolean = true, ): List { if (refresh) refreshIndex(indices) @@ -743,10 +756,25 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return 
response } + protected fun acknowledgeChainedAlerts(workflowId: String, vararg alertId: String): Response { + val request = jsonBuilder().startObject() + .array("alerts", *alertId.map { it }.toTypedArray()) + .endObject() + .string() + .let { StringEntity(it, APPLICATION_JSON) } + + val response = client().makeRequest( + "POST", "${AlertingPlugin.WORKFLOW_BASE_URI}/$workflowId/_acknowledge/alerts", + emptyMap(), request + ) + assertEquals("Acknowledge call failed.", RestStatus.OK, response.restStatus()) + return response + } + protected fun getAlerts( client: RestClient, dataMap: Map = emptyMap(), - header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json") + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), ): Response { var baseEndpoint = "$ALERTING_BASE_URI/alerts?" for (entry in dataMap.entries) { @@ -760,7 +788,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun getAlerts( dataMap: Map = emptyMap(), - header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json") + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), ): Response { return getAlerts(client(), dataMap, header) } @@ -785,6 +813,28 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return executeWorkflow(client(), workflowId, params) } + protected fun getWorkflowAlerts( + workflowId: String, + getAssociatedAlerts: Boolean = true, + ): Response { + return getWorkflowAlerts(client(), mutableMapOf(Pair("workflowIds", workflowId), Pair("getAssociatedAlerts", getAssociatedAlerts))) + } + + protected fun getWorkflowAlerts( + client: RestClient, + dataMap: Map = emptyMap(), + header: BasicHeader = BasicHeader(HttpHeaders.CONTENT_TYPE, "application/json"), + ): Response { + var baseEndpoint = "$WORKFLOW_ALERTING_BASE_URI/alerts?" 
+ for (entry in dataMap.entries) { + baseEndpoint += "${entry.key}=${entry.value}&" + } + + val response = client.makeRequest("GET", baseEndpoint, null, header) + assertEquals("Get call failed.", RestStatus.OK, response.restStatus()) + return response + } + protected fun executeMonitor(client: RestClient, monitorId: String, params: Map = mutableMapOf()): Response { return client.makeRequest("POST", "$ALERTING_BASE_URI/$monitorId/_execute", params) } @@ -900,7 +950,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun createTestAlias( alias: String = randomAlphaOfLength(10).lowercase(Locale.ROOT), numOfAliasIndices: Int = randomIntBetween(1, 10), - includeWriteIndex: Boolean = true + includeWriteIndex: Boolean = true, ): MutableMap> { return createTestAlias(alias = alias, indices = randomAliasIndices(alias, numOfAliasIndices, includeWriteIndex)) } @@ -911,7 +961,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { alias = alias, num = randomIntBetween(1, 10), includeWriteIndex = true - ) + ), ): MutableMap> { val indicesMap = mutableMapOf() val indicesJson = jsonBuilder().startObject().startArray("actions") @@ -936,7 +986,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { protected fun randomAliasIndices( alias: String, num: Int = randomIntBetween(1, 10), - includeWriteIndex: Boolean = true + includeWriteIndex: Boolean = true, ): Map { val indices = mutableMapOf() val writeIndex = randomIntBetween(0, num) @@ -1419,7 +1469,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { index: String, role: String, backendRoles: List, - clusterPermissions: String? 
+ clusterPermissions: String?, ) { createUser(user, backendRoles.toTypedArray()) createTestIndex(index) @@ -1432,7 +1482,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { index: String, role: String, backendRoles: List, - clusterPermissions: List + clusterPermissions: List, ) { createUser(user, backendRoles.toTypedArray()) createTestIndex(index) @@ -1444,7 +1494,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { user: String, roles: List, backendRoles: List, - isExistingRole: Boolean + isExistingRole: Boolean, ) { createUser(user, backendRoles.toTypedArray()) for (role in roles) { @@ -1461,7 +1511,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { index: String, role: String, backendRole: String, - dlsQuery: String + dlsQuery: String, ) { createUser(user, arrayOf(backendRole)) createTestIndex(index) @@ -1475,7 +1525,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { role: String, backendRole: String, dlsQuery: String, - clusterPermissions: String? + clusterPermissions: String?, ) { createUser(user, arrayOf(backendRole)) createTestIndex(index) @@ -1548,7 +1598,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client: RestClient, workflow: Workflow, rbacRoles: List? 
= null, - refresh: Boolean = true + refresh: Boolean = true, ): Workflow { val response = client.makeRequest( "POST", "$WORKFLOW_ALERTING_BASE_URI?refresh=$refresh", emptyMap(), diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt index 325b40bfc..4d33a1823 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt @@ -31,9 +31,15 @@ import org.opensearch.alerting.transport.AlertingSingleNodeTestCase import org.opensearch.alerting.util.AlertingException import org.opensearch.alerting.util.DocLevelMonitorQueries import org.opensearch.alerting.util.DocLevelMonitorQueries.Companion.INDEX_PATTERN_SUFFIX +import org.opensearch.alerting.workflow.CompositeWorkflowRunner import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.LoggingDeprecationHandler +import org.opensearch.common.xcontent.XContentHelper +import org.opensearch.common.xcontent.XContentParserUtils import org.opensearch.common.xcontent.XContentType import org.opensearch.commons.alerting.action.AcknowledgeAlertRequest +import org.opensearch.commons.alerting.action.AcknowledgeAlertResponse +import org.opensearch.commons.alerting.action.AcknowledgeChainedAlertRequest import org.opensearch.commons.alerting.action.AlertingActions import org.opensearch.commons.alerting.action.DeleteMonitorRequest import org.opensearch.commons.alerting.action.GetAlertsRequest @@ -41,6 +47,7 @@ import org.opensearch.commons.alerting.action.GetAlertsResponse import org.opensearch.commons.alerting.action.IndexMonitorResponse import org.opensearch.commons.alerting.aggregation.bucketselectorext.BucketSelectorExtAggregationBuilder import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import 
org.opensearch.commons.alerting.model.ChainedMonitorFindings import org.opensearch.commons.alerting.model.CompositeInput import org.opensearch.commons.alerting.model.DataSources @@ -53,9 +60,11 @@ import org.opensearch.commons.alerting.model.ScheduledJob.Companion.DOC_LEVEL_QU import org.opensearch.commons.alerting.model.ScheduledJob.Companion.SCHEDULED_JOBS_INDEX import org.opensearch.commons.alerting.model.SearchInput import org.opensearch.commons.alerting.model.Table +import org.opensearch.core.xcontent.XContentParser import org.opensearch.index.mapper.MapperService import org.opensearch.index.query.MatchQueryBuilder import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.query.TermQueryBuilder import org.opensearch.rest.RestRequest import org.opensearch.rest.RestStatus import org.opensearch.script.Script @@ -2101,6 +2110,369 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { } // TODO - revisit single node integ tests setup to figure out why we cannot have multiple test classes implementing it + + fun `test execute workflow with custom alerts and finding index when bucket monitor is used in chained finding of doc monitor`() { + val query = QueryBuilders.rangeQuery("test_strict_date_time") + .gt("{{period_end}}||-10d") + .lte("{{period_end}}") + .format("epoch_millis") + val compositeSources = listOf( + TermsValuesSourceBuilder("test_field_1").field("test_field_1") + ) + val customAlertsHistoryIndex = "custom_alerts_history_index" + val customAlertsHistoryIndexPattern = "" + val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) + val input = SearchInput(indices = listOf(index), query = SearchSourceBuilder().size(0).query(query).aggregation(compositeAgg)) + // Bucket level monitor will reduce the size of matched doc ids on those that belong + // to a bucket that contains more than 1 document after term grouping + val triggerScript = """ + params.docCount > 1 + """.trimIndent() + + var trigger = 
randomBucketLevelTrigger() + trigger = trigger.copy( + bucketSelector = BucketSelectorExtAggregationBuilder( + name = trigger.id, + bucketsPathsMap = mapOf("docCount" to "_count"), + script = Script(triggerScript), + parentBucketPath = "composite_agg", + filter = null, + ) + ) + val bucketCustomAlertsIndex = "custom_alerts_index" + val bucketCustomFindingsIndex = "custom_findings_index" + val bucketCustomFindingsIndexPattern = "custom_findings_index-1" + + val bucketLevelMonitorResponse = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger), + dataSources = DataSources( + findingsEnabled = true, + alertsIndex = bucketCustomAlertsIndex, + findingsIndex = bucketCustomFindingsIndex, + findingsIndexPattern = bucketCustomFindingsIndexPattern + ) + ) + )!! + + val docQuery1 = DocLevelQuery(query = "test_field_1:\"test_value_2\"", name = "1") + val docQuery2 = DocLevelQuery(query = "test_field_1:\"test_value_1\"", name = "2") + val docQuery3 = DocLevelQuery(query = "test_field_1:\"test_value_3\"", name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1, docQuery2, docQuery3)) + val docTrigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val docCustomAlertsIndex = "custom_alerts_index" + val docCustomFindingsIndex = "custom_findings_index" + val docCustomFindingsIndexPattern = "custom_findings_index-1" + var docLevelMonitor = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(docTrigger), + dataSources = DataSources( + alertsIndex = docCustomAlertsIndex, + findingsIndex = docCustomFindingsIndex, + findingsIndexPattern = docCustomFindingsIndexPattern + ) + ) + + val docLevelMonitorResponse = createMonitor(docLevelMonitor)!! + // 1. bucketMonitor (chainedFinding = null) 2. 
docMonitor (chainedFinding = bucketMonitor) + var workflow = randomWorkflow( + monitorIds = listOf(bucketLevelMonitorResponse.id, docLevelMonitorResponse.id), + enabled = false, + auditDelegateMonitorAlerts = false + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + + // Creates 5 documents + insertSampleTimeSerializedData( + index, + listOf( + "test_value_1", + "test_value_1", // adding duplicate to verify aggregation + "test_value_2", + "test_value_2", + "test_value_3" + ) + ) + + val workflowId = workflowResponse.id + // 1. bucket level monitor should reduce the doc findings to 4 (1, 2, 3, 4) + // 2. Doc level monitor will match those 4 documents although it contains rules for matching all 5 documents (docQuery3 matches the fifth) + val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + assertNotNull(executeWorkflowResponse) + + for (monitorRunResults in executeWorkflowResponse.workflowRunResult.monitorRunResults) { + if (bucketLevelMonitorResponse.monitor.name == monitorRunResults.monitorName) { + val searchResult = monitorRunResults.inputResults.results.first() + + @Suppress("UNCHECKED_CAST") + val buckets = searchResult.stringMap("aggregations")?.stringMap("composite_agg") + ?.get("buckets") as List> + assertEquals("Incorrect search result", 3, buckets.size) + + val getAlertsResponse = assertAlerts(bucketLevelMonitorResponse.id, bucketCustomAlertsIndex, 2, workflowId) + assertAcknowledges(getAlertsResponse.alerts, bucketLevelMonitorResponse.id, 2) + assertFindings(bucketLevelMonitorResponse.id, bucketCustomFindingsIndex, 1, 4, listOf("1", "2", "3", "4")) + } else { + assertEquals(1, monitorRunResults.inputResults.results.size) + val values = monitorRunResults.triggerResults.values + assertEquals(1, values.size) + @Suppress("UNCHECKED_CAST") + val docLevelTrigger = values.iterator().next() as DocumentLevelTriggerRunResult + val 
triggeredDocIds = docLevelTrigger.triggeredDocs.map { it.split("|")[0] } + val expectedTriggeredDocIds = listOf("1", "2", "3", "4") + assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) + + val getAlertsResponse = assertAlerts(docLevelMonitorResponse.id, docCustomAlertsIndex, 4, workflowId) + assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse.id, 4) + assertFindings(docLevelMonitorResponse.id, docCustomFindingsIndex, 4, 4, listOf("1", "2", "3", "4")) + } + } + } + + fun `test execute workflow with custom alerts and finding index when doc level delegate is used in chained finding`() { + val docQuery1 = DocLevelQuery(query = "test_field_1:\"test_value_2\"", name = "1") + val docQuery2 = DocLevelQuery(query = "test_field_1:\"test_value_3\"", name = "2") + + var docLevelMonitor = randomDocumentLevelMonitor( + inputs = listOf(DocLevelMonitorInput("description", listOf(index), listOf(docQuery1, docQuery2))), + triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN)), + dataSources = DataSources( + alertsIndex = "custom_alerts_index", + findingsIndex = "custom_findings_index", + findingsIndexPattern = "custom_findings_index-1" + ) + ) + + val docLevelMonitorResponse = createMonitor(docLevelMonitor)!! 
+ + val query = QueryBuilders.rangeQuery("test_strict_date_time") + .gt("{{period_end}}||-10d") + .lte("{{period_end}}") + .format("epoch_millis") + val compositeSources = listOf( + TermsValuesSourceBuilder("test_field_1").field("test_field_1") + ) + val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) + val input = SearchInput(indices = listOf(index), query = SearchSourceBuilder().size(0).query(query).aggregation(compositeAgg)) + // Bucket level monitor will reduce the size of matched doc ids on those that belong to a bucket that contains more than 1 document after term grouping + val triggerScript = """ + params.docCount > 1 + """.trimIndent() + + var trigger = randomBucketLevelTrigger() + trigger = trigger.copy( + bucketSelector = BucketSelectorExtAggregationBuilder( + name = trigger.id, + bucketsPathsMap = mapOf("docCount" to "_count"), + script = Script(triggerScript), + parentBucketPath = "composite_agg", + filter = null, + ) + ) + + val bucketLevelMonitorResponse = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger), + dataSources = DataSources( + findingsEnabled = true, + alertsIndex = "custom_alerts_index", + findingsIndex = "custom_findings_index", + findingsIndexPattern = "custom_findings_index-1" + ) + ) + )!! + + var docLevelMonitor1 = randomDocumentLevelMonitor( + // Match the documents with test_field_1: test_value_3 + inputs = listOf(DocLevelMonitorInput("description", listOf(index), listOf(docQuery2))), + triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN)), + dataSources = DataSources( + findingsEnabled = true, + alertsIndex = "custom_alerts_index_1", + findingsIndex = "custom_findings_index_1", + findingsIndexPattern = "custom_findings_index_1-1" + ) + ) + + val docLevelMonitorResponse1 = createMonitor(docLevelMonitor1)!! 
+ + val queryMonitorInput = SearchInput( + indices = listOf(index), + query = SearchSourceBuilder().query( + QueryBuilders + .rangeQuery("test_strict_date_time") + .gt("{{period_end}}||-10d") + .lte("{{period_end}}") + .format("epoch_millis") + ) + ) + val queryTriggerScript = """ + return ctx.results[0].hits.hits.size() > 0 + """.trimIndent() + + val queryLevelTrigger = randomQueryLevelTrigger(condition = Script(queryTriggerScript)) + val queryMonitorResponse = + createMonitor(randomQueryLevelMonitor(inputs = listOf(queryMonitorInput), triggers = listOf(queryLevelTrigger)))!! + + // 1. docMonitor (chainedFinding = null) 2. bucketMonitor (chainedFinding = docMonitor) 3. docMonitor (chainedFinding = bucketMonitor) 4. queryMonitor (chainedFinding = docMonitor 3) + var workflow = randomWorkflow( + monitorIds = listOf( + docLevelMonitorResponse.id, + bucketLevelMonitorResponse.id, + docLevelMonitorResponse1.id, + queryMonitorResponse.id + ), + auditDelegateMonitorAlerts = false + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + + // Creates 5 documents + insertSampleTimeSerializedData( + index, + listOf( + "test_value_1", + "test_value_1", // adding duplicate to verify aggregation + "test_value_2", + "test_value_2", + "test_value_3", + "test_value_3" + ) + ) + + val workflowId = workflowResponse.id + // 1. Doc level monitor should reduce the doc findings to 4 (3 - test_value_2, 4 - test_value_2, 5 - test_value_3, 6 - test_value_3) + // 2. Bucket level monitor will match the fetch the docs from current findings execution, although it contains rules for matching documents which has test_value_2 and test value_3 + val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! 
+ assertNotNull(executeWorkflowResponse) + + for (monitorRunResults in executeWorkflowResponse.workflowRunResult.monitorRunResults) { + when (monitorRunResults.monitorName) { + // Verify first doc level monitor execution, alerts and findings + docLevelMonitorResponse.monitor.name -> { + assertEquals(1, monitorRunResults.inputResults.results.size) + val values = monitorRunResults.triggerResults.values + assertEquals(1, values.size) + @Suppress("UNCHECKED_CAST") + val docLevelTrigger = values.iterator().next() as DocumentLevelTriggerRunResult + val triggeredDocIds = docLevelTrigger.triggeredDocs.map { it.split("|")[0] } + val expectedTriggeredDocIds = listOf("3", "4", "5", "6") + assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) + + val getAlertsResponse = + assertAlerts(docLevelMonitorResponse.id, docLevelMonitorResponse.monitor.dataSources.alertsIndex, 4, workflowId) + assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse.id, 4) + assertFindings( + docLevelMonitorResponse.id, + docLevelMonitorResponse.monitor.dataSources.findingsIndex, + 4, + 4, + listOf("3", "4", "5", "6") + ) + } + // Verify second bucket level monitor execution, alerts and findings + bucketLevelMonitorResponse.monitor.name -> { + val searchResult = monitorRunResults.inputResults.results.first() + + @Suppress("UNCHECKED_CAST") + val buckets = + searchResult + .stringMap("aggregations")?.stringMap("composite_agg") + ?.get("buckets") as List> + assertEquals("Incorrect search result", 2, buckets.size) + + val getAlertsResponse = + assertAlerts( + bucketLevelMonitorResponse.id, + bucketLevelMonitorResponse.monitor.dataSources.alertsIndex, + 2, + workflowId + ) + assertAcknowledges(getAlertsResponse.alerts, bucketLevelMonitorResponse.id, 2) + assertFindings( + bucketLevelMonitorResponse.id, + bucketLevelMonitorResponse.monitor.dataSources.findingsIndex, + 1, + 4, + listOf("3", "4", "5", "6") + ) + } + // Verify third doc level monitor execution, alerts and findings + 
docLevelMonitorResponse1.monitor.name -> { + assertEquals(1, monitorRunResults.inputResults.results.size) + val values = monitorRunResults.triggerResults.values + assertEquals(1, values.size) + @Suppress("UNCHECKED_CAST") + val docLevelTrigger = values.iterator().next() as DocumentLevelTriggerRunResult + val triggeredDocIds = docLevelTrigger.triggeredDocs.map { it.split("|")[0] } + val expectedTriggeredDocIds = listOf("5", "6") + assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) + + val getAlertsResponse = + assertAlerts(docLevelMonitorResponse1.id, docLevelMonitorResponse1.monitor.dataSources.alertsIndex, 2, workflowId) + assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse1.id, 2) + assertFindings( + docLevelMonitorResponse1.id, + docLevelMonitorResponse1.monitor.dataSources.findingsIndex, + 2, + 2, + listOf("5", "6") + ) + } + // Verify fourth query level monitor execution + queryMonitorResponse.monitor.name -> { + assertEquals(1, monitorRunResults.inputResults.results.size) + val values = monitorRunResults.triggerResults.values + assertEquals(1, values.size) + @Suppress("UNCHECKED_CAST") + val totalHits = + ( + ( + monitorRunResults.inputResults.results[0]["hits"] as kotlin.collections.Map + )["total"] as kotlin.collections.Map + )["value"] + assertEquals(2, totalHits) + @Suppress("UNCHECKED_CAST") + val docIds = + ( + ( + monitorRunResults.inputResults.results[0]["hits"] as kotlin.collections.Map + )["hits"] as List> + ).map { it["_id"]!! 
} + assertEquals(listOf("5", "6"), docIds.sorted()) + } + } + } + } + + private fun assertAlerts( + monitorId: String, + customAlertsIndex: String, + alertSize: Int, + workflowId: String, + ): GetAlertsResponse { + val table = Table("asc", "id", null, alertSize, 0, "") + val getAlertsResponse = client() + .execute( + AlertingActions.GET_ALERTS_ACTION_TYPE, + GetAlertsRequest( + table, "ALL", "ALL", monitorId, customAlertsIndex, + workflowIds = listOf(workflowId) + ) + ) + .get() + assertTrue(getAlertsResponse != null) + assertTrue(getAlertsResponse.alerts.size == alertSize) + return getAlertsResponse + } + fun `test execute workflow with custom alerts and finding index with doc level delegates`() { val docQuery1 = DocLevelQuery(query = "test_field_1:\"us-west-2\"", name = "3") val docLevelInput1 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) @@ -2138,7 +2510,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val monitorResponse2 = createMonitor(monitor2)!! var workflow = randomWorkflow( - monitorIds = listOf(monitorResponse.id, monitorResponse2.id) + monitorIds = listOf(monitorResponse.id, monitorResponse2.id), auditDelegateMonitorAlerts = false ) val workflowResponse = upsertWorkflow(workflow)!! val workflowById = searchWorkflow(workflowResponse.id) @@ -2176,7 +2548,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! 
- val monitorsRunResults = executeWorkflowResponse.workflowRunResult.workflowRunResult + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults assertEquals(2, monitorsRunResults.size) assertEquals(monitor1.name, monitorsRunResults[0].monitorName) @@ -2185,11 +2557,11 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { Assert.assertEquals(monitor2.name, monitorsRunResults[1].monitorName) Assert.assertEquals(1, monitorsRunResults[1].triggerResults.size) - val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex1, 2) + val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex1, alertSize = 2, workflowId = workflowId) assertAcknowledges(getAlertsResponse.alerts, monitorResponse.id, 2) assertFindings(monitorResponse.id, customFindingsIndex1, 2, 2, listOf("1", "2")) - val getAlertsResponse2 = assertAlerts(monitorResponse2.id, customAlertsIndex2, 1) + val getAlertsResponse2 = assertAlerts(monitorResponse2.id, customAlertsIndex2, alertSize = 1, workflowId = workflowId) assertAcknowledges(getAlertsResponse2.alerts, monitorResponse2.id, 1) assertFindings(monitorResponse2.id, customFindingsIndex2, 1, 1, listOf("2")) } @@ -2213,14 +2585,16 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val monitorResponse = createMonitor(monitor)!! var workflow = randomWorkflow( - monitorIds = listOf(monitorResponse.id) + monitorIds = listOf(monitorResponse.id), + auditDelegateMonitorAlerts = false ) val workflowResponse = upsertWorkflow(workflow)!! val workflowById = searchWorkflow(workflowResponse.id) assertNotNull(workflowById) var workflow1 = randomWorkflow( - monitorIds = listOf(monitorResponse.id) + monitorIds = listOf(monitorResponse.id), + auditDelegateMonitorAlerts = false ) val workflowResponse1 = upsertWorkflow(workflow1)!! 
val workflowById1 = searchWorkflow(workflowResponse1.id) @@ -2247,14 +2621,14 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! - val monitorsRunResults = executeWorkflowResponse.workflowRunResult.workflowRunResult + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults assertEquals(1, monitorsRunResults.size) assertEquals(monitor.name, monitorsRunResults[0].monitorName) assertEquals(1, monitorsRunResults[0].triggerResults.size) // Assert and not ack the alerts (in order to verify later on that all the alerts are generated) - assertAlerts(monitorResponse.id, customAlertsIndex, 2) + assertAlerts(monitorResponse.id, customAlertsIndex, alertSize = 2, workflowId) assertFindings(monitorResponse.id, customFindingsIndex, 2, 2, listOf("1", "2")) // Verify workflow and monitor delegate metadata val workflowMetadata = searchWorkflowMetadata(id = workflowId) @@ -2264,21 +2638,21 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse.workflowRunResult.executionId, workflowMetadata!!.latestExecutionId ) - val monitorMetadataId = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata) + val monitorMetadataId = getDelegateMonitorMetadataId(workflowMetadata, monitorResponse) val monitorMetadata = searchMonitorMetadata(monitorMetadataId) assertNotNull(monitorMetadata) // Execute second workflow val workflowId1 = workflowResponse1.id val executeWorkflowResponse1 = executeWorkflow(workflowById1, workflowId1, false)!! 
- val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.workflowRunResult + val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.monitorRunResults assertEquals(1, monitorsRunResults1.size) assertEquals(monitor.name, monitorsRunResults1[0].monitorName) assertEquals(1, monitorsRunResults1[0].triggerResults.size) - val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex, 4) - assertAcknowledges(getAlertsResponse.alerts, monitorResponse.id, 4) + val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex, alertSize = 2, workflowId1) + assertAcknowledges(getAlertsResponse.alerts, monitorResponse.id, 2) assertFindings(monitorResponse.id, customFindingsIndex, 4, 4, listOf("1", "2", "1", "2")) // Verify workflow and monitor delegate metadata val workflowMetadata1 = searchWorkflowMetadata(id = workflowId1) @@ -2288,7 +2662,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse1.workflowRunResult.executionId, workflowMetadata1!!.latestExecutionId ) - val monitorMetadataId1 = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata1) + val monitorMetadataId1 = getDelegateMonitorMetadataId(workflowMetadata1, monitorResponse) val monitorMetadata1 = searchMonitorMetadata(monitorMetadataId1) assertNotNull(monitorMetadata1) // Verify that for two workflows two different doc level monitor metadata has been created @@ -2308,14 +2682,14 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val monitorResponse = createMonitor(monitor)!! val workflow = randomWorkflow( - monitorIds = listOf(monitorResponse.id) + monitorIds = listOf(monitorResponse.id), auditDelegateMonitorAlerts = false ) val workflowResponse = upsertWorkflow(workflow)!! 
val workflowById = searchWorkflow(workflowResponse.id) assertNotNull(workflowById) val workflow1 = randomWorkflow( - monitorIds = listOf(monitorResponse.id) + monitorIds = listOf(monitorResponse.id), auditDelegateMonitorAlerts = false ) val workflowResponse1 = upsertWorkflow(workflow1)!! val workflowById1 = searchWorkflow(workflowResponse1.id) @@ -2342,13 +2716,13 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! - val monitorsRunResults = executeWorkflowResponse.workflowRunResult.workflowRunResult + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults assertEquals(1, monitorsRunResults.size) assertEquals(monitor.name, monitorsRunResults[0].monitorName) assertEquals(1, monitorsRunResults[0].triggerResults.size) - assertAlerts(monitorResponse.id, AlertIndices.ALERT_INDEX, 2) + assertAlerts(monitorResponse.id, AlertIndices.ALERT_INDEX, alertSize = 2, workflowId) assertFindings(monitorResponse.id, AlertIndices.FINDING_HISTORY_WRITE_INDEX, 2, 2, listOf("1", "2")) // Verify workflow and monitor delegate metadata val workflowMetadata = searchWorkflowMetadata(id = workflowId) @@ -2358,7 +2732,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse.workflowRunResult.executionId, workflowMetadata!!.latestExecutionId ) - val monitorMetadataId = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata) + val monitorMetadataId = getDelegateMonitorMetadataId(workflowMetadata, monitorResponse) val monitorMetadata = searchMonitorMetadata(monitorMetadataId) assertNotNull(monitorMetadata) @@ -2380,14 +2754,14 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { // Execute second workflow val workflowId1 = workflowResponse1.id val executeWorkflowResponse1 = executeWorkflow(workflowById1, workflowId1, false)!! 
- val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.workflowRunResult + val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.monitorRunResults assertEquals(1, monitorsRunResults1.size) assertEquals(monitor.name, monitorsRunResults1[0].monitorName) assertEquals(1, monitorsRunResults1[0].triggerResults.size) // Verify alerts for the custom index - val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex, 2) + val getAlertsResponse = assertAlerts(monitorResponse.id, customAlertsIndex, alertSize = 2, workflowId1) assertAcknowledges(getAlertsResponse.alerts, monitorResponse.id, 2) assertFindings(monitorResponse.id, customFindingsIndex, 2, 2, listOf("1", "2")) @@ -2399,7 +2773,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse1.workflowRunResult.executionId, workflowMetadata1!!.latestExecutionId ) - val monitorMetadataId1 = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata1) + val monitorMetadataId1 = getDelegateMonitorMetadataId(workflowMetadata1, monitorResponse) val monitorMetadata1 = searchMonitorMetadata(monitorMetadataId1) assertNotNull(monitorMetadata1) // Verify that for two workflows two different doc level monitor metadata has been created @@ -2446,7 +2820,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { // First execution val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! 
- val monitorsRunResults = executeWorkflowResponse.workflowRunResult.workflowRunResult + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults assertEquals(2, monitorsRunResults.size) val workflowMetadata = searchWorkflowMetadata(id = workflowId) @@ -2456,13 +2830,13 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse.workflowRunResult.executionId, workflowMetadata!!.latestExecutionId ) - val monitorMetadataId = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata) + val monitorMetadataId = getDelegateMonitorMetadataId(workflowMetadata, monitorResponse) val monitorMetadata = searchMonitorMetadata(monitorMetadataId) assertNotNull(monitorMetadata) // Second execution val executeWorkflowResponse1 = executeWorkflow(workflowById, workflowId, false)!! - val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.workflowRunResult + val monitorsRunResults1 = executeWorkflowResponse1.workflowRunResult.monitorRunResults assertEquals(2, monitorsRunResults1.size) val workflowMetadata1 = searchWorkflowMetadata(id = workflowId) @@ -2472,17 +2846,12 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { executeWorkflowResponse1.workflowRunResult.executionId, workflowMetadata1!!.latestExecutionId ) - val monitorMetadataId1 = getDelegateMonitorMetadataId(monitorResponse, workflowMetadata1) + val monitorMetadataId1 = getDelegateMonitorMetadataId(workflowMetadata1, monitorResponse) assertTrue(monitorMetadataId == monitorMetadataId1) val monitorMetadata1 = searchMonitorMetadata(monitorMetadataId1) assertNotNull(monitorMetadata1) } - private fun getDelegateMonitorMetadataId( - monitorResponse: IndexMonitorResponse, - workflowMetadata: WorkflowMetadata, - ) = "${workflowMetadata.id}-${monitorResponse.id}-metadata" - fun `test execute workflow dryrun verify workflow metadata not created`() { val docQuery1 = DocLevelQuery(query = "test_field_1:\"us-west-2\"", name = "3") val docLevelInput1 = 
DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) @@ -2523,20 +2892,244 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, true) - assertNotNull("Workflow run result is null", executeWorkflowResponse) - val monitorsRunResults = executeWorkflowResponse!!.workflowRunResult.workflowRunResult - assertEquals(2, monitorsRunResults.size) + assertNotNull("Workflow run result is null", executeWorkflowResponse) + val monitorsRunResults = executeWorkflowResponse!!.workflowRunResult.monitorRunResults + assertEquals(2, monitorsRunResults.size) + + var exception: java.lang.Exception? = null + try { + searchWorkflowMetadata(id = workflowId) + } catch (ex: java.lang.Exception) { + exception = ex + } + assertTrue(exception is java.util.NoSuchElementException) + } + + fun `test execute workflow with custom alerts and finding index with bucket and doc monitor bucket monitor used as chained finding`() { + val query = QueryBuilders.rangeQuery("test_strict_date_time") + .gt("{{period_end}}||-10d") + .lte("{{period_end}}") + .format("epoch_millis") + val compositeSources = listOf( + TermsValuesSourceBuilder("test_field_1").field("test_field_1") + ) + val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) + val input = SearchInput(indices = listOf(index), query = SearchSourceBuilder().size(0).query(query).aggregation(compositeAgg)) + // Bucket level monitor will reduce the size of matched doc ids on those that belong to a bucket that contains more than 1 document after term grouping + val triggerScript = """ + params.docCount > 1 + """.trimIndent() + + var trigger = randomBucketLevelTrigger() + trigger = trigger.copy( + bucketSelector = BucketSelectorExtAggregationBuilder( + name = trigger.id, + bucketsPathsMap = mapOf("docCount" to "_count"), + script = Script(triggerScript), + parentBucketPath = "composite_agg", + filter 
= null, + ) + ) + val bucketCustomAlertsIndex = "custom_alerts_index" + val bucketCustomFindingsIndex = "custom_findings_index" + val bucketCustomFindingsIndexPattern = "custom_findings_index-1" + + val bucketLevelMonitorResponse = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger), + dataSources = DataSources( + findingsEnabled = true, + alertsIndex = bucketCustomAlertsIndex, + findingsIndex = bucketCustomFindingsIndex, + findingsIndexPattern = bucketCustomFindingsIndexPattern + ) + ) + )!! + + val docQuery1 = DocLevelQuery(query = "test_field_1:\"test_value_2\"", name = "1") + val docQuery2 = DocLevelQuery(query = "test_field_1:\"test_value_1\"", name = "2") + val docQuery3 = DocLevelQuery(query = "test_field_1:\"test_value_3\"", name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1, docQuery2, docQuery3)) + val docTrigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val docCustomAlertsIndex = "custom_alerts_index" + val docCustomFindingsIndex = "custom_findings_index" + val docCustomFindingsIndexPattern = "custom_findings_index-1" + var docLevelMonitor = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(docTrigger), + dataSources = DataSources( + alertsIndex = docCustomAlertsIndex, + findingsIndex = docCustomFindingsIndex, + findingsIndexPattern = docCustomFindingsIndexPattern + ) + ) + + val docLevelMonitorResponse = createMonitor(docLevelMonitor)!! + // 1. bucketMonitor (chainedFinding = null) 2. docMonitor (chainedFinding = bucketMonitor) + var workflow = randomWorkflow( + monitorIds = listOf(bucketLevelMonitorResponse.id, docLevelMonitorResponse.id), auditDelegateMonitorAlerts = false + ) + val workflowResponse = upsertWorkflow(workflow)!! 
+ val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + + // Creates 5 documents + insertSampleTimeSerializedData( + index, + listOf( + "test_value_1", + "test_value_1", // adding duplicate to verify aggregation + "test_value_2", + "test_value_2", + "test_value_3" + ) + ) + + val workflowId = workflowResponse.id + // 1. bucket level monitor should reduce the doc findings to 4 (1, 2, 3, 4) + // 2. Doc level monitor will match those 4 documents although it contains rules for matching all 5 documents (docQuery3 matches the fifth) + val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + assertNotNull(executeWorkflowResponse) + + for (monitorRunResults in executeWorkflowResponse.workflowRunResult.monitorRunResults) { + if (bucketLevelMonitorResponse.monitor.name == monitorRunResults.monitorName) { + val searchResult = monitorRunResults.inputResults.results.first() + + @Suppress("UNCHECKED_CAST") + val buckets = searchResult.stringMap("aggregations")?.stringMap("composite_agg") + ?.get("buckets") as List> + assertEquals("Incorrect search result", 3, buckets.size) + + val getAlertsResponse = assertAlerts(bucketLevelMonitorResponse.id, bucketCustomAlertsIndex, alertSize = 2, workflowId) + assertAcknowledges(getAlertsResponse.alerts, bucketLevelMonitorResponse.id, 2) + assertFindings(bucketLevelMonitorResponse.id, bucketCustomFindingsIndex, 1, 4, listOf("1", "2", "3", "4")) + } else { + assertEquals(1, monitorRunResults.inputResults.results.size) + val values = monitorRunResults.triggerResults.values + assertEquals(1, values.size) + @Suppress("UNCHECKED_CAST") + val docLevelTrigger = values.iterator().next() as DocumentLevelTriggerRunResult + val triggeredDocIds = docLevelTrigger.triggeredDocs.map { it.split("|")[0] } + val expectedTriggeredDocIds = listOf("1", "2", "3", "4") + assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) + + val getAlertsResponse = assertAlerts(docLevelMonitorResponse.id, 
docCustomAlertsIndex, alertSize = 4, workflowId) + assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse.id, 4) + assertFindings(docLevelMonitorResponse.id, docCustomFindingsIndex, 4, 4, listOf("1", "2", "3", "4")) + } + } + } + + fun `test chained alerts for bucket level monitors generating audit alerts custom alerts index`() { + val customAlertIndex = "custom-alert-index" + val customAlertHistoryIndex = "custom-alert-history-index" + val customAlertHistoryIndexPattern = "" + val query = QueryBuilders.rangeQuery("test_strict_date_time") + .gt("{{period_end}}||-10d") + .lte("{{period_end}}") + .format("epoch_millis") + val compositeSources = listOf( + TermsValuesSourceBuilder("test_field_1").field("test_field_1") + ) + val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) + val input = SearchInput(indices = listOf(index), query = SearchSourceBuilder().size(0).query(query).aggregation(compositeAgg)) + // Bucket level monitor will reduce the size of matched doc ids on those that belong to a bucket that contains more than 1 document after term grouping + val triggerScript = """ + params.docCount > 1 + """.trimIndent() + + var trigger = randomBucketLevelTrigger() + trigger = trigger.copy( + bucketSelector = BucketSelectorExtAggregationBuilder( + name = trigger.id, + bucketsPathsMap = mapOf("docCount" to "_count"), + script = Script(triggerScript), + parentBucketPath = "composite_agg", + filter = null, + ) + ) + + val bucketLevelMonitorResponse = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger), + dataSources = DataSources( + alertsIndex = customAlertIndex, + alertsHistoryIndexPattern = customAlertHistoryIndexPattern, + alertsHistoryIndex = customAlertHistoryIndex + + ) + ) + )!! 
+ + val bucketLevelMonitorResponse2 = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger), + dataSources = DataSources( + alertsIndex = customAlertIndex, + alertsHistoryIndexPattern = customAlertHistoryIndexPattern, + alertsHistoryIndex = customAlertHistoryIndex + + ) + ) + )!! + + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${bucketLevelMonitorResponse.id}] && monitor[id=${bucketLevelMonitorResponse2.id}]") + ) + // 1. bucketMonitor (chainedFinding = null) 2. docMonitor (chainedFinding = bucketMonitor) + var workflow = randomWorkflow( + monitorIds = listOf(bucketLevelMonitorResponse.id, bucketLevelMonitorResponse2.id), + triggers = listOf(andTrigger) + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + + // Creates 5 documents + insertSampleTimeSerializedData( + index, + listOf( + "test_value_1", + "test_value_1", // adding duplicate to verify aggregation + "test_value_2", + "test_value_2", + "test_value_3" + ) + ) + + val workflowId = workflowResponse.id + // 1. bucket level monitor should reduce the doc findings to 4 (1, 2, 3, 4) + // 2. Doc level monitor will match those 4 documents although it contains rules for matching all 5 documents (docQuery3 matches the fifth) + val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + assertNotNull(executeWorkflowResponse) + + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults.isNotEmpty()) + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults[andTrigger.id]!!.triggered) - var exception: java.lang.Exception? 
= null - try { - searchWorkflowMetadata(id = workflowId) - } catch (ex: java.lang.Exception) { - exception = ex - } - assertTrue(exception is NoSuchElementException) + val auditStateAlerts = getAuditStateAlerts( + alertsIndex = customAlertHistoryIndex, + monitorId = bucketLevelMonitorResponse.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId + ) + Assert.assertEquals(auditStateAlerts.size, 2) + + val auditStateAlerts2 = getAuditStateAlerts( + alertsIndex = customAlertHistoryIndex, + monitorId = bucketLevelMonitorResponse2.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId + ) + Assert.assertEquals(auditStateAlerts2.size, 2) } - fun `test execute workflow with custom alerts and finding index when bucket monitor is used in chained finding of doc monitor`() { + fun `test chained alerts for bucket level monitors generating audit alerts`() { val query = QueryBuilders.rangeQuery("test_strict_date_time") .gt("{{period_end}}||-10d") .lte("{{period_end}}") @@ -2546,8 +3139,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { ) val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) val input = SearchInput(indices = listOf(index), query = SearchSourceBuilder().size(0).query(query).aggregation(compositeAgg)) - // Bucket level monitor will reduce the size of matched doc ids on those that belong - // to a bucket that contains more than 1 document after term grouping + // Bucket level monitor will reduce the size of matched doc ids on those that belong to a bucket that contains more than 1 document after term grouping val triggerScript = """ params.docCount > 1 """.trimIndent() @@ -2562,46 +3154,31 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { filter = null, ) ) - val bucketCustomAlertsIndex = "custom_alerts_index" - val bucketCustomFindingsIndex = "custom_findings_index" - val bucketCustomFindingsIndexPattern = "custom_findings_index-1" val bucketLevelMonitorResponse = 
createMonitor( randomBucketLevelMonitor( inputs = listOf(input), enabled = false, - triggers = listOf(trigger), - dataSources = DataSources( - findingsEnabled = true, - alertsIndex = bucketCustomAlertsIndex, - findingsIndex = bucketCustomFindingsIndex, - findingsIndexPattern = bucketCustomFindingsIndexPattern - ) + triggers = listOf(trigger) ) )!! - val docQuery1 = DocLevelQuery(query = "test_field_1:\"test_value_2\"", name = "1") - val docQuery2 = DocLevelQuery(query = "test_field_1:\"test_value_1\"", name = "2") - val docQuery3 = DocLevelQuery(query = "test_field_1:\"test_value_3\"", name = "3") - val docLevelInput = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1, docQuery2, docQuery3)) - val docTrigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) - val docCustomAlertsIndex = "custom_alerts_index" - val docCustomFindingsIndex = "custom_findings_index" - val docCustomFindingsIndexPattern = "custom_findings_index-1" - var docLevelMonitor = randomDocumentLevelMonitor( - inputs = listOf(docLevelInput), - triggers = listOf(docTrigger), - dataSources = DataSources( - alertsIndex = docCustomAlertsIndex, - findingsIndex = docCustomFindingsIndex, - findingsIndexPattern = docCustomFindingsIndexPattern + val bucketLevelMonitorResponse2 = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger) ) - ) + )!! - val docLevelMonitorResponse = createMonitor(docLevelMonitor)!! + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${bucketLevelMonitorResponse.id}] && monitor[id=${bucketLevelMonitorResponse2.id}]") + ) // 1. bucketMonitor (chainedFinding = null) 2. 
docMonitor (chainedFinding = bucketMonitor) var workflow = randomWorkflow( - monitorIds = listOf(bucketLevelMonitorResponse.id, docLevelMonitorResponse.id) + monitorIds = listOf(bucketLevelMonitorResponse.id, bucketLevelMonitorResponse2.id), + triggers = listOf(andTrigger) ) val workflowResponse = upsertWorkflow(workflow)!! val workflowById = searchWorkflow(workflowResponse.id) @@ -2625,36 +3202,26 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! assertNotNull(executeWorkflowResponse) - for (monitorRunResults in executeWorkflowResponse.workflowRunResult.workflowRunResult) { - if (bucketLevelMonitorResponse.monitor.name == monitorRunResults.monitorName) { - val searchResult = monitorRunResults.inputResults.results.first() - - @Suppress("UNCHECKED_CAST") - val buckets = searchResult.stringMap("aggregations")?.stringMap("composite_agg") - ?.get("buckets") as List> - assertEquals("Incorrect search result", 3, buckets.size) + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults.isNotEmpty()) + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(executeWorkflowResponse.workflowRunResult.triggerResults[andTrigger.id]!!.triggered) - val getAlertsResponse = assertAlerts(bucketLevelMonitorResponse.id, bucketCustomAlertsIndex, 2) - assertAcknowledges(getAlertsResponse.alerts, bucketLevelMonitorResponse.id, 2) - assertFindings(bucketLevelMonitorResponse.id, bucketCustomFindingsIndex, 1, 4, listOf("1", "2", "3", "4")) - } else { - assertEquals(1, monitorRunResults.inputResults.results.size) - val values = monitorRunResults.triggerResults.values - assertEquals(1, values.size) - @Suppress("UNCHECKED_CAST") - val docLevelTrigger = values.iterator().next() as DocumentLevelTriggerRunResult - val triggeredDocIds = docLevelTrigger.triggeredDocs.map { it.split("|")[0] } - val expectedTriggeredDocIds = 
listOf("1", "2", "3", "4") - assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) + val auditStateAlerts = getAuditStateAlerts( + alertsIndex = bucketLevelMonitorResponse.monitor.dataSources.alertsHistoryIndex, + monitorId = bucketLevelMonitorResponse.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId + ) + Assert.assertEquals(auditStateAlerts.size, 2) - val getAlertsResponse = assertAlerts(docLevelMonitorResponse.id, docCustomAlertsIndex, 4) - assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse.id, 4) - assertFindings(docLevelMonitorResponse.id, docCustomFindingsIndex, 4, 4, listOf("1", "2", "3", "4")) - } - } + val auditStateAlerts2 = getAuditStateAlerts( + alertsIndex = bucketLevelMonitorResponse.monitor.dataSources.alertsHistoryIndex, + monitorId = bucketLevelMonitorResponse2.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId + ) + Assert.assertEquals(auditStateAlerts2.size, 2) } - fun `test execute workflow with custom alerts and finding index when doc level delegate is used in chained finding`() { + fun `test execute with custom alerts and finding index with bucket and doc monitor when doc monitor is used in chained finding`() { val docQuery1 = DocLevelQuery(query = "test_field_1:\"test_value_2\"", name = "1") val docQuery2 = DocLevelQuery(query = "test_field_1:\"test_value_3\"", name = "2") @@ -2748,7 +3315,8 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { bucketLevelMonitorResponse.id, docLevelMonitorResponse1.id, queryMonitorResponse.id - ) + ), + auditDelegateMonitorAlerts = false ) val workflowResponse = upsertWorkflow(workflow)!! val workflowById = searchWorkflow(workflowResponse.id) @@ -2773,7 +3341,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! 
assertNotNull(executeWorkflowResponse) - for (monitorRunResults in executeWorkflowResponse.workflowRunResult.workflowRunResult) { + for (monitorRunResults in executeWorkflowResponse.workflowRunResult.monitorRunResults) { when (monitorRunResults.monitorName) { // Verify first doc level monitor execution, alerts and findings docLevelMonitorResponse.monitor.name -> { @@ -2787,7 +3355,12 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) val getAlertsResponse = - assertAlerts(docLevelMonitorResponse.id, docLevelMonitorResponse.monitor.dataSources.alertsIndex, 4) + assertAlerts( + docLevelMonitorResponse.id, + docLevelMonitorResponse.monitor.dataSources.alertsIndex, + alertSize = 4, + workflowId = workflowId + ) assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse.id, 4) assertFindings( docLevelMonitorResponse.id, @@ -2809,7 +3382,12 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { assertEquals("Incorrect search result", 2, buckets.size) val getAlertsResponse = - assertAlerts(bucketLevelMonitorResponse.id, bucketLevelMonitorResponse.monitor.dataSources.alertsIndex, 2) + assertAlerts( + bucketLevelMonitorResponse.id, + bucketLevelMonitorResponse.monitor.dataSources.alertsIndex, + alertSize = 2, + workflowId + ) assertAcknowledges(getAlertsResponse.alerts, bucketLevelMonitorResponse.id, 2) assertFindings( bucketLevelMonitorResponse.id, @@ -2831,7 +3409,12 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { assertEquals(expectedTriggeredDocIds, triggeredDocIds.sorted()) val getAlertsResponse = - assertAlerts(docLevelMonitorResponse1.id, docLevelMonitorResponse1.monitor.dataSources.alertsIndex, 2) + assertAlerts( + docLevelMonitorResponse1.id, + docLevelMonitorResponse1.monitor.dataSources.alertsIndex, + alertSize = 2, + workflowId + ) assertAcknowledges(getAlertsResponse.alerts, docLevelMonitorResponse1.id, 2) assertFindings( docLevelMonitorResponse1.id, @@ 
-2850,7 +3433,8 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val totalHits = ( ( - monitorRunResults.inputResults.results[0]["hits"] as kotlin.collections.Map + monitorRunResults.inputResults.results[0]["hits"] + as kotlin.collections.Map )["total"] as kotlin.collections.Map )["value"] assertEquals(2, totalHits) @@ -2858,9 +3442,11 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val docIds = ( ( - monitorRunResults.inputResults.results[0]["hits"] as kotlin.collections.Map + monitorRunResults.inputResults.results[0]["hits"] + as kotlin.collections.Map )["hits"] as List> - ).map { it["_id"]!! } + ) + .map { it["_id"]!! } assertEquals(listOf("5", "6"), docIds.sorted()) } } @@ -2880,7 +3466,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val monitorResponse = createMonitor(monitor)!! var workflow = randomWorkflow( - monitorIds = listOf(monitorResponse.id) + monitorIds = listOf(monitorResponse.id), auditDelegateMonitorAlerts = false ) val workflowResponse = upsertWorkflow(workflow)!! val workflowById = searchWorkflow(workflowResponse.id) @@ -2889,11 +3475,11 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { deleteIndex(index) val response = executeWorkflow(workflowById, workflowById!!.id, false)!! - val error = response.workflowRunResult.workflowRunResult[0].error + val error = response.workflowRunResult.monitorRunResults[0].error assertNotNull(error) assertTrue(error is AlertingException) assertEquals(RestStatus.INTERNAL_SERVER_ERROR, (error as AlertingException).status) - assertTrue(error.message!!.contains("IndexNotFoundException")) + assertTrue(error.message!!.contains("no such index [$index]")) } fun `test execute workflow wrong workflow id`() { @@ -2948,24 +3534,52 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { assertTrue("Findings saved for test monitor", relatedDocIds.containsAll(findingDocIds)) } + private fun getAuditStateAlerts( + alertsIndex: String? 
= AlertIndices.ALERT_INDEX, + monitorId: String, + executionId: String? = null, + ): List { + val searchRequest = SearchRequest(alertsIndex) + val boolQueryBuilder = QueryBuilders.boolQuery() + boolQueryBuilder.must(TermQueryBuilder("monitor_id", monitorId)) + if (executionId.isNullOrEmpty() == false) + boolQueryBuilder.must(TermQueryBuilder("execution_id", executionId)) + searchRequest.source().query(boolQueryBuilder) + val searchResponse = client().search(searchRequest).get() + return searchResponse.hits.map { hit -> + val xcp = XContentHelper.createParser( + xContentRegistry(), LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alert = Alert.parse(xcp, hit.id, hit.version) + alert + } + } + private fun assertAlerts( monitorId: String, - customAlertsIndex: String, + alertsIndex: String? = AlertIndices.ALERT_INDEX, + executionId: String? = null, alertSize: Int, + workflowId: String ): GetAlertsResponse { - val alerts = searchAlerts(monitorId, customAlertsIndex) + val alerts = searchAlerts(monitorId, alertsIndex!!, executionId = executionId) assertEquals("Alert saved for test monitor", alertSize, alerts.size) val table = Table("asc", "id", null, alertSize, 0, "") var getAlertsResponse = client() .execute( AlertingActions.GET_ALERTS_ACTION_TYPE, - GetAlertsRequest(table, "ALL", "ALL", null, customAlertsIndex) + GetAlertsRequest(table, "ALL", "ALL", null, alertsIndex) ) .get() assertTrue(getAlertsResponse != null) assertTrue(getAlertsResponse.alerts.size == alertSize) getAlertsResponse = client() - .execute(AlertingActions.GET_ALERTS_ACTION_TYPE, GetAlertsRequest(table, "ALL", "ALL", monitorId, null)) + .execute( + AlertingActions.GET_ALERTS_ACTION_TYPE, + GetAlertsRequest(table, "ALL", "ALL", monitorId, null, workflowIds = listOf(workflowId)) + ) .get() assertTrue(getAlertsResponse != null) assertTrue(getAlertsResponse.alerts.size == 
alertSize) @@ -2984,96 +3598,437 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { AcknowledgeAlertRequest(monitorId, alertIds, WriteRequest.RefreshPolicy.IMMEDIATE) ).get() - assertEquals(alertSize, acknowledgeAlertResponse.acknowledged.size) - } + assertEquals(alertSize, acknowledgeAlertResponse.acknowledged.size) + } + + private fun verifyAcknowledgeChainedAlerts( + alerts: List, + workflowId: String, + alertSize: Int, + ) { + val alertIds = alerts.map { it.id }.toMutableList() + val acknowledgeAlertResponse = ackChainedAlerts(alertIds, workflowId) + assertTrue(acknowledgeAlertResponse.acknowledged.stream().map { it.id }.collect(Collectors.toList()).containsAll(alertIds)) + assertEquals(alertSize, acknowledgeAlertResponse.acknowledged.size) + alertIds.add("dummy") + val redundantAck = ackChainedAlerts(alertIds, workflowId) + Assert.assertTrue(redundantAck.acknowledged.isEmpty()) + Assert.assertTrue(redundantAck.missing.contains("dummy")) + alertIds.remove("dummy") + Assert.assertTrue(redundantAck.failed.map { it.id }.toList().containsAll(alertIds)) + } + + private fun ackChainedAlerts(alertIds: List, workflowId: String): AcknowledgeAlertResponse { + + return client().execute( + AlertingActions.ACKNOWLEDGE_CHAINED_ALERTS_ACTION_TYPE, + AcknowledgeChainedAlertRequest(workflowId, alertIds) + ).get() + } + + private fun assertAuditStateAlerts( + monitorId: String, + alerts: List, + ) { + alerts.forEach { Assert.assertEquals(it.state, Alert.State.AUDIT) } + val alertIds = alerts.stream().map { it.id }.collect(Collectors.toList()) + val ack = client().execute( + AlertingActions.ACKNOWLEDGE_ALERTS_ACTION_TYPE, + AcknowledgeAlertRequest(monitorId, alertIds, WriteRequest.RefreshPolicy.IMMEDIATE) + ).get() + Assert.assertTrue(ack.acknowledged.isEmpty()) + Assert.assertTrue(ack.missing.containsAll(alertIds)) + Assert.assertTrue(ack.failed.isEmpty()) + } + + fun `test execute workflow with bucket-level and doc-level chained monitors`() { + 
createTestIndex(TEST_HR_INDEX) + + val compositeSources = listOf( + TermsValuesSourceBuilder("test_field_1").field("test_field_1") + ) + val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) + val input = SearchInput( + indices = listOf(TEST_HR_INDEX), + query = SearchSourceBuilder().size(0).query(QueryBuilders.matchAllQuery()).aggregation(compositeAgg) + ) + val triggerScript = """ + params.docCount > 0 + """.trimIndent() + + var trigger = randomBucketLevelTrigger() + trigger = trigger.copy( + bucketSelector = BucketSelectorExtAggregationBuilder( + name = trigger.id, + bucketsPathsMap = mapOf("docCount" to "_count"), + script = Script(triggerScript), + parentBucketPath = "composite_agg", + filter = null + ), + actions = listOf() + ) + val bucketMonitor = createMonitor( + randomBucketLevelMonitor( + inputs = listOf(input), + enabled = false, + triggers = listOf(trigger) + ) + ) + assertNotNull("The bucket monitor was not created", bucketMonitor) + + val docQuery1 = DocLevelQuery(query = "test_field_1:\"a\"", name = "3") + var monitor1 = randomDocumentLevelMonitor( + inputs = listOf(DocLevelMonitorInput("description", listOf(TEST_HR_INDEX), listOf(docQuery1))), + triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN)) + ) + val docMonitor = createMonitor(monitor1)!! 
+ assertNotNull("The doc level monitor was not created", docMonitor) + + val workflow = randomWorkflow(monitorIds = listOf(bucketMonitor!!.id, docMonitor.id)) + val workflowResponse = upsertWorkflow(workflow) + assertNotNull("The workflow was not created", workflowResponse) + + // Add a doc that is accessible to the user + indexDoc( + TEST_HR_INDEX, + "1", + """ + { + "test_field_1": "a", + "accessible": true + } + """.trimIndent() + ) + + // Add a second doc that is not accessible to the user + indexDoc( + TEST_HR_INDEX, + "2", + """ + { + "test_field_1": "b", + "accessible": false + } + """.trimIndent() + ) + + indexDoc( + TEST_HR_INDEX, + "3", + """ + { + "test_field_1": "c", + "accessible": true + } + """.trimIndent() + ) + + val executeResult = executeWorkflow(id = workflowResponse!!.id) + assertNotNull(executeResult) + assertEquals(2, executeResult!!.workflowRunResult.monitorRunResults.size) + } + + fun `test chained alerts for AND OR and NOT conditions with custom alerts indices`() { + val docQuery1 = DocLevelQuery(query = "test_field_1:\"us-west-2\"", name = "3") + val docLevelInput1 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) + val trigger1 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val customFindingsIndex1 = "custom_findings_index" + val customFindingsIndexPattern1 = "custom_findings_index-1" + val customAlertsIndex = "custom_alerts_index" + val customAlertsHistoryIndex = "custom_alerts_history_index" + val customAlertsHistoryIndexPattern = "" + var monitor1 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput1), + triggers = listOf(trigger1), + dataSources = DataSources( + findingsIndex = customFindingsIndex1, + findingsIndexPattern = customFindingsIndexPattern1, + alertsIndex = customAlertsIndex, + alertsHistoryIndex = customAlertsHistoryIndex, + alertsHistoryIndexPattern = customAlertsHistoryIndexPattern + ) + ) + val monitorResponse = createMonitor(monitor1)!! 
+ + val docQuery2 = DocLevelQuery(query = "source.ip.v6.v2:16645", name = "4") + val docLevelInput2 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery2)) + val trigger2 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val customFindingsIndex2 = "custom_findings_index_2" + val customFindingsIndexPattern2 = "custom_findings_index-2" + var monitor2 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput2), + triggers = listOf(trigger2), + dataSources = DataSources( + findingsIndex = customFindingsIndex2, + findingsIndexPattern = customFindingsIndexPattern2, + alertsIndex = customAlertsIndex, + alertsHistoryIndex = customAlertsHistoryIndex, + alertsHistoryIndexPattern = customAlertsHistoryIndexPattern + ) + ) + + val monitorResponse2 = createMonitor(monitor2)!! + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${monitorResponse.id}] && monitor[id=${monitorResponse2.id}]") + ) + val notTrigger = randomChainedAlertTrigger( + name = "Not1OrNot2", + condition = Script("!monitor[id=${monitorResponse.id}] || !monitor[id=${monitorResponse2.id}]") + ) + var workflow = randomWorkflow( + monitorIds = listOf(monitorResponse.id, monitorResponse2.id), + triggers = listOf(andTrigger, notTrigger) + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + val workflowId = workflowResponse.id + + var executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! 
+ var triggerResults = executeWorkflowResponse.workflowRunResult.triggerResults + Assert.assertEquals(triggerResults.size, 2) + Assert.assertTrue(triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(triggerResults.containsKey(notTrigger.id)) + var andTriggerResult = triggerResults[andTrigger.id] + var notTriggerResult = triggerResults[notTrigger.id] + Assert.assertTrue(notTriggerResult!!.triggered) + Assert.assertFalse(andTriggerResult!!.triggered) + var res = + getWorkflowAlerts(workflowId = workflowId, alertIndex = customAlertsIndex, associatedAlertsIndex = customAlertsHistoryIndex) + var chainedAlerts = res.alerts + Assert.assertTrue(chainedAlerts.size == 1) + Assert.assertTrue(res.associatedAlerts.isEmpty()) + verifyAcknowledgeChainedAlerts(chainedAlerts, workflowId, 1) + Assert.assertTrue(chainedAlerts[0].executionId == executeWorkflowResponse.workflowRunResult.executionId) + Assert.assertTrue(chainedAlerts[0].monitorId == "") + Assert.assertTrue(chainedAlerts[0].triggerId == notTrigger.id) + var testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Matches monitor1 + val testDoc1 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16644, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-west-2" + }""" + indexDoc(index, "1", testDoc1) + + testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Matches monitor1 and monitor2 + val testDoc2 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16645, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-west-2" + }""" + indexDoc(index, "2", testDoc2) + + testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Doesn't match + val testDoc3 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16645, + "test_strict_date_time" : 
"$testTime", + "test_field_1" : "us-east-1" + }""" + indexDoc(index, "3", testDoc3) + executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + triggerResults = executeWorkflowResponse.workflowRunResult.triggerResults + Assert.assertEquals(triggerResults.size, 2) + Assert.assertTrue(triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(triggerResults.containsKey(notTrigger.id)) + andTriggerResult = triggerResults[andTrigger.id] + notTriggerResult = triggerResults[notTrigger.id] + Assert.assertFalse(notTriggerResult!!.triggered) + Assert.assertTrue(andTriggerResult!!.triggered) + res = getWorkflowAlerts(workflowId, alertIndex = customAlertsIndex, associatedAlertsIndex = customAlertsHistoryIndex) + chainedAlerts = res.alerts + val numChainedAlerts = 1 + Assert.assertTrue(res.associatedAlerts.isNotEmpty()) + Assert.assertTrue(chainedAlerts.size == numChainedAlerts) + Assert.assertTrue(chainedAlerts[0].executionId == executeWorkflowResponse.workflowRunResult.executionId) + Assert.assertTrue(chainedAlerts[0].monitorId == "") + Assert.assertTrue(chainedAlerts[0].triggerId == andTrigger.id) + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults + assertEquals(2, monitorsRunResults.size) - fun `test execute workflow with bucket-level and doc-level chained monitors`() { - createTestIndex(TEST_HR_INDEX) + assertEquals(monitor1.name, monitorsRunResults[0].monitorName) + assertEquals(1, monitorsRunResults[0].triggerResults.size) - val compositeSources = listOf( - TermsValuesSourceBuilder("test_field").field("test_field") + Assert.assertEquals(monitor2.name, monitorsRunResults[1].monitorName) + Assert.assertEquals(1, monitorsRunResults[1].triggerResults.size) + + Assert.assertEquals( + monitor1.dataSources.alertsHistoryIndex, + CompositeWorkflowRunner.getDelegateMonitorAlertIndex(dataSources = monitor1.dataSources, workflow, true) ) - val compositeAgg = CompositeAggregationBuilder("composite_agg", compositeSources) - 
val input = SearchInput( - indices = listOf(TEST_HR_INDEX), - query = SearchSourceBuilder().size(0).query(QueryBuilders.matchAllQuery()).aggregation(compositeAgg) + val alerts = getAuditStateAlerts( + monitorId = monitorResponse.id, executionId = executeWorkflowResponse.workflowRunResult.executionId, + alertsIndex = monitor1.dataSources.alertsHistoryIndex, ) - val triggerScript = """ - params.docCount > 0 - """.trimIndent() + assertAuditStateAlerts(monitorResponse.id, alerts) + assertFindings(monitorResponse.id, customFindingsIndex1, 2, 2, listOf("1", "2")) + val associatedAlertIds = res.associatedAlerts.map { it.id }.toList() + associatedAlertIds.containsAll(alerts.map { it.id }.toList()) + val alerts1 = getAuditStateAlerts( + alertsIndex = monitor2.dataSources.alertsHistoryIndex, monitorId = monitorResponse2.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId, + ) + assertAuditStateAlerts(monitorResponse2.id, alerts1) + assertFindings(monitorResponse2.id, customFindingsIndex2, 1, 1, listOf("2")) + associatedAlertIds.containsAll(alerts1.map { it.id }.toList()) + verifyAcknowledgeChainedAlerts(chainedAlerts, workflowId, numChainedAlerts) + } - var trigger = randomBucketLevelTrigger() - trigger = trigger.copy( - bucketSelector = BucketSelectorExtAggregationBuilder( - name = trigger.id, - bucketsPathsMap = mapOf("docCount" to "_count"), - script = Script(triggerScript), - parentBucketPath = "composite_agg", - filter = null - ), - actions = listOf() + fun `test chained alerts for AND OR and NOT conditions`() { + val docQuery1 = DocLevelQuery(query = "test_field_1:\"us-west-2\"", name = "3") + val docLevelInput1 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) + val trigger1 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val customFindingsIndex1 = "custom_findings_index" + val customFindingsIndexPattern1 = "custom_findings_index-1" + var monitor1 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput1), + 
triggers = listOf(trigger1), + dataSources = DataSources( + findingsIndex = customFindingsIndex1, + findingsIndexPattern = customFindingsIndexPattern1 + ) ) - val bucketMonitor = createMonitor( - randomBucketLevelMonitor( - inputs = listOf(input), - enabled = false, - triggers = listOf(trigger) + val monitorResponse = createMonitor(monitor1)!! + + val docQuery2 = DocLevelQuery(query = "source.ip.v6.v2:16645", name = "4") + val docLevelInput2 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery2)) + val trigger2 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val customFindingsIndex2 = "custom_findings_index_2" + val customFindingsIndexPattern2 = "custom_findings_index-2" + var monitor2 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput2), + triggers = listOf(trigger2), + dataSources = DataSources( + findingsIndex = customFindingsIndex2, + findingsIndexPattern = customFindingsIndexPattern2 ) ) - assertNotNull("The bucket monitor was not created", bucketMonitor) - val docQuery1 = DocLevelQuery(query = "test_field:\"a\"", name = "3") - var monitor1 = randomDocumentLevelMonitor( - inputs = listOf(DocLevelMonitorInput("description", listOf(TEST_HR_INDEX), listOf(docQuery1))), - triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN)) + val monitorResponse2 = createMonitor(monitor2)!! + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${monitorResponse.id}] && monitor[id=${monitorResponse2.id}]") ) - val docMonitor = createMonitor(monitor1)!! - assertNotNull("The doc level monitor was not created", docMonitor) + val notTrigger = randomChainedAlertTrigger( + name = "Not1OrNot2", + condition = Script("!monitor[id=${monitorResponse.id}] || !monitor[id=${monitorResponse2.id}]") + ) + var workflow = randomWorkflow( + monitorIds = listOf(monitorResponse.id, monitorResponse2.id), + triggers = listOf(andTrigger, notTrigger) + ) + val workflowResponse = upsertWorkflow(workflow)!! 
+ val workflowById = searchWorkflow(workflowResponse.id) + assertNotNull(workflowById) + val workflowId = workflowResponse.id - val workflow = randomWorkflow(monitorIds = listOf(bucketMonitor!!.id, docMonitor.id)) - val workflowResponse = upsertWorkflow(workflow) - assertNotNull("The workflow was not created", workflowResponse) + var executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + var triggerResults = executeWorkflowResponse.workflowRunResult.triggerResults + Assert.assertEquals(triggerResults.size, 2) + Assert.assertTrue(triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(triggerResults.containsKey(notTrigger.id)) + var andTriggerResult = triggerResults[andTrigger.id] + var notTriggerResult = triggerResults[notTrigger.id] + Assert.assertTrue(notTriggerResult!!.triggered) + Assert.assertFalse(andTriggerResult!!.triggered) + var res = getWorkflowAlerts( + workflowId, + ) + var chainedAlerts = res.alerts + Assert.assertTrue(chainedAlerts.size == 1) + Assert.assertTrue(res.associatedAlerts.isEmpty()) + verifyAcknowledgeChainedAlerts(chainedAlerts, workflowId, 1) + Assert.assertTrue(chainedAlerts[0].executionId == executeWorkflowResponse.workflowRunResult.executionId) + Assert.assertTrue(chainedAlerts[0].monitorId == "") + Assert.assertTrue(chainedAlerts[0].triggerId == notTrigger.id) + var testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Matches monitor1 + val testDoc1 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16644, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-west-2" + }""" + indexDoc(index, "1", testDoc1) - // Add a doc that is accessible to the user - indexDoc( - TEST_HR_INDEX, - "1", - """ - { - "test_field": "a", - "accessible": true - } - """.trimIndent() - ) + testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Matches monitor1 and 
monitor2 + val testDoc2 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16645, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-west-2" + }""" + indexDoc(index, "2", testDoc2) - // Add a second doc that is not accessible to the user - indexDoc( - TEST_HR_INDEX, - "2", - """ - { - "test_field": "b", - "accessible": false - } - """.trimIndent() - ) + testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS)) + // Doesn't match + val testDoc3 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16645, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-east-1" + }""" + indexDoc(index, "3", testDoc3) + executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + triggerResults = executeWorkflowResponse.workflowRunResult.triggerResults + Assert.assertEquals(triggerResults.size, 2) + Assert.assertTrue(triggerResults.containsKey(andTrigger.id)) + Assert.assertTrue(triggerResults.containsKey(notTrigger.id)) + andTriggerResult = triggerResults[andTrigger.id] + notTriggerResult = triggerResults[notTrigger.id] + Assert.assertFalse(notTriggerResult!!.triggered) + Assert.assertTrue(andTriggerResult!!.triggered) + res = getWorkflowAlerts(workflowId) + chainedAlerts = res.alerts + Assert.assertTrue(chainedAlerts.size == 1) + Assert.assertTrue(res.associatedAlerts.isNotEmpty()) + Assert.assertTrue(chainedAlerts[0].executionId == executeWorkflowResponse.workflowRunResult.executionId) + Assert.assertTrue(chainedAlerts[0].monitorId == "") + Assert.assertTrue(chainedAlerts[0].triggerId == andTrigger.id) + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults + assertEquals(2, monitorsRunResults.size) - indexDoc( - TEST_HR_INDEX, - "3", - """ - { - "test_field": "c", - "accessible": true - } - """.trimIndent() + assertEquals(monitor1.name, monitorsRunResults[0].monitorName) + assertEquals(1, 
monitorsRunResults[0].triggerResults.size) + + Assert.assertEquals(monitor2.name, monitorsRunResults[1].monitorName) + Assert.assertEquals(1, monitorsRunResults[1].triggerResults.size) + + Assert.assertEquals( + monitor1.dataSources.alertsHistoryIndex, + CompositeWorkflowRunner.getDelegateMonitorAlertIndex(dataSources = monitor1.dataSources, workflow, true) + ) + val alerts = getAuditStateAlerts( + alertsIndex = monitor1.dataSources.alertsHistoryIndex, monitorId = monitorResponse.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId ) + val associatedAlertIds = res.associatedAlerts.map { it.id }.toList() + associatedAlertIds.containsAll(alerts.map { it.id }.toList()) + assertAuditStateAlerts(monitorResponse.id, alerts) + assertFindings(monitorResponse.id, customFindingsIndex1, 2, 2, listOf("1", "2")) - val executeResult = executeWorkflow(id = workflowResponse!!.id) - assertNotNull(executeResult) - assertEquals(2, executeResult!!.workflowRunResult.workflowRunResult.size) + val alerts1 = getAuditStateAlerts( + alertsIndex = monitor2.dataSources.alertsHistoryIndex, monitorId = monitorResponse2.id, + executionId = executeWorkflowResponse.workflowRunResult.executionId + ) + associatedAlertIds.containsAll(alerts1.map { it.id }.toList()) + assertAuditStateAlerts(monitorResponse2.id, alerts1) + assertFindings(monitorResponse2.id, customFindingsIndex2, 1, 1, listOf("2")) + verifyAcknowledgeChainedAlerts(chainedAlerts, workflowId, 1) } + private fun getDelegateMonitorMetadataId( + workflowMetadata: WorkflowMetadata?, + monitorResponse: IndexMonitorResponse, + ) = "${workflowMetadata!!.id}-${monitorResponse.id}-metadata" + fun `test create workflow success`() { val docQuery1 = DocLevelQuery(query = "source.ip.v6.v1:12345", name = "3") val docLevelInput = DocLevelMonitorInput( @@ -3661,7 +4616,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val workflowId = workflowResponse.id val executeWorkflowResponse = executeWorkflow(workflowById, 
workflowId, false)!! - val monitorsRunResults = executeWorkflowResponse.workflowRunResult.workflowRunResult + val monitorsRunResults = executeWorkflowResponse.workflowRunResult.monitorRunResults assertEquals(2, monitorsRunResults.size) val workflowMetadata = searchWorkflowMetadata(workflowId) @@ -3727,11 +4682,6 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { } } - private fun getDelegateMonitorMetadataId( - workflowMetadata: WorkflowMetadata?, - monitorResponse: IndexMonitorResponse, - ) = "${workflowMetadata!!.id}-${monitorResponse.id}-metadata" - fun `test delete workflow delegate monitor part of another workflow not deleted`() { val docLevelInput = DocLevelMonitorInput( "description", listOf(index), listOf(DocLevelQuery(query = "source.ip.v6.v1:12345", name = "3")) @@ -4141,7 +5091,7 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { val queryMonitor = randomQueryLevelMonitor() val queryMonitorResponse = createMonitor(queryMonitor)!! - var workflow = randomWorkflow( + val workflow = randomWorkflow( monitorIds = listOf(queryMonitorResponse.id, docMonitorResponse.id) ) try { @@ -4316,4 +5266,144 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { } } } + + fun `test create workflow with chained alert triggers`() { + val docQuery1 = DocLevelQuery(query = "source.ip.v6.v1:12345", name = "3") + val docLevelInput = DocLevelMonitorInput( + "description", listOf(index), listOf(docQuery1) + ) + val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val customFindingsIndex = "custom_findings_index" + val customFindingsIndexPattern = "custom_findings_index-1" + val customQueryIndex = "custom_alerts_index" + val monitor1 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(trigger), + dataSources = DataSources( + queryIndex = customQueryIndex, + findingsIndex = customFindingsIndex, + findingsIndexPattern = customFindingsIndexPattern + ) + ) + + val monitor2 = randomDocumentLevelMonitor( + 
inputs = listOf(docLevelInput), + triggers = listOf(trigger), + dataSources = DataSources( + queryIndex = customQueryIndex, + findingsIndex = customFindingsIndex, + findingsIndexPattern = customFindingsIndexPattern + ) + ) + + val monitorResponse1 = createMonitor(monitor1)!! + val monitorResponse2 = createMonitor(monitor2)!! + + val chainedAlertTrigger1 = randomChainedAlertTrigger( + condition = Script("monitor[id=${monitorResponse1.id}] && monitor[id=${monitorResponse2.id}") + ) + val chainedAlertTrigger2 = randomChainedAlertTrigger( + condition = Script("monitor[id=${monitorResponse1.id}] || monitor[id=${monitorResponse2.id}]") + ) + val workflow = randomWorkflow( + monitorIds = listOf(monitorResponse1.id, monitorResponse2.id), + triggers = listOf( + chainedAlertTrigger1, + chainedAlertTrigger2 + ) + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id)!! + + assertEquals("Workflow input not correct", workflowById.triggers.size, 2) + assertEquals("Workflow input not correct", workflowById.triggers.get(0).name, chainedAlertTrigger1.name) + assertEquals("Workflow input not correct", workflowById.triggers.get(1).name, chainedAlertTrigger2.name) + assertEquals("Workflow input not correct", workflowById.triggers.get(0).id, chainedAlertTrigger1.id) + assertEquals("Workflow input not correct", workflowById.triggers.get(1).id, chainedAlertTrigger2.id) + assertEquals( + "Workflow input not correct", + (workflowById.triggers.get(0) as ChainedAlertTrigger).condition.idOrCode, + chainedAlertTrigger1.condition.idOrCode + ) + assertEquals( + "Workflow input not correct", + (workflowById.triggers.get(1) as ChainedAlertTrigger).condition.idOrCode, + chainedAlertTrigger2.condition.idOrCode + ) + } + + fun `test postIndex on workflow update with trigger deletion`() { + val monitorRunnerService = getInstanceFromNode(MonitorRunnerService.javaClass) + val docQuery1 = DocLevelQuery(query = "test_field_1:\"us-west-2\"", name = 
"3") + val docLevelInput1 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) + val trigger1 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + var monitor1 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput1), + triggers = listOf(trigger1) + ) + var monitor2 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput1), + triggers = listOf(trigger1) + ) + val monitorResponse = createMonitor(monitor1)!! + val monitorResponse2 = createMonitor(monitor2)!! + + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${monitorResponse.id}] && monitor[id=${monitorResponse2.id}]") + ) + val notTrigger = randomChainedAlertTrigger( + name = "Not1OrNot2", + condition = Script("!monitor[id=${monitorResponse.id}] || !monitor[id=${monitorResponse2.id}]") + ) + var workflow = randomWorkflow( + monitorIds = listOf(monitorResponse.id, monitorResponse2.id), + triggers = listOf(andTrigger) + ) + val workflowResponse = upsertWorkflow(workflow)!! + val workflowById = searchWorkflow(workflowResponse.id) + val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS)) + val testDoc1 = """{ + "message" : "This is an error from IAD region", + "source.ip.v6.v2" : 16644, + "test_strict_date_time" : "$testTime", + "test_field_1" : "us-west-2" + }""" + indexDoc(index, "1", testDoc1) + val workflowId = workflowById!!.id + var executeWorkflowResponse = executeWorkflow(workflowById, workflowId, false)!! + var res = getWorkflowAlerts( + workflowId, + ) + var chainedAlerts = res.alerts + Assert.assertTrue(chainedAlerts.size == 1) + val updatedWorkflowResponse = upsertWorkflow( + workflowById.copy(triggers = listOf(notTrigger)), + workflowResponse.id, + RestRequest.Method.PUT + )!! 
+ val updatedWorkflow = searchWorkflow(workflowResponse.id) + Assert.assertTrue(updatedWorkflow!!.triggers.size == 1) + Assert.assertTrue(updatedWorkflow.triggers[0].id == notTrigger.id) + OpenSearchTestCase.waitUntil({ + val searchRequest = SearchRequest(AlertIndices.ALERT_HISTORY_ALL) + val sr = client().search(searchRequest).get() + sr.hits.hits.size == 3 + }, 5, TimeUnit.MINUTES) + val searchRequest = SearchRequest(AlertIndices.ALERT_HISTORY_ALL) + val sr = client().search(searchRequest).get() + Assert.assertTrue(sr.hits.hits.size == 3) + val alerts = sr.hits.map { hit -> + val xcp = XContentHelper.createParser( + xContentRegistry(), + LoggingDeprecationHandler.INSTANCE, + hit.sourceRef, + XContentType.JSON + ) + XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_OBJECT, xcp.nextToken(), xcp) + val alert = Alert.parse(xcp, hit.id, hit.version) + alert + } + Assert.assertTrue(alerts.stream().anyMatch { it.state == Alert.State.DELETED && chainedAlerts[0].id == it.id }) + } } diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt index b72c08865..0cd03131f 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt @@ -35,6 +35,7 @@ import org.opensearch.commons.alerting.model.ActionExecutionResult import org.opensearch.commons.alerting.model.AggregationResultBucket import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.BucketLevelTrigger +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.ChainedMonitorFindings import org.opensearch.commons.alerting.model.ClusterMetricsInput import org.opensearch.commons.alerting.model.CompositeInput @@ -232,7 +233,9 @@ fun randomWorkflow( schedule: Schedule = IntervalSchedule(interval = 5, unit = ChronoUnit.MINUTES), enabled: Boolean = 
randomBoolean(), enabledTime: Instant? = if (enabled) Instant.now().truncatedTo(ChronoUnit.MILLIS) else null, - lastUpdateTime: Instant = Instant.now().truncatedTo(ChronoUnit.MILLIS) + lastUpdateTime: Instant = Instant.now().truncatedTo(ChronoUnit.MILLIS), + triggers: List = emptyList(), + auditDelegateMonitorAlerts: Boolean? = true ): Workflow { val delegates = mutableListOf() if (!monitorIds.isNullOrEmpty()) { @@ -255,7 +258,8 @@ fun randomWorkflow( inputs = listOf(CompositeInput(Sequence(delegates))), version = -1L, schemaVersion = 0, - triggers = listOf() + triggers = triggers, + auditDelegateMonitorAlerts = auditDelegateMonitorAlerts ) } @@ -268,6 +272,7 @@ fun randomWorkflowWithDelegates( enabled: Boolean = randomBoolean(), enabledTime: Instant? = if (enabled) Instant.now().truncatedTo(ChronoUnit.MILLIS) else null, lastUpdateTime: Instant = Instant.now().truncatedTo(ChronoUnit.MILLIS), + triggers: List = emptyList() ): Workflow { return Workflow( id = id, @@ -281,7 +286,7 @@ fun randomWorkflowWithDelegates( inputs = listOf(CompositeInput(Sequence(delegates))), version = -1L, schemaVersion = 0, - triggers = emptyList() + triggers = triggers ) } @@ -771,3 +776,22 @@ fun assertUserNull(monitor: Monitor) { fun assertUserNull(workflow: Workflow) { assertNull("User is not null", workflow.user) } + +fun randomChainedAlertTrigger( + id: String = UUIDs.base64UUID(), + name: String = OpenSearchRestTestCase.randomAlphaOfLength(10), + severity: String = "1", + condition: Script = randomScript(), + actions: List = mutableListOf(), + destinationId: String = "" +): ChainedAlertTrigger { + return ChainedAlertTrigger( + id = id, + name = name, + severity = severity, + condition = condition, + actions = if (actions.isEmpty() && destinationId.isNotBlank()) { + (0..randomInt(10)).map { randomAction(destinationId = destinationId) } + } else actions + ) +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetMonitorResponseTests.kt 
b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetMonitorResponseTests.kt index ae2e68747..6e6596951 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetMonitorResponseTests.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetMonitorResponseTests.kt @@ -18,7 +18,7 @@ import java.time.ZoneId class GetMonitorResponseTests : OpenSearchTestCase() { fun `test get monitor response`() { - val req = GetMonitorResponse("1234", 1L, 2L, 0L, RestStatus.OK, null) + val req = GetMonitorResponse("1234", 1L, 2L, 0L, RestStatus.OK, null, null) assertNotNull(req) val out = BytesStreamOutput() @@ -51,7 +51,7 @@ class GetMonitorResponseTests : OpenSearchTestCase() { triggers = mutableListOf(), uiMetadata = mutableMapOf() ) - val req = GetMonitorResponse("1234", 1L, 2L, 0L, RestStatus.OK, monitor) + val req = GetMonitorResponse("1234", 1L, 2L, 0L, RestStatus.OK, monitor, null) assertNotNull(req) val out = BytesStreamOutput() diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/alerts/AlertIndicesIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/alerts/AlertIndicesIT.kt index 4ce7bd2be..eddbabf90 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/alerts/AlertIndicesIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/alerts/AlertIndicesIT.kt @@ -64,7 +64,7 @@ class AlertIndicesIT : AlertingRestTestCase() { executeMonitor(createRandomMonitor()) assertIndexExists(AlertIndices.ALERT_INDEX) assertIndexExists(AlertIndices.ALERT_HISTORY_WRITE_INDEX) - verifyIndexSchemaVersion(ScheduledJob.SCHEDULED_JOBS_INDEX, 7) + verifyIndexSchemaVersion(ScheduledJob.SCHEDULED_JOBS_INDEX, 8) verifyIndexSchemaVersion(AlertIndices.ALERT_INDEX, 5) verifyIndexSchemaVersion(AlertIndices.ALERT_HISTORY_WRITE_INDEX, 5) } @@ -88,7 +88,7 @@ class AlertIndicesIT : AlertingRestTestCase() { val trueMonitor = createMonitor(randomDocumentLevelMonitor(inputs = listOf(docLevelInput), triggers = listOf(trigger))) executeMonitor(trueMonitor.id) 
assertIndexExists(AlertIndices.FINDING_HISTORY_WRITE_INDEX) - verifyIndexSchemaVersion(ScheduledJob.SCHEDULED_JOBS_INDEX, 7) + verifyIndexSchemaVersion(ScheduledJob.SCHEDULED_JOBS_INDEX, 8) verifyIndexSchemaVersion(AlertIndices.FINDING_HISTORY_WRITE_INDEX, 3) } diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionParserTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionParserTests.kt new file mode 100644 index 000000000..7ebc82697 --- /dev/null +++ b/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionParserTests.kt @@ -0,0 +1,84 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition + +import org.junit.Assert +import org.opensearch.alerting.chainedAlertCondition.parsers.ChainedAlertExpressionParser +import org.opensearch.test.OpenSearchTestCase + +class ChainedAlertsExpressionParserTests : OpenSearchTestCase() { + + fun `test trigger expression posix parsing simple AND`() { + val eqString = "(monitor[id=abc] && monitor[id=xyz])" + val equation = ChainedAlertExpressionParser(eqString).parse() + val expectedEquation = "monitor[id=abc] monitor[id=xyz] && " + Assert.assertTrue(expectedEquation == equation.toString()) + } + + fun `test trigger expression posix parsing simple AND without parentheses`() { + val eqString = "monitor[id=abc] && monitor[id=xyz]" + val equation = ChainedAlertExpressionParser(eqString).parse() + val expectedEquation = "monitor[id=abc] monitor[id=xyz] && " + Assert.assertTrue(expectedEquation == equation.toString()) + } + + fun `test trigger expression posix parsing multiple AND`() { + val eqString = "(monitor[id=abc] && monitor[id=def]) && monitor[id=ghi]" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=abc] monitor[id=def] && monitor[id=ghi] && ", 
equation.toString()) + } + + fun `test trigger expression posix parsing multiple AND with parenthesis`() { + val eqString = "(monitor[id=sigma-123] && monitor[id=sigma-456]) && (monitor[id=sigma-789] && monitor[id=id-2aw34])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals( + "monitor[id=sigma-123] monitor[id=sigma-456] && monitor[id=sigma-789] monitor[id=id-2aw34] && && ", + equation.toString() + ) + } + + fun `test trigger expression posix parsing simple OR`() { + val eqString = "(monitor[id=sigma-123] || monitor[id=sigma-456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=sigma-123] monitor[id=sigma-456] || ", equation.toString()) + } + + fun `test trigger expression posix parsing multiple OR`() { + val eqString = "(monitor[id=sigma-123] || monitor[id=sigma-456]) || monitor[id=sigma-789]" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=sigma-123] monitor[id=sigma-456] || monitor[id=sigma-789] || ", equation.toString()) + } + + fun `test trigger expression posix parsing multiple OR with parenthesis`() { + val eqString = "(monitor[id=sigma-123] || monitor[id=sigma-456]) || (monitor[id=sigma-789] || monitor[id=id-2aw34])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals( + "monitor[id=sigma-123] monitor[id=sigma-456] || monitor[id=sigma-789] monitor[id=id-2aw34] || || ", + equation.toString() + ) + } + + fun `test trigger expression posix parsing simple NOT`() { + val eqString = "(monitor[id=sigma-123] || !monitor[id=sigma-456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=sigma-123] monitor[id=sigma-456] ! 
|| ", equation.toString()) + } + + fun `test trigger expression posix parsing multiple NOT`() { + val eqString = "(monitor[id=sigma-123] && !monitor[tag=tag-456]) && !(monitor[id=sigma-789])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=sigma-123] monitor[tag=tag-456] ! && monitor[id=sigma-789] ! && ", equation.toString()) + } + + fun `test trigger expression posix parsing multiple operators with parenthesis`() { + val eqString = "(monitor[id=sigma-123] && monitor[tag=sev1]) || !(!monitor[id=sigma-789] || monitor[id=id-2aw34])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals( + "monitor[id=sigma-123] monitor[tag=sev1] && monitor[id=sigma-789] ! monitor[id=id-2aw34] || ! || ", + equation.toString() + ) + } +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionResolveTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionResolveTests.kt new file mode 100644 index 000000000..a0851d58d --- /dev/null +++ b/alerting/src/test/kotlin/org/opensearch/alerting/chainedAlertCondition/ChainedAlertsExpressionResolveTests.kt @@ -0,0 +1,118 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.chainedAlertCondition + +import org.junit.Assert +import org.opensearch.alerting.chainedAlertCondition.parsers.ChainedAlertExpressionParser +import org.opensearch.test.OpenSearchTestCase + +class ChainedAlertsExpressionResolveTests : OpenSearchTestCase() { + + fun `test chained alert trigger expression evaluation simple AND`() { + val eqString = "(monitor[id=123] && monitor[id=456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=123] monitor[id=456] && ", equation.toString()) + val alertGeneratingMonitors: Set = setOf( + "123", + "456" + ) + 
Assert.assertTrue(equation.evaluate(alertGeneratingMonitors)) + val alertGeneratingMonitors2: Set = setOf( + "123", + "789" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors2)) + } + + fun `test chained alert trigger expression evaluation AND with NOT`() { + val eqString = "(monitor[id=123] && !monitor[id=456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=123] monitor[id=456] ! && ", equation.toString()) + val alertGeneratingMonitors: Set = setOf( + "123", + "456" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors)) + val alertGeneratingMonitors1: Set = setOf( + "123", + "223" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors1)) + } + + fun `test chained alert trigger expression evaluation simple OR`() { + val eqString = "(monitor[id=123] || monitor[id=456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=123] monitor[id=456] || ", equation.toString()) + val alertGeneratingMonitors: Set = setOf( + "123", + "456" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors)) + val alertGeneratingMonitors2: Set = setOf( + "234", + "567" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors2)) + } + + fun `test chained alert trigger expression evaluation OR with NOT`() { + val eqString = "(monitor[id=123] || !monitor[id=456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=123] monitor[id=456] ! 
|| ", equation.toString()) + val alertGeneratingMonitors: Set = setOf( + "123", + "456" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors)) + val alertGeneratingMonitors2: Set = setOf( + "456" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors2)) + } + + fun `test chained alert trigger expression evaluation simple NOT`() { + val eqString = "!(monitor[id=456])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals("monitor[id=456] ! ", equation.toString()) + val alertGeneratingMonitors: Set = setOf( + "123" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors)) + val alertGeneratingMonitors2: Set = setOf( + "456" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors2)) + } + + fun `test chained alert trigger expression evaluation with multiple operators with parenthesis`() { + val eqString = "(monitor[id=123] && monitor[id=456]) || !(!monitor[id=789] || monitor[id=abc])" + val equation = ChainedAlertExpressionParser(eqString).parse() + Assert.assertEquals( + "monitor[id=123] monitor[id=456] && monitor[id=789] ! monitor[id=abc] || ! 
|| ", + equation.toString() + ) + // part 1 evaluates, part 2 evaluates + val alertGeneratingMonitors1: Set = setOf( + "123", + "456", + "789", + "abc" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors1)) + // part 1 not evaluates, part 2 not evaluates + val alertGeneratingMonitors2: Set = setOf( + "789", + "abc" + ) + Assert.assertFalse(equation.evaluate(alertGeneratingMonitors2)) + // part 1 not evaluates, part 2 evaluates + val alertGeneratingMonitors3: Set = setOf( + "789" + ) + Assert.assertTrue(equation.evaluate(alertGeneratingMonitors3)) + } +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/WorkflowRestApiIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/WorkflowRestApiIT.kt index 82dd701c2..de31fab55 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/WorkflowRestApiIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/resthandler/WorkflowRestApiIT.kt @@ -5,31 +5,39 @@ package org.opensearch.alerting.resthandler +import org.junit.Assert import org.opensearch.alerting.ALWAYS_RUN import org.opensearch.alerting.AlertingRestTestCase import org.opensearch.alerting.WORKFLOW_ALERTING_BASE_URI import org.opensearch.alerting.makeRequest import org.opensearch.alerting.randomBucketLevelMonitor +import org.opensearch.alerting.randomChainedAlertTrigger import org.opensearch.alerting.randomDocumentLevelMonitor import org.opensearch.alerting.randomDocumentLevelTrigger import org.opensearch.alerting.randomQueryLevelMonitor +import org.opensearch.alerting.randomQueryLevelTrigger +import org.opensearch.alerting.randomUser import org.opensearch.alerting.randomWorkflow import org.opensearch.alerting.randomWorkflowWithDelegates import org.opensearch.client.ResponseException +import org.opensearch.commons.alerting.model.ChainedAlertTrigger import org.opensearch.commons.alerting.model.ChainedMonitorFindings import org.opensearch.commons.alerting.model.CompositeInput import 
org.opensearch.commons.alerting.model.Delegate import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.commons.alerting.model.IntervalSchedule import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.SearchInput import org.opensearch.commons.alerting.model.Workflow import org.opensearch.index.query.QueryBuilders import org.opensearch.rest.RestStatus +import org.opensearch.script.Script import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder import org.opensearch.search.builder.SearchSourceBuilder import org.opensearch.test.junit.annotations.TestLogging import java.time.Instant +import java.time.temporal.ChronoUnit import java.util.Collections import java.util.Locale import java.util.UUID @@ -95,7 +103,11 @@ class WorkflowRestApiIT : AlertingRestTestCase() { val bucketLevelMonitorResponse = createMonitor(bucketLevelMonitor) val workflow = randomWorkflow( - monitorIds = listOf(docLevelMonitorResponse.id, bucketLevelMonitorResponse.id) + monitorIds = listOf(docLevelMonitorResponse.id, bucketLevelMonitorResponse.id), + triggers = listOf( + randomChainedAlertTrigger(condition = Script("trigger1")), + randomChainedAlertTrigger(condition = Script("trigger2")) + ) ) val createResponse = client().makeRequest("POST", WORKFLOW_ALERTING_BASE_URI, emptyMap(), workflow.toHttpEntity()) @@ -137,6 +149,12 @@ class WorkflowRestApiIT : AlertingRestTestCase() { assertEquals( "Delegate2 Chained finding not correct", docLevelMonitorResponse.id, delegate2.chainedMonitorFindings!!.monitorId ) + + assertEquals(workflowById.triggers.size, 2) + assertTrue(workflowById.triggers[0] is ChainedAlertTrigger) + assertTrue(workflowById.triggers[1] is ChainedAlertTrigger) + assertTrue((workflowById.triggers[0] as ChainedAlertTrigger).condition == Script("trigger1")) + assertTrue((workflowById.triggers[1] as ChainedAlertTrigger).condition == 
Script("trigger2")) } fun `test create workflow without delegate failure`() { @@ -1004,4 +1022,88 @@ class WorkflowRestApiIT : AlertingRestTestCase() { assertEquals(RestStatus.NOT_FOUND, e.response.restStatus()) } } + + fun `test chained alerts and audit alerts for workflows with query level monitor`() { + val index = createTestIndex() + val docQuery1 = DocLevelQuery(query = "test_field:\"test_value_1\"", name = "3") + val docLevelInput1 = DocLevelMonitorInput("description", listOf(index), listOf(docQuery1)) + val trigger1 = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + var monitor1 = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput1), + triggers = listOf(trigger1), + enabled = false + ) + val monitorResponse = createMonitor(monitor1)!! + var monitor2 = randomQueryLevelMonitor( + triggers = listOf(randomQueryLevelTrigger(condition = Script("return true"))), + enabled = false + ) + + val monitorResponse2 = createMonitor(monitor2)!! + val andTrigger = randomChainedAlertTrigger( + name = "1And2", + condition = Script("monitor[id=${monitorResponse.id}] && monitor[id=${monitorResponse2.id}]") + ) + + val workflow = Workflow( + id = "", + version = 2, + name = "test", + enabled = false, + schedule = IntervalSchedule(5, ChronoUnit.MINUTES), + lastUpdateTime = Instant.now(), + enabledTime = null, + workflowType = Workflow.WorkflowType.COMPOSITE, + user = randomUser(), + schemaVersion = -1, + inputs = listOf( + CompositeInput( + org.opensearch.commons.alerting.model.Sequence( + delegates = listOf( + Delegate(1, monitorResponse.id), + Delegate(2, monitorResponse2.id) + ) + ) + ) + ), + owner = "alerting", + triggers = listOf(andTrigger) + ) + val workflowById = createWorkflow(workflow)!! 
+ assertNotNull(workflowById) + val workflowId = workflowById.id + + insertSampleTimeSerializedData( + index, + listOf( + "test_value_1" + ) + ) + + val response = executeWorkflow(workflowId = workflowId, params = emptyMap()) + val executeWorkflowResponse = entityAsMap(response) + logger.info(executeWorkflowResponse) + val executionId = executeWorkflowResponse["execution_id"] + Assert.assertTrue(executeWorkflowResponse.containsKey("trigger_results")) + val workflowTriggerResults = executeWorkflowResponse["trigger_results"] as Map + assertEquals(workflowTriggerResults.size, 1) + assertTrue( + (workflowTriggerResults[andTrigger.id] as Map)["triggered"] as Boolean + ) + val res = getWorkflowAlerts(workflowId, true) + val getWorkflowAlerts = entityAsMap(res) + Assert.assertTrue(getWorkflowAlerts.containsKey("alerts")) + Assert.assertTrue(getWorkflowAlerts.containsKey("associatedAlerts")) + val alerts = getWorkflowAlerts["alerts"] as List> + assertEquals(alerts.size, 1) + Assert.assertEquals(alerts[0]["execution_id"], executionId) + Assert.assertEquals(alerts[0]["workflow_id"], workflowId) + Assert.assertEquals(alerts[0]["monitor_id"], "") + val associatedAlerts = getWorkflowAlerts["associatedAlerts"] as List> + assertEquals(associatedAlerts.size, 2) + val ackRes = acknowledgeChainedAlerts(workflowId, alerts[0]["id"].toString()) + val acknowledgeChainedAlertsResponse = entityAsMap(ackRes) + val acknowledged = acknowledgeChainedAlertsResponse["success"] as List + Assert.assertEquals(acknowledged[0], alerts[0]["id"]) + } } diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/transport/AlertingSingleNodeTestCase.kt b/alerting/src/test/kotlin/org/opensearch/alerting/transport/AlertingSingleNodeTestCase.kt index f4e58f328..79c970d3b 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/transport/AlertingSingleNodeTestCase.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/transport/AlertingSingleNodeTestCase.kt @@ -37,6 +37,8 @@ import 
org.opensearch.commons.alerting.action.DeleteMonitorRequest import org.opensearch.commons.alerting.action.DeleteWorkflowRequest import org.opensearch.commons.alerting.action.GetFindingsRequest import org.opensearch.commons.alerting.action.GetFindingsResponse +import org.opensearch.commons.alerting.action.GetWorkflowAlertsRequest +import org.opensearch.commons.alerting.action.GetWorkflowAlertsResponse import org.opensearch.commons.alerting.action.GetWorkflowRequest import org.opensearch.commons.alerting.action.GetWorkflowResponse import org.opensearch.commons.alerting.action.IndexMonitorRequest @@ -52,6 +54,7 @@ import org.opensearch.commons.alerting.model.Workflow import org.opensearch.core.xcontent.XContentBuilder import org.opensearch.core.xcontent.XContentParser import org.opensearch.index.IndexService +import org.opensearch.index.query.BoolQueryBuilder import org.opensearch.index.query.TermQueryBuilder import org.opensearch.index.reindex.ReindexPlugin import org.opensearch.index.seqno.SequenceNumbers @@ -68,6 +71,7 @@ import java.time.ZonedDateTime import java.time.format.DateTimeFormatter import java.time.temporal.ChronoUnit import java.util.Locale +import java.util.concurrent.TimeUnit /** * A test that keep a singleton node started for all tests that can be used to get @@ -245,7 +249,12 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { return true } - protected fun searchAlerts(id: String, indices: String = AlertIndices.ALERT_INDEX, refresh: Boolean = true): List { + protected fun searchAlerts( + monitorId: String, + indices: String = AlertIndices.ALERT_INDEX, + refresh: Boolean = true, + executionId: String? 
= null, + ): List { try { if (refresh) refreshIndex(indices) } catch (e: Exception) { @@ -254,8 +263,13 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { } val ssb = SearchSourceBuilder() ssb.version(true) - ssb.query(TermQueryBuilder(Alert.MONITOR_ID_FIELD, id)) - val searchResponse = client().prepareSearch(indices).setRouting(id).setSource(ssb).get() + val bqb = BoolQueryBuilder() + bqb.must(TermQueryBuilder(Alert.MONITOR_ID_FIELD, monitorId)) + if (executionId.isNullOrEmpty() == false) { + bqb.must(TermQueryBuilder(Alert.EXECUTION_ID_FIELD, executionId)) + } + ssb.query(bqb) + val searchResponse = client().prepareSearch(indices).setRouting(monitorId).setSource(ssb).get() return searchResponse.hits.hits.map { val xcp = createParser(JsonXContent.jsonXContent, it.sourceRef).also { it.nextToken() } @@ -263,6 +277,30 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { } } + protected fun getWorkflowAlerts( + workflowId: String, + getAssociatedAlerts: Boolean? = true, + alertState: Alert.State? = Alert.State.ACTIVE, + alertIndex: String? = "", + associatedAlertsIndex: String? = "", + ): GetWorkflowAlertsResponse { + val table = Table("asc", "monitor_id", null, 100, 0, null) + return client().execute( + AlertingActions.GET_WORKFLOW_ALERTS_ACTION_TYPE, + GetWorkflowAlertsRequest( + table = table, + severityLevel = "ALL", + alertState = alertState!!.name, + alertIndex = alertIndex, + associatedAlertsIndex = associatedAlertsIndex, + monitorIds = emptyList(), + workflowIds = listOf(workflowId), + alertIds = emptyList(), + getAssociatedAlerts = getAssociatedAlerts!! 
+ ) + ).get() + } + protected fun refreshIndex(index: String) { client().execute(RefreshAction.INSTANCE, RefreshRequest(index)).get() } @@ -270,7 +308,7 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { protected fun searchFindings( id: String, indices: String = AlertIndices.ALL_FINDING_INDEX_PATTERN, - refresh: Boolean = true + refresh: Boolean = true, ): List { if (refresh) refreshIndex(indices) @@ -305,7 +343,7 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { protected fun getMonitorResponse( monitorId: String, version: Long = 1L, - fetchSourceContext: FetchSourceContext = FetchSourceContext.FETCH_SOURCE + fetchSourceContext: FetchSourceContext = FetchSourceContext.FETCH_SOURCE, ) = client().execute( GetMonitorAction.INSTANCE, GetMonitorRequest(monitorId, version, RestRequest.Method.GET, fetchSourceContext) @@ -435,6 +473,7 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { return client().execute(AlertingActions.INDEX_WORKFLOW_ACTION_TYPE, request).actionGet() } + protected fun getWorkflowById(id: String): GetWorkflowResponse { return client().execute( AlertingActions.GET_WORKFLOW_ACTION_TYPE, @@ -453,4 +492,12 @@ abstract class AlertingSingleNodeTestCase : OpenSearchSingleNodeTestCase() { val request = ExecuteWorkflowRequest(dryRun, TimeValue(Instant.now().toEpochMilli()), id, workflow) return client().execute(ExecuteWorkflowAction.INSTANCE, request).get() } + + override fun nodeSettings(): Settings { + return Settings.builder() + .put(super.nodeSettings()) + .put("opendistro.scheduled_jobs.sweeper.period", TimeValue(5, TimeUnit.SECONDS)) + .put("opendistro.scheduled_jobs.enabled", true) + .build() + } } diff --git a/core/src/main/resources/mappings/scheduled-jobs.json b/core/src/main/resources/mappings/scheduled-jobs.json index 2e844d300..2651c862e 100644 --- a/core/src/main/resources/mappings/scheduled-jobs.json +++ b/core/src/main/resources/mappings/scheduled-jobs.json 
@@ -1,6 +1,6 @@ { "_meta" : { - "schema_version": 7 + "schema_version": 8 }, "properties": { "monitor": { @@ -369,6 +369,9 @@ "enabled": { "type": "boolean" }, + "audit_delegate_monitor_alerts": { + "type": "boolean" + }, "enabled_time": { "type": "date", "format": "strict_date_time||epoch_millis"