diff --git a/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/inventory_metric_threshold_executor.ts b/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/inventory_metric_threshold_executor.ts index f5937823d0e82..aa4f3b5a5e82f 100644 --- a/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/inventory_metric_threshold_executor.ts +++ b/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/inventory_metric_threshold_executor.ts @@ -19,9 +19,10 @@ import { AlertInstanceContext as AlertContext, AlertInstanceState as AlertState, } from '@kbn/alerting-plugin/common'; -import { AlertsClientError, RuleTypeState } from '@kbn/alerting-plugin/server'; +import { AlertsClientError, RuleExecutorOptions, RuleTypeState } from '@kbn/alerting-plugin/server'; import { getAlertUrl } from '@kbn/observability-plugin/common'; import { SnapshotMetricType } from '@kbn/metrics-data-access-plugin/common'; +import { ObservabilityMetricsAlert } from '@kbn/alerts-as-data-utils'; import { getOriginalActionGroup } from '../../../utils/get_original_action_group'; import { AlertStates, @@ -62,203 +63,80 @@ export type InventoryMetricThresholdRuleTypeState = RuleTypeState; // no specifi export type InventoryMetricThresholdAlertState = AlertState; // no specific state used export type InventoryMetricThresholdAlertContext = AlertContext; // no specific instance context used -export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) => - libs.metricsRules.createLifecycleRuleExecutor< - InventoryMetricThresholdParams & Record, - InventoryMetricThresholdRuleTypeState, - InventoryMetricThresholdAlertState, - InventoryMetricThresholdAlertContext, - InventoryMetricThresholdAllowedActionGroups - >( - async ({ +export type InventoryMetricThresholdAlert = Omit< + ObservabilityMetricsAlert, + 'kibana.alert.evaluation.values' +> & { + // Defining a custom type for this because the schema generation script doesn't allow explicit null values + [ALERT_EVALUATION_VALUES]?: Array; + [ALERT_EVALUATION_THRESHOLD]?: Array; +}; + +export const createInventoryMetricThresholdExecutor = + (libs: InfraBackendLibs) => + async ( + options: RuleExecutorOptions< + InventoryMetricThresholdParams & Record, + InventoryMetricThresholdRuleTypeState, + InventoryMetricThresholdAlertState, + InventoryMetricThresholdAlertContext, + InventoryMetricThresholdAllowedActionGroups, + InventoryMetricThresholdAlert + > + ) => { + const { services, params, + startedAt, executionId, spaceId, - startedAt, rule: { id: ruleId, tags: ruleTags }, getTimeRange, - }) => { - const startTime = Date.now(); + } = options; - const { criteria, filterQuery, sourceId = 'default', nodeType, alertOnNoData } = params; + const startTime = Date.now(); - if (criteria.length === 0) throw new Error('Cannot execute an alert with 0 conditions'); + const { criteria, filterQuery, sourceId = 'default', nodeType, alertOnNoData } = params; - const logger = createScopedLogger(libs.logger, 'inventoryRule', { - alertId: ruleId, - executionId, - }); + if (criteria.length === 0) throw new Error('Cannot execute an alert with 0 conditions'); - const esClient = services.scopedClusterClient.asCurrentUser; - - const { savedObjectsClient, alertFactory: baseAlertFactory, alertsClient } = services; - - if (!alertsClient) { - throw new AlertsClientError(); - } + const logger = createScopedLogger(libs.logger, 'inventoryRule', { + alertId: ruleId, + executionId, + }); - if (!params.filterQuery && params.filterQueryText) { - try { - const { fromKueryExpression } = await import('@kbn/es-query'); - fromKueryExpression(params.filterQueryText); - } catch (e) { - logger.error(e.message); - - const actionGroup = FIRED_ACTIONS.id; // Change this to an Error action group when able, - const reason = buildInvalidQueryAlertReason(params.filterQueryText); - - const { uuid, start } = alertsClient.report({ - id: UNGROUPED_FACTORY_KEY, - actionGroup, - }); - - const indexedStartedAt = start ?? startedAt.toISOString(); - - alertsClient.setAlertData({ - id: UNGROUPED_FACTORY_KEY, - payload: { - [ALERT_REASON]: reason, - [ALERT_ACTION_GROUP]: actionGroup, - }, - context: { - alertDetailsUrl: await getAlertUrl( - uuid, - spaceId, - indexedStartedAt, - libs.alertsLocator, - libs.basePath.publicBaseUrl - ), - alertState: stateToAlertMessage[AlertStates.ERROR], - group: UNGROUPED_FACTORY_KEY, - metric: mapToConditionsLookup(criteria, (c) => c.metric), - reason, - timestamp: startedAt.toISOString(), - value: null, - viewInAppUrl: getInventoryViewInAppUrlWithSpaceId({ - basePath: libs.basePath, - criteria, - nodeType, - timestamp: indexedStartedAt, - spaceId, - }), - }, - }); - - return { state: {} }; - } - } - const source = await libs.sources.getSourceConfiguration(savedObjectsClient, sourceId); - - const [, { logsShared }] = await libs.getStartServices(); - const logQueryFields: LogQueryFields | undefined = await logsShared.logViews - .getClient(savedObjectsClient, esClient) - .getResolvedLogView({ - type: 'log-view-reference', - logViewId: sourceId, - }) - .then( - ({ indices }) => ({ indexPattern: indices }), - () => undefined - ); + const esClient = services.scopedClusterClient.asCurrentUser; - const compositeSize = libs.configuration.alerting.inventory_threshold.group_by_page_size; - const { dateEnd } = getTimeRange(); - const results = await Promise.all( - criteria.map((condition) => - evaluateCondition({ - compositeSize, - condition, - esClient, - executionTimestamp: new Date(dateEnd), - filterQuery, - logger, - logQueryFields, - nodeType, - source, - }) - ) - ); + const { savedObjectsClient, alertFactory: baseAlertFactory, alertsClient } = services; - let scheduledActionsCount = 0; - const alertLimit = baseAlertFactory.alertLimit.getValue(); - let hasReachedLimit = false; - const inventoryItems = Object.keys(first(results)!); - for (const group of inventoryItems) { - if (scheduledActionsCount >= alertLimit) { - // need to set this so that warning is displayed in the UI and in the logs - hasReachedLimit = true; - break; // once limit is reached, we break out of the loop and don't schedule any more alerts - } - // AND logic; all criteria must be across the threshold - const shouldAlertFire = results.every((result) => result[group]?.shouldFire); - const shouldAlertWarn = results.every((result) => result[group]?.shouldWarn); - // AND logic; because we need to evaluate all criteria, if one of them reports no data then the - // whole alert is in a No Data/Error state - const isNoData = results.some((result) => result[group]?.isNoData); - const isError = results.some((result) => result[group]?.isError); - - const nextState = isError - ? AlertStates.ERROR - : isNoData - ? AlertStates.NO_DATA - : shouldAlertFire - ? AlertStates.ALERT - : shouldAlertWarn - ? AlertStates.WARNING - : AlertStates.OK; - let reason; - if (nextState === AlertStates.ALERT || nextState === AlertStates.WARNING) { - reason = results - .map((result) => - buildReasonWithVerboseMetricName( - group, - result[group], - buildFiredAlertReason, - nextState === AlertStates.WARNING - ) - ) - .join('\n'); - } - if (alertOnNoData) { - if (nextState === AlertStates.NO_DATA) { - reason = results - .filter((result) => result[group].isNoData) - .map((result) => - buildReasonWithVerboseMetricName(group, result[group], buildNoDataAlertReason) - ) - .join('\n'); - } else if (nextState === AlertStates.ERROR) { - reason = results - .filter((result) => result[group].isError) - .map((result) => - buildReasonWithVerboseMetricName(group, result[group], buildErrorAlertReason) - ) - .join('\n'); - } - } - if (reason) { - const actionGroup = - nextState === AlertStates.WARNING ? WARNING_ACTIONS_ID : FIRED_ACTIONS_ID; - - const additionalContext = results && results.length > 0 ? results[0][group].context : {}; - additionalContext.tags = Array.from( - new Set([...(additionalContext.tags ?? []), ...ruleTags]) - ); + if (!alertsClient) { + throw new AlertsClientError(); + } - const evaluationValues = getEvaluationValues(results, group); - const thresholds = getThresholds(criteria); + if (!params.filterQuery && params.filterQueryText) { + try { + const { fromKueryExpression } = await import('@kbn/es-query'); + fromKueryExpression(params.filterQueryText); + } catch (e) { + logger.error(e.message); - const { uuid, start } = alertsClient.report({ - id: group, - actionGroup, - }); + const actionGroup = FIRED_ACTIONS.id; // Change this to an Error action group when able, + const reason = buildInvalidQueryAlertReason(params.filterQueryText); - const indexedStartedAt = start ?? startedAt.toISOString(); + const { uuid, start } = alertsClient.report({ + id: UNGROUPED_FACTORY_KEY, + actionGroup, + }); - scheduledActionsCount++; + const indexedStartedAt = start ?? startedAt.toISOString(); - const context = { + alertsClient.setAlertData({ + id: UNGROUPED_FACTORY_KEY, + payload: { + [ALERT_REASON]: reason, + [ALERT_ACTION_GROUP]: actionGroup, + }, + context: { alertDetailsUrl: await getAlertUrl( uuid, spaceId, @@ -266,66 +144,152 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) = libs.alertsLocator, libs.basePath.publicBaseUrl ), - alertState: stateToAlertMessage[nextState], - group, - reason, + alertState: stateToAlertMessage[AlertStates.ERROR], + group: UNGROUPED_FACTORY_KEY, metric: mapToConditionsLookup(criteria, (c) => c.metric), + reason, timestamp: startedAt.toISOString(), - threshold: mapToConditionsLookup(criteria, (c) => c.threshold), - value: mapToConditionsLookup(results, (result) => - formatMetric(result[group].metric, result[group].currentValue) - ), + value: null, viewInAppUrl: getInventoryViewInAppUrlWithSpaceId({ basePath: libs.basePath, criteria, nodeType, timestamp: indexedStartedAt, spaceId, - hostName: additionalContext?.host?.name, }), - ...additionalContext, - }; + }, + }); - const payload = { - [ALERT_REASON]: reason, - [ALERT_ACTION_GROUP]: actionGroup, - [ALERT_EVALUATION_VALUES]: evaluationValues, - [ALERT_EVALUATION_THRESHOLD]: thresholds, - ...flattenAdditionalContext(additionalContext), - }; - - alertsClient.setAlertData({ - id: group, - payload, - context, - }); + return { state: {} }; + } + } + const source = await libs.sources.getSourceConfiguration(savedObjectsClient, sourceId); + + const [, { logsShared }] = await libs.getStartServices(); + const logQueryFields: LogQueryFields | undefined = await logsShared.logViews + .getClient(savedObjectsClient, esClient) + .getResolvedLogView({ + type: 'log-view-reference', + logViewId: sourceId, + }) + .then( + ({ indices }) => ({ indexPattern: indices }), + () => undefined + ); + + const compositeSize = libs.configuration.alerting.inventory_threshold.group_by_page_size; + const { dateEnd } = getTimeRange(); + const results = await Promise.all( + criteria.map((condition) => + evaluateCondition({ + compositeSize, + condition, + esClient, + executionTimestamp: new Date(dateEnd), + filterQuery, + logger, + logQueryFields, + nodeType, + source, + }) + ) + ); + + let scheduledActionsCount = 0; + const alertLimit = baseAlertFactory.alertLimit.getValue(); + let hasReachedLimit = false; + const inventoryItems = Object.keys(first(results)!); + for (const group of inventoryItems) { + if (scheduledActionsCount >= alertLimit) { + // need to set this so that warning is displayed in the UI and in the logs + hasReachedLimit = true; + break; // once limit is reached, we break out of the loop and don't schedule any more alerts + } + // AND logic; all criteria must be across the threshold + const shouldAlertFire = results.every((result) => result[group]?.shouldFire); + const shouldAlertWarn = results.every((result) => result[group]?.shouldWarn); + // AND logic; because we need to evaluate all criteria, if one of them reports no data then the + // whole alert is in a No Data/Error state + const isNoData = results.some((result) => result[group]?.isNoData); + const isError = results.some((result) => result[group]?.isError); + + const nextState = isError + ? AlertStates.ERROR + : isNoData + ? AlertStates.NO_DATA + : shouldAlertFire + ? AlertStates.ALERT + : shouldAlertWarn + ? AlertStates.WARNING + : AlertStates.OK; + let reason; + if (nextState === AlertStates.ALERT || nextState === AlertStates.WARNING) { + reason = results + .map((result) => + buildReasonWithVerboseMetricName( + group, + result[group], + buildFiredAlertReason, + nextState === AlertStates.WARNING + ) + ) + .join('\n'); + } + if (alertOnNoData) { + if (nextState === AlertStates.NO_DATA) { + reason = results + .filter((result) => result[group].isNoData) + .map((result) => + buildReasonWithVerboseMetricName(group, result[group], buildNoDataAlertReason) + ) + .join('\n'); + } else if (nextState === AlertStates.ERROR) { + reason = results + .filter((result) => result[group].isError) + .map((result) => + buildReasonWithVerboseMetricName(group, result[group], buildErrorAlertReason) + ) + .join('\n'); } } + if (reason) { + const actionGroup = + nextState === AlertStates.WARNING ? WARNING_ACTIONS_ID : FIRED_ACTIONS_ID; + + const additionalContext = results && results.length > 0 ? results[0][group].context : {}; + additionalContext.tags = Array.from( + new Set([...(additionalContext.tags ?? []), ...ruleTags]) + ); - alertsClient.setAlertLimitReached(hasReachedLimit); - const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? []; + const evaluationValues = getEvaluationValues(results, group); + const thresholds = getThresholds(criteria); - for (const recoveredAlert of recoveredAlerts) { - const recoveredAlertId = recoveredAlert.alert.getId(); - const indexedStartedAt = recoveredAlert.alert.getStart() ?? startedAt.toISOString(); - const alertUuid = recoveredAlert.alert.getUuid(); - const alertHits = recoveredAlert.hit; - const additionalContext = getContextForRecoveredAlerts(alertHits); - const originalActionGroup = getOriginalActionGroup(alertHits); + const { uuid, start } = alertsClient.report({ + id: group, + actionGroup, + }); + + const indexedStartedAt = start ?? startedAt.toISOString(); - recoveredAlert.alert.setContext({ + scheduledActionsCount++; + + const context = { alertDetailsUrl: await getAlertUrl( - alertUuid, + uuid, spaceId, indexedStartedAt, libs.alertsLocator, libs.basePath.publicBaseUrl ), - alertState: stateToAlertMessage[AlertStates.OK], - group: recoveredAlertId, + alertState: stateToAlertMessage[nextState], + group, + reason, metric: mapToConditionsLookup(criteria, (c) => c.metric), - threshold: mapToConditionsLookup(criteria, (c) => c.threshold), timestamp: startedAt.toISOString(), + threshold: mapToConditionsLookup(criteria, (c) => c.threshold), + value: mapToConditionsLookup(results, (result) => + formatMetric(result[group].metric, result[group].currentValue) + ), viewInAppUrl: getInventoryViewInAppUrlWithSpaceId({ basePath: libs.basePath, criteria, @@ -334,19 +298,69 @@ export const createInventoryMetricThresholdExecutor = (libs: InfraBackendLibs) = spaceId, hostName: additionalContext?.host?.name, }), - originalAlertState: translateActionGroupToAlertState(originalActionGroup), - originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS_ID, - originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS_ID, ...additionalContext, + }; + + const payload = { + [ALERT_REASON]: reason, + [ALERT_ACTION_GROUP]: actionGroup, + [ALERT_EVALUATION_VALUES]: evaluationValues, + [ALERT_EVALUATION_THRESHOLD]: thresholds, + ...flattenAdditionalContext(additionalContext), + }; + + alertsClient.setAlertData({ + id: group, + payload, + context, }); } + } - const stopTime = Date.now(); - logger.debug(`Scheduled ${scheduledActionsCount} actions in ${stopTime - startTime}ms`); - - return { state: {} }; + alertsClient.setAlertLimitReached(hasReachedLimit); + const recoveredAlerts = alertsClient.getRecoveredAlerts() ?? []; + + for (const recoveredAlert of recoveredAlerts) { + const recoveredAlertId = recoveredAlert.alert.getId(); + const indexedStartedAt = recoveredAlert.alert.getStart() ?? startedAt.toISOString(); + const alertUuid = recoveredAlert.alert.getUuid(); + const alertHits = recoveredAlert.hit; + const additionalContext = getContextForRecoveredAlerts(alertHits); + const originalActionGroup = getOriginalActionGroup(alertHits); + + recoveredAlert.alert.setContext({ + alertDetailsUrl: await getAlertUrl( + alertUuid, + spaceId, + indexedStartedAt, + libs.alertsLocator, + libs.basePath.publicBaseUrl + ), + alertState: stateToAlertMessage[AlertStates.OK], + group: recoveredAlertId, + metric: mapToConditionsLookup(criteria, (c) => c.metric), + threshold: mapToConditionsLookup(criteria, (c) => c.threshold), + timestamp: startedAt.toISOString(), + viewInAppUrl: getInventoryViewInAppUrlWithSpaceId({ + basePath: libs.basePath, + criteria, + nodeType, + timestamp: indexedStartedAt, + spaceId, + hostName: additionalContext?.host?.name, + }), + originalAlertState: translateActionGroupToAlertState(originalActionGroup), + originalAlertStateWasALERT: originalActionGroup === FIRED_ACTIONS_ID, + originalAlertStateWasWARNING: originalActionGroup === WARNING_ACTIONS_ID, + ...additionalContext, + }); } - ); + + const stopTime = Date.now(); + logger.debug(`Scheduled ${scheduledActionsCount} actions in ${stopTime - startTime}ms`); + + return { state: {} }; + }; const formatThreshold = (metric: SnapshotMetricType, value: number | number[]) => { const metricFormatter = get(METRIC_FORMATTERS, metric, METRIC_FORMATTERS.count); diff --git a/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/register_inventory_metric_threshold_rule_type.ts b/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/register_inventory_metric_threshold_rule_type.ts index 3b5c158f7c1f5..40070909d8d73 100644 --- a/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/register_inventory_metric_threshold_rule_type.ts +++ b/x-pack/plugins/observability_solution/infra/server/lib/alerting/inventory_metric_threshold/register_inventory_metric_threshold_rule_type.ts @@ -20,7 +20,6 @@ import { SnapshotMetricType, SnapshotMetricTypeKeys, } from '@kbn/metrics-data-access-plugin/common'; -import { MetricThresholdAlert } from '../metric_threshold/metric_threshold_executor'; import type { InfraConfig } from '../../../../common/plugin_config_types'; import { Comparator, @@ -54,6 +53,7 @@ import { createInventoryMetricThresholdExecutor, FIRED_ACTIONS, FIRED_ACTIONS_ID, + InventoryMetricThresholdAlert, WARNING_ACTIONS, } from './inventory_metric_threshold_executor'; import { MetricsRulesTypeAlertDefinition } from '../register_rule_types'; @@ -170,7 +170,7 @@ export async function registerInventoryThresholdRuleType( alerts: { ...MetricsRulesTypeAlertDefinition, shouldWrite: true, - } as IRuleTypeAlerts, + } as IRuleTypeAlerts, fieldsForAAD: O11Y_AAD_FIELDS, getViewInAppRelativeUrl: ({ rule }: GetViewInAppRelativeUrlFnOpts<{}>) => observabilityPaths.ruleDetails(rule.id),