From f5fb4b62ae88024198ca1f215e2081648dc841bd Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Wed, 25 Aug 2021 14:14:47 -0400 Subject: [PATCH] [ML] Telemetry for the Anomaly detection jobs health rule type (#110052) (#110085) * [ML] add mappings for the new rule type * [ML] add telemetry for enabled health checks * [ML] update xpack_plugins.json Co-authored-by: Dima Arnautov --- .../server/usage/alerts_usage_collector.ts | 1 + x-pack/plugins/ml/server/usage/collector.ts | 96 +++++++++++++++++++ .../schema/xpack_plugins.json | 38 ++++++++ 3 files changed, 135 insertions(+) diff --git a/x-pack/plugins/alerting/server/usage/alerts_usage_collector.ts b/x-pack/plugins/alerting/server/usage/alerts_usage_collector.ts index 59aeb4854d9f0..67687045f1b50 100644 --- a/x-pack/plugins/alerting/server/usage/alerts_usage_collector.ts +++ b/x-pack/plugins/alerting/server/usage/alerts_usage_collector.ts @@ -46,6 +46,7 @@ const byTypeSchema: MakeSchemaFrom<AlertsUsage>['count_by_type'] = { '__geo-containment': { type: 'long' }, // ML xpack_ml_anomaly_detection_alert: { type: 'long' }, + xpack_ml_anomaly_detection_jobs_health: { type: 'long' }, }; export function createAlertsUsageCollector( diff --git a/x-pack/plugins/ml/server/usage/collector.ts b/x-pack/plugins/ml/server/usage/collector.ts index 91fa72e3a04cc..ca865a8f48770 100644 --- a/x-pack/plugins/ml/server/usage/collector.ts +++ b/x-pack/plugins/ml/server/usage/collector.ts @@ -8,6 +8,8 @@ import type { UsageCollectionSetup } from '../../../../../src/plugins/usage_collection/server'; import { ML_ALERT_TYPES } from '../../common/constants/alerts'; import { AnomalyResultType } from '../../common/types/anomalies'; +import { MlAnomalyDetectionJobsHealthRuleParams } from '../../common/types/alerts'; +import { getResultJobsHealthRuleConfig } from '../../common/util/alerts'; export interface MlUsageData { alertRules: { @@ -18,6 +20,14 @@ export interface MlUsageData { influencer: number; }; }; 
+ 'xpack.ml.anomaly_detection_jobs_health': { + count_by_check_type: { + datafeed: number; + mml: number; + delayedData: number; + errorMessages: number; + }; + }; }; } @@ -42,6 +52,38 @@ export function registerCollector(usageCollection: UsageCollectionSetup, kibanaI }, }, }, + 'xpack.ml.anomaly_detection_jobs_health': { + count_by_check_type: { + datafeed: { + type: 'long', + _meta: { + description: + 'total number of alerting rules performing the not started datafeed health check', + }, + }, + mml: { + type: 'long', + _meta: { + description: + 'total number of alerting rules performing the model memory limit health check', + }, + }, + delayedData: { + type: 'long', + _meta: { + description: + 'total number of alerting rules performing the delayed data health check', + }, + }, + errorMessages: { + type: 'long', + _meta: { + description: + 'total number of alerting rules performing the error messages health check', + }, + }, + }, + }, }, }, isReady: () => !!kibanaIndex, @@ -86,11 +128,65 @@ export function registerCollector(usageCollection: UsageCollectionSetup, kibanaI return acc; }, {} as MlUsageData['alertRules'][typeof ML_ALERT_TYPES.ANOMALY_DETECTION]['count_by_result_type']); + const jobsHealthRuleInstances = await esClient.search<{ + alert: { + params: MlAnomalyDetectionJobsHealthRuleParams; + }; + }>({ + index: kibanaIndex, + size: 10000, + body: { + query: { + bool: { + filter: [ + { term: { type: 'alert' } }, + { + term: { + 'alert.alertTypeId': ML_ALERT_TYPES.AD_JOBS_HEALTH, + }, + }, + ], + }, + }, + }, + }); + + const resultsByCheckType = jobsHealthRuleInstances.body.hits.hits.reduce( + (acc, curr) => { + const doc = curr._source; + if (!doc) return acc; + + const { + alert: { + params: { testsConfig }, + }, + } = doc; + + const resultConfig = getResultJobsHealthRuleConfig(testsConfig); + + acc.datafeed += resultConfig.datafeed.enabled ? 1 : 0; + acc.mml += resultConfig.mml.enabled ? 1 : 0; + acc.delayedData += resultConfig.delayedData.enabled ? 
1 : 0; + acc.errorMessages += resultConfig.errorMessages.enabled ? 1 : 0; + + return acc; + }, + { + datafeed: 0, + mml: 0, + delayedData: 0, + errorMessages: 0, + } + ); + return { alertRules: { [ML_ALERT_TYPES.ANOMALY_DETECTION]: { count_by_result_type: countByResultType, }, + [ML_ALERT_TYPES.AD_JOBS_HEALTH]: { + count_by_check_type: resultsByCheckType, + }, }, }; }, diff --git a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json index bbe0ad8014ae7..6f50ed1c24e62 100644 --- a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json +++ b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json @@ -228,6 +228,9 @@ }, "xpack_ml_anomaly_detection_alert": { "type": "long" + }, + "xpack_ml_anomaly_detection_jobs_health": { + "type": "long" } } }, @@ -307,6 +310,9 @@ }, "xpack_ml_anomaly_detection_alert": { "type": "long" + }, + "xpack_ml_anomaly_detection_jobs_health": { + "type": "long" } } } @@ -3811,6 +3817,38 @@ } } } + }, + "xpack.ml.anomaly_detection_jobs_health": { + "properties": { + "count_by_check_type": { + "properties": { + "datafeed": { + "type": "long", + "_meta": { + "description": "total number of alerting rules performing the not started datafeed health check" + } + }, + "mml": { + "type": "long", + "_meta": { + "description": "total number of alerting rules performing the model memory limit health check" + } + }, + "delayedData": { + "type": "long", + "_meta": { + "description": "total number of alerting rules performing the delayed data health check" + } + }, + "errorMessages": { + "type": "long", + "_meta": { + "description": "total number of alerting rules performing the error messages health check" + } + } + } + } + } } } }