From d80e3194a11c41eed0e4d1ecac0bcfef17c172dd Mon Sep 17 00:00:00 2001 From: Pete Harverson Date: Fri, 28 Jan 2022 14:25:08 +0000 Subject: [PATCH] [ML] Add comments to anomaly detection types and constants in public API (#123813) * [ML] Add comments to anomaly detection types and constants in public API * [ML] Clarify comment for job groups * [ML] More edits to summary_job docs Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> --- .../plugins/ml/common/constants/anomalies.ts | 77 +++++++ x-pack/plugins/ml/common/constants/states.ts | 9 + x-pack/plugins/ml/common/types/anomalies.ts | 211 +++++++++++++++++- .../anomaly_detection_jobs/summary_job.ts | 110 +++++++++ .../plugins/ml/common/util/anomaly_utils.ts | 124 ++++++---- 5 files changed, 486 insertions(+), 45 deletions(-) diff --git a/x-pack/plugins/ml/common/constants/anomalies.ts b/x-pack/plugins/ml/common/constants/anomalies.ts index 4decdcb2b2d28..3c031f28f4d3f 100644 --- a/x-pack/plugins/ml/common/constants/anomalies.ts +++ b/x-pack/plugins/ml/common/constants/anomalies.ts @@ -5,29 +5,106 @@ * 2.0. */ +/** + * Labels displayed in the ML UI to indicate the severity of the anomaly according + * to the normalized anomaly score. + */ export enum ANOMALY_SEVERITY { + /** + * Anomalies are displayed as critical severity when the score is greater than or equal to 75. + */ CRITICAL = 'critical', + + /** + * Anomalies are displayed as major severity when the score is greater than or equal to 50 and less than 75. + */ MAJOR = 'major', + + /** + * Anomalies are displayed as minor severity when the score is greater than or equal to 25 and less than 50. + */ MINOR = 'minor', + + /** + * Anomalies are displayed as warning severity when the score is greater than or equal to 3 and less than 25. + * Note in some parts of the UI, warning severity is used when the score is greater than or equal to 0. 
+ */ WARNING = 'warning', + + /** + * Anomalies are displayed as low severity in some parts of the ML UI when the score is greater than or equal to 0 and less than 3. + */ LOW = 'low', + + /** + * Anomalies are displayed as unknown severity if the anomaly score is not known. + */ UNKNOWN = 'unknown', } +/** + * Anomaly score numeric thresholds to indicate the severity of the anomaly. + */ export enum ANOMALY_THRESHOLD { + /** + * Threshold at which anomalies are labelled in the UI as critical. + */ CRITICAL = 75, + + /** + * Threshold at which anomalies are labelled in the UI as major. + */ MAJOR = 50, + + /** + * Threshold at which anomalies are labelled in the UI as minor. + */ MINOR = 25, + + /** + * Threshold at which anomalies are labelled in the UI as warning. + */ WARNING = 3, + + /** + * Threshold at which anomalies are labelled in the UI as low. + */ LOW = 0, } +/** + * RGB hex codes used to indicate the severity of an anomaly according to its anomaly score. + */ export const SEVERITY_COLORS = { + /** + * Color used in the UI to indicate a critical anomaly, with a score greater than or equal to 75. + */ CRITICAL: '#fe5050', + + /** + * Color used in the UI to indicate a major anomaly, with a score greater than or equal to 50 and less than 75. + */ MAJOR: '#fba740', + + /** + * Color used in the UI to indicate a minor anomaly, with a score greater than or equal to 25 and less than 50. + */ MINOR: '#fdec25', + + /** + * Color used in the UI to indicate a warning anomaly, with a score greater than or equal to 3 and less than 25. + * Note in some parts of the UI, warning severity is used when the score is greater than or equal to 0. + */ WARNING: '#8bc8fb', + + /** + * Color used in some parts of the UI to indicate a low severity anomaly, with a score greater than or equal to 0 and less than 3. + */ LOW: '#d2e9f7', + + /** + * Color used in the UI to indicate an anomaly for which the score is unknown.
+ */ BLANK: '#ffffff', }; diff --git a/x-pack/plugins/ml/common/constants/states.ts b/x-pack/plugins/ml/common/constants/states.ts index 04208fe32db80..252d38e159421 100644 --- a/x-pack/plugins/ml/common/constants/states.ts +++ b/x-pack/plugins/ml/common/constants/states.ts @@ -5,6 +5,9 @@ * 2.0. */ +/** + * The status of the datafeed. + */ export enum DATAFEED_STATE { STARTED = 'started', STARTING = 'starting', @@ -13,6 +16,9 @@ export enum DATAFEED_STATE { DELETED = 'deleted', } +/** + * The status of the anomaly detection job forecast. + */ export enum FORECAST_REQUEST_STATE { FAILED = 'failed', FINISHED = 'finished', @@ -20,6 +26,9 @@ export enum FORECAST_REQUEST_STATE { STARTED = 'started', } +/** + * The status of the anomaly detection job. + */ export enum JOB_STATE { CLOSED = 'closed', CLOSING = 'closing', diff --git a/x-pack/plugins/ml/common/types/anomalies.ts b/x-pack/plugins/ml/common/types/anomalies.ts index 3490047d8378e..31f90e0887895 100644 --- a/x-pack/plugins/ml/common/types/anomalies.ts +++ b/x-pack/plugins/ml/common/types/anomalies.ts @@ -8,37 +8,158 @@ import { PARTITION_FIELDS, ANOMALY_RESULT_TYPE } from '../constants/anomalies'; import type { KibanaUrlConfig } from './custom_urls'; +/** + * Influencers are the entities that have contributed to, or are to blame for, the anomalies. + * Influencer results are available only if an influencer_field_name is specified in the job configuration. + */ export interface Influencer { + /** + * The field name of the influencer. + */ influencer_field_name: string; + + /** + * The entities that influenced, contributed to, or were to blame for the anomaly. + */ influencer_field_values: string[]; } export type MLAnomalyDoc = AnomalyRecordDoc; +/** + * Anomaly record document. Records contain the detailed analytical results. + * They describe the anomalous activity that has been identified in the input data based on the detector configuration. 
+ */ export interface AnomalyRecordDoc { + /** + * Index signature to cover dynamic attributes added to the record depending on the fields being analyzed. + * For example, if the job is analyzing hostname as a by field, then a field hostname is added to the result document. + */ [key: string]: any; + + /** + * The identifier for the anomaly detection job. + */ job_id: string; + + /** + * The type of the result document, which is 'record' for record level results. + */ result_type: string; + + /** + * The probability of the individual anomaly occurring, in the range 0 to 1. + * This value can be held to a high precision of over 300 decimal places, + * so the record_score is provided as a human-readable and friendly interpretation of this. + */ probability: number; + + /** + * A normalized score between 0-100, which is based on the probability of the anomalousness of this record. + * Unlike initial_record_score, this value will be updated by a re-normalization process as new data is analyzed. + */ record_score: number; + + /** + * A normalized score between 0-100, which is based on the probability of the anomalousness of this record. + * This is the initial value that was calculated at the time the bucket was processed. + */ initial_record_score: number; + + /** + * The length of the bucket in seconds. This value matches the bucket_span that is specified in the job. + */ bucket_span: number; + + /** + * A unique identifier for the detector. This identifier is based on the order of the detectors + * in the analysis configuration, starting at zero. + */ detector_index: number; + + /** + * If true, this is an interim result. In other words, the results are calculated based on partial input data. + */ is_interim: boolean; + + /** + * The start time of the bucket for which these results were calculated. + */ timestamp: number; + + /** + * The field used to segment the analysis. 
+ * When you use this property, you have completely independent baselines for each value of this field. + */ partition_field_name?: string; + + /** + * The value of the partition field. + */ partition_field_value?: string | number; + + /** + * The function in which the anomaly occurs, as specified in the detector configuration. For example, max. + */ function: string; + + /** + * The description of the function in which the anomaly occurs, as specified in the detector configuration. + */ function_description: string; + + /** + * Certain functions require a field to operate on, for example, sum(). + * For those functions, this value is the name of the field to be analyzed. + */ + field_name?: string; + + /** + * The typical value for the bucket, according to analytical modeling. + */ typical?: number[]; + + /** + * The actual value for the bucket. + */ actual?: number[]; + + /** + * If influencers was specified in the detector configuration, this array contains influencers + * that contributed to or were to blame for an anomaly. + */ influencers?: Influencer[]; + + /** + * The field used to split the data. In particular, this property is used for analyzing the splits + * with respect to their own history. It is used for finding unusual values in the context of the split. + */ by_field_name?: string; - field_name?: string; + + /** + * The value of the by field. + */ by_field_value?: string; - multi_bucket_impact?: number; + + /** + * The field used to split the data. In particular, this property is used for analyzing + * the splits with respect to the history of all splits. + * It is used for finding unusual values in the population of all splits. + */ over_field_name?: string; + + /** + * The value of the over field. + */ over_field_value?: string; + + /** + * For population analysis, this property contains an array of anomaly records that are the causes + * for the anomaly that has been identified for the over field. 
If no over fields exist, this field is not present. + * This sub-resource contains the most anomalous records for the over_field_name. + * The causes resource contains similar elements to the record resource. + * Probability and scores are not applicable to causes. + */ causes?: Array<{ function: string; function_description: string; @@ -53,24 +174,107 @@ export interface AnomalyRecordDoc { partition_field_name?: string; partition_field_value?: string | number; }>; + + /** + * An indication of how strongly an anomaly is multi bucket or single bucket. + * The value is on a scale of -5.0 to +5.0 where -5.0 means the anomaly is + * purely single bucket and +5.0 means the anomaly is purely multi bucket. + */ + multi_bucket_impact?: number; } +/** + * Anomaly table record, representing the fields shown in the ML UI anomalies table. + */ export interface AnomaliesTableRecord { + /** + * The start time of the interval for which the anomaly data in the table is being aggregated. + * Anomalies in the table are commonly aggregated by day, hour, or at the bucket span of the job. + */ time: number; + + /** + * The source anomaly record document, containing the full source anomaly record fields. + */ source: AnomalyRecordDoc; + + /** + * Unique identifier for the table row. + */ rowId: string; + + /** + * Identifier for the anomaly detection job. + */ jobId: string; + + /** + * A unique identifier for the detector. + * This identifier is based on the order of the detectors in the analysis configuration, starting at zero. + */ detectorIndex: number; + + /** + * Severity of the anomaly displaying the anomaly record_score, a normalized score between 0-100, + * which is based on the probability of the anomalousness of this record. + */ severity: number; + + /** + * The entity name of the anomaly, looking first for a by_field, then over_field, + * then partition_field, returning undefined if none of these fields are present. 
+ */ entityName?: string; + + /** + * The value of the entity field. + */ entityValue?: any; + + /** + * If influencers was specified in the detector configuration, this array contains influencers + * that contributed to or were to blame for an anomaly. + */ influencers?: Array<{ [key: string]: any }>; + + /** + * The actual value for the anomaly. + */ actual?: number[]; + + /** + * Property used by the table to sort anomalies by their actual value, + * which is a single numeric value rather than the underlying arrays. + */ actualSort?: any; + + /** + * The typical value for the anomaly. + */ typical?: number[]; + + /** + * Property used by the table to sort anomalies by their typical value, + * which is a single numeric value rather than the underlying arrays. + */ typicalSort?: any; + + /** + * Property used by the table to sort anomalies by the description of how the + * actual value compares to the typical value. + */ metricDescriptionSort?: number; + + /** + * List of custom URL drilldowns from the table row to other pages such as + * Discover, Dashboard or other web pages. + */ customUrls?: KibanaUrlConfig[]; + + /** + * Returns true if the anomaly record represented by the table row is for a time series + * which can be plotted by the ML UI in an anomaly chart. + */ isTimeSeriesViewRecord?: boolean; } @@ -95,4 +299,7 @@ export interface AnomalyCategorizerStatsDoc { export type EntityFieldType = 'partition_field' | 'over_field' | 'by_field'; +/** + * The type of the anomaly result, such as bucket, influencer or record. 
+ */ export type AnomalyResultType = typeof ANOMALY_RESULT_TYPE[keyof typeof ANOMALY_RESULT_TYPE]; diff --git a/x-pack/plugins/ml/common/types/anomaly_detection_jobs/summary_job.ts b/x-pack/plugins/ml/common/types/anomaly_detection_jobs/summary_job.ts index 6bcba719deaee..fed0cc85c20b0 100644 --- a/x-pack/plugins/ml/common/types/anomaly_detection_jobs/summary_job.ts +++ b/x-pack/plugins/ml/common/types/anomaly_detection_jobs/summary_job.ts @@ -14,32 +14,142 @@ import type { MlJobBlocked } from './job'; export type { Datafeed } from './datafeed'; export type { DatafeedStats } from './datafeed_stats'; +/** + * A summary of an anomaly detection job. + */ export interface MlSummaryJob { + /** + * The identifier for the anomaly detection job. + */ id: string; + + /** + * A description of the job. + */ description: string; + + /** + * A list of job groups. A job can belong to no groups, one or many. + */ groups: string[]; + + /** + * The number of input documents that have been processed by the anomaly detection job. + * This value includes documents with missing fields, since they are nonetheless analyzed. + */ processed_record_count?: number; + + /** + * The status of the mathematical models, which can take the values ok, soft_limit or hard_limit. + */ memory_status?: string; + + /** + * The status of the job. + */ jobState: string; + + /** + * An array of index names used by the datafeed. Wildcards are supported. + */ datafeedIndices: string[]; + + /** + * Flag indicating whether a datafeed exists for the job. + */ hasDatafeed: boolean; + + /** + * The identifier for the datafeed. + */ datafeedId: string; + + /** + * The status of the datafeed. + */ datafeedState: string; + + /** + * The timestamp of the latest chronologically input document. + */ latestTimestampMs?: number; + + /** + * The timestamp of the earliest chronologically input document. 
+ */ earliestTimestampMs?: number; + + /** + * The latest of the timestamp of the latest chronologically input document or the latest bucket that was processed. + */ latestResultsTimestampMs?: number; + + /** + * Used in older implementations of the job config, where the datafeed was placed inside the job for convenience. + * This will be populated if the job's id has been passed to the /api/ml/jobs/jobs_summary endpoint. + */ fullJob?: CombinedJob; + + /** + * The name of the node that runs the job. + */ nodeName?: string; + + /** + * Audit message for the job. + */ auditMessage?: Partial; + + /** + * Flag indicating whether results of the job can be viewed in the Single Metric Viewer. + */ isSingleMetricViewerJob: boolean; + + /** + * For jobs which cannot be viewed in the Single Metric Viewer, a message indicating the reason why + * results for the job cannot be viewed in the Single Metric Viewer. + */ isNotSingleMetricViewerJobMessage?: string; + + /** + * When present, it explains that a task is currently running on the job, which is stopping + * any other actions from being performed on the job. + */ blocked?: MlJobBlocked; + + /** + * Value of the latest timestamp for the job used for sorting. + */ latestTimestampSortValue?: number; + + /** + * The earliest of the timestamp of the earliest chronologically input document or the earliest bucket that was processed. + */ earliestStartTimestampMs?: number; + + /** + * Indicates whether the job is currently awaiting assignment to a node before opening. + */ awaitingNodeAssignment: boolean; + + /** + * List of anomaly detection alerting rules configured for the job. + */ alertingRules?: MlAnomalyDetectionAlertRule[]; + + /** + * List of tags that have been added to the job. + */ jobTags: Record; + + /** + * The size of the interval that the analysis is aggregated into, typically between 5m and 1h. + */ bucketSpanSeconds: number; + + /** + * Advanced configuration option. Contains custom metadata about the job.
For example, it can contain custom URL information. + */ customSettings?: MlCustomSettings; } diff --git a/x-pack/plugins/ml/common/util/anomaly_utils.ts b/x-pack/plugins/ml/common/util/anomaly_utils.ts index 0bfb96efc4311..cbbec963b0c3d 100644 --- a/x-pack/plugins/ml/common/util/anomaly_utils.ts +++ b/x-pack/plugins/ml/common/util/anomaly_utils.ts @@ -124,14 +124,18 @@ export function isCategorizationAnomaly(anomaly: AnomaliesTableRecord): boolean } /** - * Return formatted severity score. + * Returns formatted severity score. + * @param score - A normalized score between 0-100, which is based on the probability of the anomalousness of this record */ export function getFormattedSeverityScore(score: number): string { return score < 1 ? '< 1' : String(parseInt(String(score), 10)); } -// Returns a severity label (one of critical, major, minor, warning or unknown) -// for the supplied normalized anomaly score (a value between 0 and 100). +/** + * Returns a severity label (one of critical, major, minor, warning or unknown) + * for the supplied normalized anomaly score (a value between 0 and 100). + * @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record + */ export function getSeverity(normalizedScore: number): SeverityType { const severityTypesList = getSeverityTypes(); @@ -148,6 +152,11 @@ export function getSeverity(normalizedScore: number): SeverityType { } } +/** + * Returns a severity type (indicating a critical, major, minor, warning or low severity anomaly) + * for the supplied normalized anomaly score (a value between 0 and 100). 
+ * @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record + */ export function getSeverityType(normalizedScore: number): ANOMALY_SEVERITY { if (normalizedScore >= 75) { return ANOMALY_SEVERITY.CRITICAL; @@ -164,9 +173,12 @@ export function getSeverityType(normalizedScore: number): ANOMALY_SEVERITY { } } -// Returns a severity label (one of critical, major, minor, warning, low or unknown) -// for the supplied normalized anomaly score (a value between 0 and 100), where scores -// less than 3 are assigned a severity of 'low'. +/** + * Returns a severity label (one of critical, major, minor, warning, low or unknown) + * for the supplied normalized anomaly score (a value between 0 and 100), where scores + * less than 3 are assigned a severity of 'low'. + * @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record + */ export function getSeverityWithLow(normalizedScore: number): SeverityType { const severityTypesList = getSeverityTypes(); @@ -185,8 +197,11 @@ export function getSeverityWithLow(normalizedScore: number): SeverityType { } } -// Returns a severity RGB color (one of critical, major, minor, warning, low_warning or unknown) -// for the supplied normalized anomaly score (a value between 0 and 100). +/** + * Returns a severity RGB color (one of critical, major, minor, warning, low or blank) + * for the supplied normalized anomaly score (a value between 0 and 100). 
+ * @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record + */ export function getSeverityColor(normalizedScore: number): string { if (normalizedScore >= ANOMALY_THRESHOLD.CRITICAL) { return SEVERITY_COLORS.CRITICAL; @@ -203,9 +218,12 @@ export function getSeverityColor(normalizedScore: number): string { } } -// Returns a label to use for the multi-bucket impact of an anomaly -// according to the value of the multi_bucket_impact field of a record, -// which ranges from -5 to +5. +/** + * Returns a label to use for the multi-bucket impact of an anomaly + * according to the value of the multi_bucket_impact field of a record, + * which ranges from -5 to +5. + * @param multiBucketImpact - Value of the multi_bucket_impact field of a record, from -5 to +5 + */ export function getMultiBucketImpactLabel(multiBucketImpact: number): string { if (multiBucketImpact >= MULTI_BUCKET_IMPACT.HIGH) { return i18n.translate('xpack.ml.anomalyUtils.multiBucketImpact.highLabel', { @@ -226,9 +244,12 @@ export function getMultiBucketImpactLabel(multiBucketImpact: number): string { } } -// Returns the name of the field to use as the entity name from the source record -// obtained from Elasticsearch. The function looks first for a by_field, then over_field, -// then partition_field, returning undefined if none of these fields are present. +/** + * Returns the name of the field to use as the entity name from the source record + * obtained from Elasticsearch. The function looks first for a by_field, then over_field, + * then partition_field, returning undefined if none of these fields are present. + * @param record - anomaly record result for which to obtain the entity field name. + */ export function getEntityFieldName(record: AnomalyRecordDoc): string | undefined { // Analyses with by and over fields, will have a top-level by_field_name, but // the by_field_value(s) will be in the nested causes array. 
@@ -245,9 +266,12 @@ export function getEntityFieldName(record: AnomalyRecordDoc): string | undefined } } -// Returns the value of the field to use as the entity value from the source record -// obtained from Elasticsearch. The function looks first for a by_field, then over_field, -// then partition_field, returning undefined if none of these fields are present. +/** + * Returns the value of the field to use as the entity value from the source record + * obtained from Elasticsearch. The function looks first for a by_field, then over_field, + * then partition_field, returning undefined if none of these fields are present. + * @param record - anomaly record result for which to obtain the entity field value. + */ export function getEntityFieldValue(record: AnomalyRecordDoc): string | number | undefined { if (record.by_field_value !== undefined) { return record.by_field_value; @@ -262,8 +286,11 @@ export function getEntityFieldValue(record: AnomalyRecordDoc): string | number | } } -// Returns the list of partitioning entity fields for the source record as a list -// of objects in the form { fieldName: airline, fieldValue: AAL, fieldType: partition } +/** + * Returns the list of partitioning entity fields for the source record as a list + * of objects in the form { fieldName: airline, fieldValue: AAL, fieldType: partition } + * @param record - anomaly record result for which to obtain the entity field list. + */ export function getEntityFieldList(record: AnomalyRecordDoc): EntityField[] { const entityFields: EntityField[] = []; if (record.partition_field_name !== undefined) { @@ -296,21 +323,30 @@ export function getEntityFieldList(record: AnomalyRecordDoc): EntityField[] { return entityFields; } -// Returns whether actual values should be displayed for a record with the specified function description. -// Note that the 'function' field in a record contains what the user entered e.g. 
'high_count', -// whereas the 'function_description' field holds a ML-built display hint for function e.g. 'count'. +/** + * Returns whether actual values should be displayed for a record with the specified function description. + * Note that the 'function' field in a record contains what the user entered e.g. 'high_count', + * whereas the 'function_description' field holds a ML-built display hint for function e.g. 'count'. + * @param functionDescription - function_description value for the anomaly record + */ export function showActualForFunction(functionDescription: string): boolean { return DISPLAY_ACTUAL_FUNCTIONS.indexOf(functionDescription) > -1; } -// Returns whether typical values should be displayed for a record with the specified function description. -// Note that the 'function' field in a record contains what the user entered e.g. 'high_count', -// whereas the 'function_description' field holds a ML-built display hint for function e.g. 'count'. +/** + * Returns whether typical values should be displayed for a record with the specified function description. + * Note that the 'function' field in a record contains what the user entered e.g. 'high_count', + * whereas the 'function_description' field holds a ML-built display hint for function e.g. 'count'. + * @param functionDescription - function_description value for the anomaly record + */ export function showTypicalForFunction(functionDescription: string): boolean { return DISPLAY_TYPICAL_FUNCTIONS.indexOf(functionDescription) > -1; } -// Returns whether a rule can be configured against the specified anomaly. +/** + * Returns whether a rule can be configured against the specified anomaly. + * @param record - anomaly record result + */ export function isRuleSupported(record: AnomalyRecordDoc): boolean { // A rule can be configured with a numeric condition if the function supports it, // and/or with scope if there is a partitioning fields. 
@@ -320,23 +356,25 @@ export function isRuleSupported(record: AnomalyRecordDoc): boolean { ); } -// Two functions for converting aggregation type names. -// ML and ES use different names for the same function. -// Possible values for ML aggregation type are (defined in lib/model/CAnomalyDetector.cc): -// count -// distinct_count -// rare -// info_content -// mean -// median -// min -// max -// varp -// sum -// lat_long -// time -// The input to toES and the output from toML correspond to the value of the -// function_description field of anomaly records. +/** + * Two functions for converting aggregation type names. + * ML and ES use different names for the same function. + * Possible values for ML aggregation type are (defined in lib/model/CAnomalyDetector.cc): + * count + * distinct_count + * rare + * info_content + * mean + * median + * min + * max + * varp + * sum + * lat_long + * time + * The input to toES and the output from toML correspond to the value of the + * function_description field of anomaly records. + */ export const aggregationTypeTransform = { toES(oldAggType: string): string { let newAggType = oldAggType;