Skip to content

Commit

Permalink
[APM] Reinstate ML multi-metric job (#117836)
Browse files Browse the repository at this point in the history
Closes #101734.

This reverts commit 008421f.

Additionally, incorporate suggested changes from #101734 (comment).
  • Loading branch information
dgieselaar authored and dmlemeshko committed Nov 29, 2021
1 parent c29c6e0 commit fc5d660
Show file tree
Hide file tree
Showing 13 changed files with 225 additions and 81 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { ApmMlDetectorIndex } from './apm_ml_detectors';

/**
 * Builds the Elasticsearch filter clauses that select APM ML anomaly results
 * for a single detector.
 *
 * The result is a one-element array so that callers can spread it directly
 * into an existing `bool.filter` list.
 *
 * @param detectorIndex - index of the ML detector whose results are wanted
 * @returns an array holding one `bool` query that restricts documents to
 *   `model_plot` / `record` result types for the given `detector_index`
 */
export function apmMlAnomalyQuery(detectorIndex: ApmMlDetectorIndex) {
  // Only these two result types are matched.
  const resultTypeClause = {
    terms: { result_type: ['model_plot', 'record'] },
  };

  // Restrict to results emitted by the requested detector.
  const detectorClause = {
    term: { detector_index: detectorIndex },
  };

  return [{ bool: { filter: [resultTypeClause, detectorClause] } }];
}
12 changes: 12 additions & 0 deletions x-pack/plugins/apm/common/anomaly_detection/apm_ml_detectors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
 * Numeric identifiers for the detectors of the APM transaction ML job.
 *
 * A member of this enum is passed to `apmMlAnomalyQuery`, which uses it as
 * the value of the `detector_index` term filter.
 *
 * NOTE(review): the values presumably have to match the position of each
 * detector in the `detectors` array of the ML job definition
 * (latency, throughput, failed transaction rate) — confirm before
 * reordering either side.
 */
export const enum ApmMlDetectorIndex {
// transaction latency detector
txLatency = 0,
// transaction throughput detector
txThroughput = 1,
// failed transaction rate detector
txFailureRate = 2,
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
*/

import { i18n } from '@kbn/i18n';
import { ANOMALY_SEVERITY } from './ml_constants';
import { ANOMALY_SEVERITY } from '../ml_constants';
import {
getSeverityType,
getSeverityColor as mlGetSeverityColor,
} from '../../ml/common';
import { ServiceHealthStatus } from './service_health_status';
} from '../../../ml/common';
import { ServiceHealthStatus } from '../service_health_status';

export interface ServiceAnomalyStats {
transactionType?: string;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@
* 2.0.
*/

import Boom from '@hapi/boom';
import { Logger } from 'kibana/server';
import uuid from 'uuid/v4';
import { snakeCase } from 'lodash';
import Boom from '@hapi/boom';
import moment from 'moment';
import uuid from 'uuid/v4';
import { ML_ERRORS } from '../../../common/anomaly_detection';
import { ProcessorEvent } from '../../../common/processor_event';
import { environmentQuery } from '../../../common/utils/environment_query';
import { Setup } from '../helpers/setup_request';
import {
TRANSACTION_DURATION,
METRICSET_NAME,
PROCESSOR_EVENT,
} from '../../../common/elasticsearch_fieldnames';
import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants';
import { ProcessorEvent } from '../../../common/processor_event';
import { environmentQuery } from '../../../common/utils/environment_query';
import { withApmSpan } from '../../utils/with_apm_span';
import { Setup } from '../helpers/setup_request';
import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants';
import { getAnomalyDetectionJobs } from './get_anomaly_detection_jobs';

export async function createAnomalyDetectionJobs(
Expand Down Expand Up @@ -92,8 +92,8 @@ async function createAnomalyDetectionJob({
query: {
bool: {
filter: [
{ term: { [PROCESSOR_EVENT]: ProcessorEvent.transaction } },
{ exists: { field: TRANSACTION_DURATION } },
{ term: { [PROCESSOR_EVENT]: ProcessorEvent.metric } },
{ term: { [METRICSET_NAME]: 'transaction' } },
...environmentQuery(environment),
],
},
Expand All @@ -105,7 +105,7 @@ async function createAnomalyDetectionJob({
job_tags: {
environment,
// identifies this as an APM ML job & facilitates future migrations
apm_ml_version: 2,
apm_ml_version: 3,
},
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import { rangeQuery } from '../../../../observability/server';
import { withApmSpan } from '../../utils/with_apm_span';
import { getMlJobsWithAPMGroup } from '../anomaly_detection/get_ml_jobs_with_apm_group';
import { Setup } from '../helpers/setup_request';
import { apmMlAnomalyQuery } from '../../../common/anomaly_detection/apm_ml_anomaly_query';
import { ApmMlDetectorIndex } from '../../../common/anomaly_detection/apm_ml_detectors';

export const DEFAULT_ANOMALIES: ServiceAnomaliesResponse = {
mlJobIds: [],
Expand Down Expand Up @@ -56,7 +58,7 @@ export async function getServiceAnomalies({
query: {
bool: {
filter: [
{ terms: { result_type: ['model_plot', 'record'] } },
...apmMlAnomalyQuery(ApmMlDetectorIndex.txLatency),
...rangeQuery(
Math.min(end - 30 * 60 * 1000, start),
end,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import { rangeQuery } from '../../../../../observability/server';
import { asMutableArray } from '../../../../common/utils/as_mutable_array';
import { withApmSpan } from '../../../utils/with_apm_span';
import { Setup } from '../../helpers/setup_request';
import { apmMlAnomalyQuery } from '../../../../common/anomaly_detection/apm_ml_anomaly_query';
import { ApmMlDetectorIndex } from '../../../../common/anomaly_detection/apm_ml_detectors';

export type ESResponse = Exclude<
PromiseReturnType<typeof anomalySeriesFetcher>,
Expand Down Expand Up @@ -40,7 +42,7 @@ export function anomalySeriesFetcher({
query: {
bool: {
filter: [
{ terms: { result_type: ['model_plot', 'record'] } },
...apmMlAnomalyQuery(ApmMlDetectorIndex.txLatency),
{ term: { partition_field_value: serviceName } },
{ term: { by_field_value: transactionType } },
...rangeQuery(start, end, 'timestamp'),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
{
"id": "apm_transaction",
"title": "APM",
"description": "Detect anomalies in transactions from your APM services.",
"description": "Detect anomalies in transaction latency, throughput and failure rate from your APM services for metric data.",
"type": "Transaction data",
"logoFile": "logo.json",
"defaultIndexPattern": "apm-*-transaction",
"defaultIndexPattern": "apm-*-metric,metrics-apm*",
"query": {
"bool": {
"filter": [
{ "term": { "processor.event": "transaction" } },
{ "exists": { "field": "transaction.duration" } }
{ "term": { "processor.event": "metric" } },
{ "term": { "metricset.name": "transaction" } }
]
}
},
"jobs": [
{
"id": "high_mean_transaction_duration",
"file": "high_mean_transaction_duration.json"
"id": "apm_tx_metrics",
"file": "apm_tx_metrics.json"
}
],
"datafeeds": [
{
"id": "datafeed-high_mean_transaction_duration",
"file": "datafeed_high_mean_transaction_duration.json",
"job_id": "high_mean_transaction_duration"
"id": "datafeed-apm_tx_metrics",
"file": "datafeed_apm_tx_metrics.json",
"job_id": "apm_tx_metrics"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"job_type": "anomaly_detector",
"groups": [
"apm"
],
"description": "Detects anomalies in transaction latency, throughput and failed transaction rate for metric data.",
"analysis_config": {
"bucket_span": "15m",
"summary_count_field_name" : "doc_count",
"detectors" : [
{
"detector_description" : "high latency by transaction type for an APM service",
"function" : "high_mean",
"field_name" : "transaction_latency",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
},
{
"detector_description" : "transaction throughput for an APM service",
"function" : "mean",
"field_name" : "transaction_throughput",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
},
{
"detector_description" : "failed transaction rate for an APM service",
"function" : "high_mean",
"field_name" : "failed_transaction_rate",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
}
],
"influencers" : [
"transaction.type",
"service.name"
]
},
"analysis_limits": {
"model_memory_limit": "32mb"
},
"data_description": {
"time_field" : "@timestamp",
"time_format" : "epoch_ms"
},
"model_plot_config": {
"enabled" : true,
"annotations_enabled" : true
},
"results_index_name" : "custom-apm",
"custom_settings": {
"created_by": "ml-module-apm-transaction"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
{
"job_id": "JOB_ID",
"indices": [
"INDEX_PATTERN_NAME"
],
"chunking_config" : {
"mode" : "off"
},
"query": {
"bool": {
"filter": [
{ "term": { "processor.event": "metric" } },
{ "term": { "metricset.name": "transaction" } }
]
}
},
"aggregations" : {
"buckets" : {
"composite" : {
"size" : 5000,
"sources" : [
{
"date" : {
"date_histogram" : {
"field" : "@timestamp",
"fixed_interval" : "90s"
}
}
},
{
"transaction.type" : {
"terms" : {
"field" : "transaction.type"
}
}
},
{
"service.name" : {
"terms" : {
"field" : "service.name"
}
}
}
]
},
"aggs" : {
"@timestamp" : {
"max" : {
"field" : "@timestamp"
}
},
"transaction_throughput" : {
"rate" : {
"unit" : "minute"
}
},
"transaction_latency" : {
"avg" : {
"field" : "transaction.duration.histogram"
}
},
"error_count" : {
"filter" : {
"term" : {
"event.outcome" : "failure"
}
},
"aggs" : {
"actual_error_count" : {
"value_count" : {
"field" : "event.outcome"
}
}
}
},
"success_count" : {
"filter" : {
"term" : {
"event.outcome" : "success"
}
}
},
"failed_transaction_rate" : {
"bucket_script" : {
"buckets_path" : {
"failure_count" : "error_count>_count",
"success_count" : "success_count>_count"
},
"script" : "if ((params.failure_count + params.success_count)==0){return 0;}else{return 100 * (params.failure_count/(params.failure_count + params.success_count));}"
}
}
}
}
},
"indices_options": {
"ignore_unavailable": true
}
}

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export default ({ getService }: FtrProviderContext) => {
user: USER.ML_POWERUSER,
expected: {
responseCode: 200,
moduleIds: ['apm_jsbase', 'apm_transaction', 'apm_nodejs'],
moduleIds: ['apm_jsbase', 'apm_nodejs'],
},
},
{
Expand Down
Loading

0 comments on commit fc5d660

Please sign in to comment.