-
Notifications
You must be signed in to change notification settings - Fork 8.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ML] Improve support for script and aggregation fields in anomaly detection jobs #81923
Changes from 16 commits
75c4924
d90f1ec
b580708
77a7e92
ecc77ef
8cd0732
a930e21
571bf79
25e063d
0455ba1
4a557f5
2712c6f
f76039b
85e1773
79f22dd
739a423
dfd070a
0ced887
d96a119
a3a0809
9769d94
f54c8e8
7355559
1b5898f
20c1f67
1a3c40f
22f938c
cf5d4d1
6d50e8e
a661a27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,11 @@ import { | |
FieldHistogramRequestConfig, | ||
FieldRequestConfig, | ||
} from '../../datavisualizer/index_based/common'; | ||
import { DataRecognizerConfigResponse, Module } from '../../../../common/types/modules'; | ||
import { | ||
DatafeedOverride, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
DataRecognizerConfigResponse, | ||
Module, | ||
} from '../../../../common/types/modules'; | ||
import { getHttp } from '../../util/dependency_cache'; | ||
|
||
export interface MlInfoResponse { | ||
|
@@ -628,13 +632,15 @@ export function mlApiServicesProvider(httpService: HttpService) { | |
}, | ||
|
||
calculateModelMemoryLimit$({ | ||
datafeedConfig, | ||
analysisConfig, | ||
indexPattern, | ||
query, | ||
timeFieldName, | ||
earliestMs, | ||
latestMs, | ||
}: { | ||
datafeedConfig: DatafeedOverride; | ||
analysisConfig: AnalysisConfig; | ||
indexPattern: string; | ||
query: any; | ||
|
@@ -643,6 +649,7 @@ export function mlApiServicesProvider(httpService: HttpService) { | |
latestMs: number; | ||
}) { | ||
const body = JSON.stringify({ | ||
datafeedConfig, | ||
analysisConfig, | ||
indexPattern, | ||
query, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ import { ML_MEDIAN_PERCENTS } from '../../../../common/util/job_utils'; | |
import { JobId } from '../../../../common/types/anomaly_detection_jobs'; | ||
import { MlApiServices } from '../ml_api_service'; | ||
import { CriteriaField } from './index'; | ||
import type { DatafeedOverride } from '../../../../common/types/modules'; | ||
import type { Aggregation } from '../../../../common/types/anomaly_detection_jobs/datafeed'; | ||
import { findAggField } from '../../../../common/util/validation_utils'; | ||
|
||
interface ResultResponse { | ||
success: boolean; | ||
|
@@ -68,8 +71,12 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) { | |
timeFieldName: string, | ||
earliestMs: number, | ||
latestMs: number, | ||
intervalMs: number | ||
intervalMs: number, | ||
dataFeedConfig?: DatafeedOverride | ||
): Observable<MetricData> { | ||
const scriptFields: any | undefined = dataFeedConfig?.script_fields; | ||
const aggFields: Aggregation | undefined = dataFeedConfig?.aggregations; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed here d96a119 |
||
|
||
// Build the criteria to use in the bool filter part of the request. | ||
// Add criteria for the time range, entity fields, | ||
// plus any additional supplied query. | ||
|
@@ -150,15 +157,35 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) { | |
body.aggs.byTime.aggs = {}; | ||
|
||
const metricAgg: any = { | ||
[metricFunction]: { | ||
field: metricFieldName, | ||
}, | ||
[metricFunction]: {}, | ||
}; | ||
if (scriptFields !== undefined && scriptFields[metricFieldName] !== undefined) { | ||
metricAgg[metricFunction].script = scriptFields[metricFieldName].script; | ||
} else { | ||
metricAgg[metricFunction].field = metricFieldName; | ||
} | ||
|
||
if (metricFunction === 'percentiles') { | ||
metricAgg[metricFunction].percents = [ML_MEDIAN_PERCENTS]; | ||
} | ||
body.aggs.byTime.aggs.metric = metricAgg; | ||
|
||
// when the field is an aggregation field, because the field doesn't actually exist in the indices | ||
// we need to pass all the sub aggs from the original datafeed config | ||
// so that we can access the aggregated field | ||
if (typeof aggFields === 'object' && Object.keys(aggFields).length > 0) { | ||
// first item under aggregations can be any name, not necessarily 'buckets' | ||
const accessor = Object.keys(aggFields)[0]; | ||
const tempAggs = { ...(aggFields[accessor].aggs ?? aggFields[accessor].aggregations) }; | ||
const foundValue = findAggField(tempAggs, metricFieldName); | ||
|
||
if (foundValue !== undefined) { | ||
tempAggs.metric = foundValue; | ||
delete tempAggs[metricFieldName]; | ||
} | ||
body.aggs.byTime.aggs = tempAggs; | ||
} else { | ||
body.aggs.byTime.aggs.metric = metricAgg; | ||
} | ||
} | ||
|
||
return mlApiServices.esSearch$({ index, body }).pipe( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -285,7 +285,7 @@ export function resultsServiceProvider(mlApiServices) { | |
influencerFieldValues: { | ||
terms: { | ||
field: 'influencer_field_value', | ||
size: maxFieldValues, | ||
size: !!maxFieldValues ? maxFieldValues : ANOMALY_SWIM_LANE_HARD_LIMIT, | ||
order: { | ||
maxAnomalyScore: 'desc', | ||
}, | ||
|
@@ -415,7 +415,7 @@ export function resultsServiceProvider(mlApiServices) { | |
influencerFieldValues: { | ||
terms: { | ||
field: 'influencer_field_value', | ||
size: maxResults !== undefined ? maxResults : 2, | ||
size: !!maxResults ? maxResults : 2, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This change is subtle but using a falsey check like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated here 7355559 |
||
order: { | ||
maxAnomalyScore: 'desc', | ||
}, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm unsure we should do this. AFAIK this will allow any type of string-based key to be valid for this interface, like `date_histoasdf`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a tricky one. I talked to Ben, and apparently users don't necessarily need to name the item `buckets` for the datafeed config to be valid. Right now, on the UI side, we don't support that. I can revert this change for this PR, and we'll address this issue in a follow-up PR.