Skip to content

Commit

Permalink
[ML] Improve support for script and aggregation fields in anomaly det…
Browse files Browse the repository at this point in the history
…ection jobs (#81923)

Co-authored-by: Kibana Machine <[email protected]>
  • Loading branch information
qn895 and kibanamachine authored Nov 17, 2020
1 parent 46d587a commit 55119c2
Show file tree
Hide file tree
Showing 29 changed files with 345 additions and 62 deletions.
10 changes: 10 additions & 0 deletions x-pack/plugins/ml/common/constants/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,16 @@ export const getMessages = once(() => {
url:
'https://www.elastic.co/guide/en/elasticsearch/reference/{{version}}/ml-job-resource.html#ml-job-resource',
},
missing_summary_count_field_name: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.missingSummaryCountFieldNameMessage',
{
defaultMessage:
'A job configured with a datafeed with aggregations must set summary_count_field_name; use doc_count or suitable alternative.',
}
),
},
skipped_extended_tests: {
status: VALIDATION_STATUS.WARNING,
text: i18n.translate('xpack.ml.models.jobValidation.messages.skippedExtendedTestsMessage', {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export interface Datafeed {
job_id: JobId;
query: object;
query_delay?: string;
script_fields?: object;
script_fields?: Record<string, any>;
scroll_size?: number;
delayed_data_check_config?: object;
indices_options?: IndicesOptions;
Expand All @@ -30,16 +30,17 @@ export interface ChunkingConfig {
time_span?: string;
}

interface Aggregation {
buckets: {
export type Aggregation = Record<
string,
{
date_histogram: {
field: string;
fixed_interval: string;
};
aggregations?: { [key: string]: any };
aggs?: { [key: string]: any };
};
}
}
>;

interface IndicesOptions {
expand_wildcards?: 'all' | 'open' | 'closed' | 'hidden' | 'none';
Expand Down
13 changes: 13 additions & 0 deletions x-pack/plugins/ml/common/types/fields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,16 @@ export const mlCategory: Field = {
type: ES_FIELD_TYPES.KEYWORD,
aggregatable: false,
};

/**
 * Body of a cardinality aggregation that targets a named field.
 * Used as one of the two alternatives for `AggCardinality.cardinality`.
 */
export interface FieldAggCardinality {
/** Name of the field the cardinality is computed on. */
field: string;
// NOTE(review): the meaning and expected shape of `percent` are not visible here — confirm against callers.
percent?: any;
}

/**
 * Body of a cardinality aggregation that is driven by a script instead of a field.
 * Used as one of the two alternatives for `AggCardinality.cardinality`.
 */
export interface ScriptAggCardinality {
// Script definition; left untyped (`any`) in this declaration.
script: any;
}

/**
 * Aggregation object with a single `cardinality` key whose body is either
 * field-based (`FieldAggCardinality`) or script-based (`ScriptAggCardinality`).
 */
export interface AggCardinality {
cardinality: FieldAggCardinality | ScriptAggCardinality;
}
22 changes: 22 additions & 0 deletions x-pack/plugins/ml/common/util/datafeed_utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import { Aggregation, Datafeed } from '../types/anomaly_detection_jobs';

/**
 * Reads the aggregations definition from a datafeed config.
 *
 * The config may carry the definition under either `aggregations` or `aggs`;
 * `aggregations` takes precedence whenever it is anything other than `undefined`.
 *
 * @param datafeedConfig - datafeed configuration, possibly partial or missing
 * @returns the value stored under `aggregations`, otherwise the value stored
 *          under `aggs`, otherwise `undefined`
 */
export const getDatafeedAggregations = (
  datafeedConfig: Partial<Datafeed> | undefined
): Aggregation | undefined => {
  const preferred = datafeedConfig?.aggregations;
  // Strict comparison against undefined (not a nullish check) so that any
  // other stored value is handed back unchanged.
  return preferred !== undefined ? preferred : datafeedConfig?.aggs;
};

/**
 * Returns the name of the first top-level key of an aggregations object.
 *
 * The first entry under a datafeed's aggregations can have any name, so
 * callers use this to discover it instead of assuming a fixed key.
 *
 * @param aggregations - aggregations object; any other value is tolerated
 * @returns the first own enumerable key, or `undefined` when the input is
 *          `null`, not an object, or has no keys
 */
export const getAggregationBucketsName = (aggregations: any): string | undefined => {
  // `typeof null` is 'object', so null must be rejected explicitly;
  // otherwise Object.keys(null) throws a TypeError.
  if (aggregations === null || typeof aggregations !== 'object') {
    return undefined;
  }
  const [firstKey] = Object.keys(aggregations);
  return firstKey;
};
6 changes: 3 additions & 3 deletions x-pack/plugins/ml/common/util/job_utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,8 @@ describe('ML - job utils', () => {
expect(isTimeSeriesViewDetector(job, 3)).toBe(false);
});

test('returns false for a detector using a script field as a metric field_name', () => {
expect(isTimeSeriesViewDetector(job, 4)).toBe(false);
test('returns true for a detector using a script field as a metric field_name', () => {
expect(isTimeSeriesViewDetector(job, 4)).toBe(true);
});
});

Expand Down Expand Up @@ -281,6 +281,7 @@ describe('ML - job utils', () => {
expect(isSourceDataChartableForDetector(job, 22)).toBe(true);
expect(isSourceDataChartableForDetector(job, 23)).toBe(true);
expect(isSourceDataChartableForDetector(job, 24)).toBe(true);
expect(isSourceDataChartableForDetector(job, 37)).toBe(true);
});

test('returns false for expected detectors', () => {
Expand All @@ -296,7 +297,6 @@ describe('ML - job utils', () => {
expect(isSourceDataChartableForDetector(job, 34)).toBe(false);
expect(isSourceDataChartableForDetector(job, 35)).toBe(false);
expect(isSourceDataChartableForDetector(job, 36)).toBe(false);
expect(isSourceDataChartableForDetector(job, 37)).toBe(false);
});
});

Expand Down
23 changes: 22 additions & 1 deletion x-pack/plugins/ml/common/util/job_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { MlServerLimits } from '../types/ml_server_info';
import { JobValidationMessage, JobValidationMessageId } from '../constants/messages';
import { ES_AGGREGATION, ML_JOB_AGGREGATION } from '../constants/aggregation_types';
import { MLCATEGORY } from '../constants/field_types';
import { getDatafeedAggregations } from './datafeed_utils';

export interface ValidationResults {
valid: boolean;
Expand Down Expand Up @@ -94,7 +95,6 @@ export function isSourceDataChartableForDetector(job: CombinedJob, detectorIndex
// Perform extra check to see if the detector is using a scripted field.
const scriptFields = Object.keys(job.datafeed_config.script_fields);
isSourceDataChartable =
scriptFields.indexOf(dtr.field_name!) === -1 &&
scriptFields.indexOf(dtr.partition_field_name!) === -1 &&
scriptFields.indexOf(dtr.by_field_name!) === -1 &&
scriptFields.indexOf(dtr.over_field_name!) === -1;
Expand Down Expand Up @@ -559,6 +559,27 @@ export function basicDatafeedValidation(datafeed: Datafeed): ValidationResults {
};
}

/**
 * Validation that needs both the job and its datafeed.
 *
 * Currently one rule: when the datafeed defines aggregations, the job's
 * `analysis_config.summary_count_field_name` must be set. If it is not,
 * the `missing_summary_count_field_name` message is reported and the
 * result is marked invalid. The rule is skipped when either argument is falsy.
 *
 * @param job - job configuration
 * @param datafeed - datafeed configuration
 * @returns the collected messages, the overall validity, and `contains` /
 *          `find` helpers for looking messages up by id
 */
export function basicJobAndDatafeedValidation(job: Job, datafeed: Datafeed): ValidationResults {
  const messages: ValidationResults['messages'] = [];

  // Short-circuits so the datafeed is only inspected when both configs exist.
  const datafeedUsesAggregations =
    Boolean(datafeed && job) && getDatafeedAggregations(datafeed) !== undefined;

  if (datafeedUsesAggregations && !job.analysis_config?.summary_count_field_name) {
    messages.push({ id: 'missing_summary_count_field_name' });
  }

  return {
    messages,
    // A message is pushed exactly when the rule fails, so an empty list means valid.
    valid: messages.length === 0,
    contains: (id) => messages.some((msg) => msg.id === id),
    find: (id) => messages.find((msg) => msg.id === id),
  };
}

export function validateModelMemoryLimit(job: Job, limits: MlServerLimits): ValidationResults {
const messages: ValidationResults['messages'] = [];
let valid = true;
Expand Down
19 changes: 19 additions & 0 deletions x-pack/plugins/ml/common/util/validation_utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,22 @@ export function isValidJson(json: string) {
return false;
}
}

/**
 * Depth-first search of an aggregations object for a key named `fieldName`.
 *
 * Keys are visited in `Object.entries` order. A matching key at the current
 * level returns its value immediately; otherwise each object-valued child is
 * searched recursively and the first defined result is returned.
 *
 * @param aggs - aggregations object to search; `null` or non-object input
 *               yields `undefined` instead of throwing
 * @param fieldName - key to look for at any nesting depth
 * @returns the value stored under the first matching key, or `undefined`
 *          when no key matches
 */
export function findAggField(aggs: Record<string, any>, fieldName: string): any {
  // `typeof null` is 'object', so null has to be excluded explicitly;
  // enumerating the keys of null throws a TypeError.
  if (aggs === null || typeof aggs !== 'object') {
    return undefined;
  }

  for (const [key, child] of Object.entries(aggs)) {
    if (key === fieldName) {
      // A key match ends the search, whatever value it holds.
      return child;
    }
    if (child !== null && typeof child === 'object') {
      const nested = findAggField(child, fieldName);
      if (nested !== undefined) {
        return nested;
      }
    }
  }

  return undefined;
}

/**
 * Tells whether `fieldName` occurs as a key anywhere inside `aggs`.
 *
 * @param aggs - aggregations object to search
 * @param fieldName - key to look for
 * @returns `true` when `findAggField` yields anything other than `undefined`
 */
export function isValidAggregationField(aggs: Record<string, any>, fieldName: string): boolean {
  const match = findAggField(aggs, fieldName);
  return match !== undefined;
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ export const anomalyDataChange = function (
config.timeField,
range.min,
range.max,
bucketSpanSeconds * 1000
bucketSpanSeconds * 1000,
config.datafeedConfig
)
.toPromise();
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ export const useModelMemoryEstimator = (
// Update model memory estimation payload on the job creator updates
useEffect(() => {
modelMemoryEstimator.update({
datafeedConfig: jobCreator.datafeedConfig,
analysisConfig: jobCreator.jobConfig.analysis_config,
indexPattern: jobCreator.indexPatternTitle,
query: jobCreator.datafeedConfig.query,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { map, startWith, tap } from 'rxjs/operators';
import {
basicJobValidation,
basicDatafeedValidation,
basicJobAndDatafeedValidation,
} from '../../../../../../common/util/job_utils';
import { getNewJobLimits } from '../../../../services/ml_server_info';
import { JobCreator, JobCreatorType, isCategorizationJobCreator } from '../job_creator';
Expand Down Expand Up @@ -53,6 +54,7 @@ export interface BasicValidations {
scrollSize: Validation;
categorizerMissingPerPartition: Validation;
categorizerVaryingPerPartitionField: Validation;
summaryCountField: Validation;
}

export interface AdvancedValidations {
Expand Down Expand Up @@ -80,6 +82,7 @@ export class JobValidator {
scrollSize: { valid: true },
categorizerMissingPerPartition: { valid: true },
categorizerVaryingPerPartitionField: { valid: true },
summaryCountField: { valid: true },
};
private _advancedValidations: AdvancedValidations = {
categorizationFieldValid: { valid: true },
Expand Down Expand Up @@ -197,6 +200,14 @@ export class JobValidator {
datafeedConfig
);

const basicJobAndDatafeedResults = basicJobAndDatafeedValidation(jobConfig, datafeedConfig);
populateValidationMessages(
basicJobAndDatafeedResults,
this._basicValidations,
jobConfig,
datafeedConfig
);

// run additional job and group id validation
const idResults = checkForExistingJobAndGroupIds(
this._jobCreator.jobId,
Expand Down Expand Up @@ -228,6 +239,9 @@ export class JobValidator {
public get bucketSpan(): Validation {
return this._basicValidations.bucketSpan;
}
public get summaryCountField(): Validation {
return this._basicValidations.summaryCountField;
}

public get duplicateDetectors(): Validation {
return this._basicValidations.duplicateDetectors;
Expand Down Expand Up @@ -297,6 +311,7 @@ export class JobValidator {
this.duplicateDetectors.valid &&
this.categorizerMissingPerPartition.valid &&
this.categorizerVaryingPerPartitionField.valid &&
this.summaryCountField.valid &&
!this.validating &&
(this._jobCreator.type !== JOB_TYPE.CATEGORIZATION ||
(this._jobCreator.type === JOB_TYPE.CATEGORIZATION && this.categorizationField))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ export function populateValidationMessages(
basicValidations.frequency.valid = false;
basicValidations.frequency.message = invalidTimeIntervalMessage(datafeedConfig.frequency);
}
if (validationResults.contains('missing_summary_count_field_name')) {
basicValidations.summaryCountField.valid = false;
basicValidations.summaryCountField.message = i18n.translate(
'xpack.ml.newJob.wizard.validateJob.summaryCountFieldMissing',
{
defaultMessage: 'Required field as the datafeed uses aggregations.',
}
);
}
}

export function checkForExistingJobAndGroupIds(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,12 @@ export const DatafeedPreview: FC<{
if (combinedJob.datafeed_config && combinedJob.datafeed_config.indices.length) {
try {
const resp = await mlJobService.searchPreview(combinedJob);
const data = resp.aggregations
? resp.aggregations.buckets.buckets.slice(0, ML_DATA_PREVIEW_COUNT)
: resp.hits.hits;
let data = resp.hits.hits;
// the first item under aggregations can be any name
if (typeof resp.aggregations === 'object' && Object.keys(resp.aggregations).length > 0) {
const accessor = Object.keys(resp.aggregations)[0];
data = resp.aggregations[accessor].buckets.slice(0, ML_DATA_PREVIEW_COUNT);
}

setPreviewJsonString(JSON.stringify(data, null, 2));
} catch (error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,44 @@
import React, { memo, FC } from 'react';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n/react';
import { EuiDescribedFormGroup, EuiFormRow } from '@elastic/eui';
import { EuiDescribedFormGroup, EuiFormRow, EuiLink } from '@elastic/eui';
import { Validation } from '../../../../../common/job_validator';
import { useMlKibana } from '../../../../../../../contexts/kibana';

export const Description: FC = memo(({ children }) => {
/** Props accepted by the summary count field `Description` component. */
interface Props {
// Validation state for the form row: `message` is passed as the row's error
// and `valid === false` marks the row as invalid.
validation: Validation;
}

export const Description: FC<Props> = memo(({ children, validation }) => {
const title = i18n.translate('xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.title', {
defaultMessage: 'Summary count field',
});
const {
services: { docLinks },
} = useMlKibana();
const { ELASTIC_WEBSITE_URL, DOC_LINK_VERSION } = docLinks;
const docsUrl = `${ELASTIC_WEBSITE_URL}guide/en/machine-learning/${DOC_LINK_VERSION}/ml-configuring-aggregation.html`;
return (
<EuiDescribedFormGroup
title={<h3>{title}</h3>}
description={
<FormattedMessage
id="xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.description"
defaultMessage="Optional, for use if input data has been pre-summarized e.g. \{docCountParam\}."
defaultMessage="If the input data is {aggregated}, specify the field that contains the document count."
values={{
aggregated: (
<EuiLink href={docsUrl} target="_blank">
<FormattedMessage
id="xpack.ml.newJob.wizard.pickFieldsStep.summaryCountField.aggregatedText"
defaultMessage="aggregated"
/>
</EuiLink>
),
}}
/>
}
>
<EuiFormRow label={title}>
<EuiFormRow label={title} error={validation.message} isInvalid={validation.valid === false}>
<>{children}</>
</EuiFormRow>
</EuiDescribedFormGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,23 @@ import {
import { Description } from './description';

export const SummaryCountField: FC = () => {
const { jobCreator: jc, jobCreatorUpdate, jobCreatorUpdated } = useContext(JobCreatorContext);
const {
jobCreator: jc,
jobCreatorUpdate,
jobCreatorUpdated,
jobValidator,
jobValidatorUpdated,
} = useContext(JobCreatorContext);

const jobCreator = jc as MultiMetricJobCreator | PopulationJobCreator | AdvancedJobCreator;
const { fields } = newJobCapsService;
const [summaryCountFieldName, setSummaryCountFieldName] = useState(
jobCreator.summaryCountFieldName
);
const [validation, setValidation] = useState(jobValidator.summaryCountField);
useEffect(() => {
setValidation(jobValidator.summaryCountField);
}, [jobValidatorUpdated]);

useEffect(() => {
jobCreator.summaryCountFieldName = summaryCountFieldName;
Expand All @@ -35,7 +45,7 @@ export const SummaryCountField: FC = () => {
}, [jobCreatorUpdated]);

return (
<Description>
<Description validation={validation}>
<SummaryCountFieldSelect
fields={fields}
changeHandler={setSummaryCountFieldName}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,13 +628,15 @@ export function mlApiServicesProvider(httpService: HttpService) {
},

calculateModelMemoryLimit$({
datafeedConfig,
analysisConfig,
indexPattern,
query,
timeFieldName,
earliestMs,
latestMs,
}: {
datafeedConfig?: Datafeed;
analysisConfig: AnalysisConfig;
indexPattern: string;
query: any;
Expand All @@ -643,6 +645,7 @@ export function mlApiServicesProvider(httpService: HttpService) {
latestMs: number;
}) {
const body = JSON.stringify({
datafeedConfig,
analysisConfig,
indexPattern,
query,
Expand Down
Loading

0 comments on commit 55119c2

Please sign in to comment.