From 625b93a22fa2f667b99abe837d5db49e55f55505 Mon Sep 17 00:00:00 2001 From: Dima Arnautov Date: Wed, 5 Feb 2020 16:14:05 +0100 Subject: [PATCH] [ML] New Platform server shim: update data visualizer routes to use new platform router (#56739) * [ML] data_visualizer TS refactor, NP router * [ML] fix schema, add apiDoc * [ML] update apiDoc order * [ML] validate_cardinality with NP router * [ML] use mlClient * [ML] remove redundant code * [ML] support legacy callWithRequest for job validation * [ML] fix schema validation --- .../plugins/ml/common/util/job_utils.d.ts | 2 + .../lib/{query_utils.js => query_utils.ts} | 11 +- ...{data_visualizer.js => data_visualizer.ts} | 441 +++++++++++------- .../data_visualizer/{index.js => index.ts} | 0 .../new_platform/data_visualizer_schema.ts | 38 ++ .../plugins/ml/server/routes/apidoc.json | 5 +- .../ml/server/routes/data_visualizer.js | 110 ----- .../ml/server/routes/data_visualizer.ts | 173 +++++++ 8 files changed, 509 insertions(+), 271 deletions(-) rename x-pack/legacy/plugins/ml/server/lib/{query_utils.js => query_utils.ts} (86%) rename x-pack/legacy/plugins/ml/server/models/data_visualizer/{data_visualizer.js => data_visualizer.ts} (75%) rename x-pack/legacy/plugins/ml/server/models/data_visualizer/{index.js => index.ts} (100%) create mode 100644 x-pack/legacy/plugins/ml/server/new_platform/data_visualizer_schema.ts delete mode 100644 x-pack/legacy/plugins/ml/server/routes/data_visualizer.js create mode 100644 x-pack/legacy/plugins/ml/server/routes/data_visualizer.ts diff --git a/x-pack/legacy/plugins/ml/common/util/job_utils.d.ts b/x-pack/legacy/plugins/ml/common/util/job_utils.d.ts index df62d19b6d27b..cfff15bb97be2 100644 --- a/x-pack/legacy/plugins/ml/common/util/job_utils.d.ts +++ b/x-pack/legacy/plugins/ml/common/util/job_utils.d.ts @@ -43,3 +43,5 @@ export function processCreatedBy(customSettings: { created_by?: string }): void; export function mlFunctionToESAggregation(functionName: string): string | null; export function isModelPlotEnabled(job: Job, detectorIndex: number, entityFields: any[]): boolean; + +export function getSafeAggregationName(fieldName: string, index: number): string; diff --git a/x-pack/legacy/plugins/ml/server/lib/query_utils.js b/x-pack/legacy/plugins/ml/server/lib/query_utils.ts similarity index 86% rename from x-pack/legacy/plugins/ml/server/lib/query_utils.js rename to x-pack/legacy/plugins/ml/server/lib/query_utils.ts index 806ce324d26d1..5eff726d6c5c8 100644 --- a/x-pack/legacy/plugins/ml/server/lib/query_utils.js +++ b/x-pack/legacy/plugins/ml/server/lib/query_utils.ts @@ -10,7 +10,12 @@ // Builds the base filter criteria used in queries, // adding criteria for the time range and an optional query. -export function buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query) { +export function buildBaseFilterCriteria( + timeFieldName?: string, + earliestMs?: number, + latestMs?: number, + query?: object +) { const filterCriteria = []; if (timeFieldName && earliestMs && latestMs) { filterCriteria.push({ @@ -34,7 +39,7 @@ export function buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, que // Wraps the supplied aggregations in a sampler aggregation. // A supplied samplerShardSize (the shard_size parameter of the sampler aggregation) // of less than 1 indicates no sampling, and the aggs are returned as-is. -export function buildSamplerAggregation(aggs, samplerShardSize) { +export function buildSamplerAggregation(aggs: object, samplerShardSize: number) { if (samplerShardSize < 1) { return aggs; } @@ -53,6 +58,6 @@ export function buildSamplerAggregation(aggs, samplerShardSize) { // depending on whether sampling is being used. // A supplied samplerShardSize (the shard_size parameter of the sampler aggregation) // of less than 1 indicates no sampling, and an empty array is returned. -export function getSamplerAggregationsResponsePath(samplerShardSize) { +export function getSamplerAggregationsResponsePath(samplerShardSize: number): string[] { return samplerShardSize > 0 ? ['sample'] : []; } diff --git a/x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.js b/x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.ts similarity index 75% rename from x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.js rename to x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.ts index f4ee032ee2dbb..6186a61c5075f 100644 --- a/x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.js +++ b/x-pack/legacy/plugins/ml/server/models/data_visualizer/data_visualizer.ts @@ -4,23 +4,118 @@ * you may not use this file except in compliance with the Elastic License. */ +import { CallAPIOptions, RequestHandlerContext } from 'kibana/server'; import _ from 'lodash'; +import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types'; +import { getSafeAggregationName } from '../../../common/util/job_utils'; import { buildBaseFilterCriteria, buildSamplerAggregation, getSamplerAggregationsResponsePath, } from '../../lib/query_utils'; -import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types'; -import { getSafeAggregationName } from '../../../common/util/job_utils'; const SAMPLER_TOP_TERMS_THRESHOLD = 100000; const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000; const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200; const FIELDS_REQUEST_BATCH_SIZE = 10; +interface FieldData { + fieldName: string; + existsInDocs: boolean; + stats?: { + sampleCount?: number; + count?: number; + cardinality?: number; + }; +} + +export interface Field { + fieldName: string; + type: string; + cardinality: number; +} + +interface Distribution { + percentiles: any[]; + minPercentile: number; + maxPercentile: number; +} + +interface Aggs { + [key: string]: any; +} + +interface Bucket { + doc_count: number; +} + +interface NumericFieldStats { + fieldName: string; + count: number; + min: number; + max: number; + avg: number; + isTopValuesSampled: boolean; + topValues: Bucket[]; + topValuesSampleSize: number; + topValuesSamplerShardSize: number; + median?: number; + distribution?: Distribution; +} + +interface StringFieldStats { + fieldName: string; + isTopValuesSampled: boolean; + topValues: Bucket[]; + topValuesSampleSize: number; + topValuesSamplerShardSize: number; +} + +interface DateFieldStats { + fieldName: string; + count: number; + earliest: number; + latest: number; +} + +interface BooleanFieldStats { + fieldName: string; + count: number; + trueCount: number; + falseCount: number; + [key: string]: number | string; +} + +interface DocumentCountStats { + documentCounts: { + interval: number; + buckets: { [key: string]: number }; + }; +} + +interface FieldExamples { + fieldName: string; + examples: any[]; +} + +type BatchStats = + | NumericFieldStats + | StringFieldStats + | BooleanFieldStats + | DateFieldStats + | DocumentCountStats + | FieldExamples; + export class DataVisualizer { - constructor(callWithRequest) { - this.callWithRequest = callWithRequest; + callAsCurrentUser: ( + endpoint: string, + clientParams: Record, + options?: CallAPIOptions + ) => Promise; + + constructor(client: RequestHandlerContext | (() => any)) { + this.callAsCurrentUser = + typeof client === 'object' ? client.ml!.mlClient.callAsCurrentUser : client; } // Obtains overall stats on the fields in the supplied index pattern, returning an object @@ -28,28 +123,28 @@ export class DataVisualizer { // aggregatable and non-aggregatable fields do or do not exist in documents. // Sampling will be used if supplied samplerShardSize > 0. async getOverallStats( - indexPatternTitle, - query, - aggregatableFields, - nonAggregatableFields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: object, + aggregatableFields: string[], + nonAggregatableFields: string[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const stats = { totalCount: 0, - aggregatableExistsFields: [], - aggregatableNotExistsFields: [], - nonAggregatableExistsFields: [], - nonAggregatableNotExistsFields: [], + aggregatableExistsFields: [] as FieldData[], + aggregatableNotExistsFields: [] as FieldData[], + nonAggregatableExistsFields: [] as FieldData[], + nonAggregatableNotExistsFields: [] as FieldData[], }; // To avoid checking for the existence of too many aggregatable fields in one request, // split the check into multiple batches (max 200 fields per request). - const batches = [[]]; + const batches: string[][] = [[]]; _.each(aggregatableFields, field => { - let lastArray = _.last(batches); + let lastArray: string[] = _.last(batches); if (lastArray.length === AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE) { lastArray = []; batches.push(lastArray); @@ -89,7 +184,7 @@ export class DataVisualizer { latestMs ); - const fieldData = { + const fieldData: FieldData = { fieldName: field, existsInDocs, stats: {}, @@ -110,19 +205,19 @@ export class DataVisualizer { // returned array depend on the type of the field (keyword, number, date etc). // Sampling will be used if supplied samplerShardSize > 0. async getStatsForFields( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs, - interval, - maxExamples - ) { + indexPatternTitle: string, + query: any, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number, + interval: number, + maxExamples: number + ): Promise { // Batch up fields by type, getting stats for multiple fields at a time. - const batches = []; - const batchedFields = {}; + const batches: Field[][] = []; + const batchedFields: { [key: string]: Field[][] } = {}; _.each(fields, field => { if (field.fieldName === undefined) { // undefined fieldName is used for a document count request. @@ -135,7 +230,7 @@ export class DataVisualizer { if (batchedFields[fieldType] === undefined) { batchedFields[fieldType] = [[]]; } - let lastArray = _.last(batchedFields[fieldType]); + let lastArray: Field[] = _.last(batchedFields[fieldType]); if (lastArray.length === FIELDS_REQUEST_BATCH_SIZE) { lastArray = []; batchedFields[fieldType].push(lastArray); @@ -148,10 +243,10 @@ export class DataVisualizer { batches.push(...lists); }); - let results = []; + let results: BatchStats[] = []; await Promise.all( batches.map(async batch => { - let batchStats = []; + let batchStats: BatchStats[] = []; const first = batch[0]; switch (first.type) { case ML_JOB_FIELD_TYPES.NUMBER: @@ -243,13 +338,13 @@ export class DataVisualizer { } async checkAggregatableFieldsExist( - indexPatternTitle, - query, - aggregatableFields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: any, + aggregatableFields: string[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; @@ -257,7 +352,7 @@ export class DataVisualizer { // Value count aggregation faster way of checking if field exists than using // filter aggregation with exists query. - const aggs = {}; + const aggs: Aggs = {}; aggregatableFields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field, i); aggs[`${safeFieldName}_count`] = { @@ -277,7 +372,7 @@ export class DataVisualizer { aggs: buildSamplerAggregation(aggs, samplerShardSize), }; - const resp = await this.callWithRequest('search', { + const resp = await this.callAsCurrentUser('search', { index, rest_total_hits_as_int: true, size, @@ -287,8 +382,8 @@ export class DataVisualizer { const totalCount = _.get(resp, ['hits', 'total'], 0); const stats = { totalCount, - aggregatableExistsFields: [], - aggregatableNotExistsFields: [], + aggregatableExistsFields: [] as FieldData[], + aggregatableNotExistsFields: [] as FieldData[], }; const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); @@ -324,12 +419,12 @@ export class DataVisualizer { } async checkNonAggregatableFieldExists( - indexPatternTitle, - query, - field, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; @@ -344,7 +439,7 @@ export class DataVisualizer { }; filterCriteria.push({ exists: { field } }); - const resp = await this.callWithRequest('search', { + const resp = await this.callAsCurrentUser('search', { index, rest_total_hits_as_int: true, size, @@ -354,13 +449,13 @@ export class DataVisualizer { } async getDocumentCountStats( - indexPatternTitle, - query, - timeFieldName, - earliestMs, - latestMs, - interval - ) { + indexPatternTitle: string, + query: any, + timeFieldName: string, + earliestMs: number, + latestMs: number, + interval: number + ): Promise { const index = indexPatternTitle; const size = 0; const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); @@ -371,7 +466,7 @@ export class DataVisualizer { eventRate: { date_histogram: { field: timeFieldName, - interval: interval, + interval, min_doc_count: 1, }, }, @@ -383,36 +478,42 @@ export class DataVisualizer { filter: filterCriteria, }, }, - aggs: aggs, + aggs, }; - const resp = await this.callWithRequest('search', { index, size, body }); + const resp = await this.callAsCurrentUser('search', { + index, + size, + body, + }); - const buckets = {}; - const dataByTimeBucket = _.get(resp, ['aggregations', 'eventRate', 'buckets'], []); + const buckets: { [key: string]: number } = {}; + const dataByTimeBucket: Array<{ key: string; doc_count: number }> = _.get( + resp, + ['aggregations', 'eventRate', 'buckets'], + [] + ); _.each(dataByTimeBucket, dataForTime => { const time = dataForTime.key; buckets[time] = dataForTime.doc_count; }); - const stats = { + return { documentCounts: { interval, buckets, }, }; - - return stats; } async getNumericFieldsStats( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; @@ -429,7 +530,7 @@ export class DataVisualizer { () => (count += PERCENTILE_SPACING) ); - const aggs = {}; + const aggs: { [key: string]: any } = {}; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); aggs[`${safeFieldName}_field_stats`] = { @@ -443,7 +544,7 @@ export class DataVisualizer { aggs[`${safeFieldName}_percentiles`] = { percentiles: { field: field.fieldName, - percents: percents, + percents, keyed: false, }, }; @@ -483,10 +584,14 @@ export class DataVisualizer { aggs: buildSamplerAggregation(aggs, samplerShardSize), }; - const resp = await this.callWithRequest('search', { index, size, body }); + const resp = await this.callAsCurrentUser('search', { + index, + size, + body, + }); const aggregations = resp.aggregations; const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats = []; + const batchStats: NumericFieldStats[] = []; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); const docCount = _.get( @@ -499,7 +604,15 @@ export class DataVisualizer { [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'], {} ); - const stats = { + + const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; + if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { + topAggsPath.push('top'); + } + + const topValues: Bucket[] = _.get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats: NumericFieldStats = { fieldName: field.fieldName, count: docCount, min: _.get(fieldStatsResp, 'min', 0), @@ -507,31 +620,27 @@ export class DataVisualizer { avg: _.get(fieldStatsResp, 'avg', 0), isTopValuesSampled: field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + _.get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, }; - const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; - if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { - topAggsPath.push('top'); - } - - stats.topValues = _.get(aggregations, [...topAggsPath, 'buckets'], []); - stats.topValuesSampleSize = _.get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0); - stats.topValuesSamplerShardSize = - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize; - stats.topValues.forEach(bucket => { - stats.topValuesSampleSize += bucket.doc_count; - }); - if (stats.count > 0) { const percentiles = _.get( aggregations, [...aggsPath, `${safeFieldName}_percentiles`, 'values'], [] ); - const medianPercentile = _.find(percentiles, { key: 50 }); - stats.median = medianPercentile !== undefined ? medianPercentile.value : 0; + const medianPercentile: { value: number; key: number } | undefined = _.find(percentiles, { + key: 50, + }); + stats.median = medianPercentile !== undefined ? medianPercentile!.value : 0; stats.distribution = this.processDistributionData( percentiles, PERCENTILE_SPACING, @@ -546,19 +655,19 @@ export class DataVisualizer { } async getStringFieldsStats( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - const aggs = {}; + const aggs: Aggs = {}; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); const top = { @@ -596,32 +705,38 @@ export class DataVisualizer { aggs: buildSamplerAggregation(aggs, samplerShardSize), }; - const resp = await this.callWithRequest('search', { index, size, body }); + const resp = await this.callAsCurrentUser('search', { + index, + size, + body, + }); const aggregations = resp.aggregations; const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats = []; + const batchStats: StringFieldStats[] = []; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); - const stats = { - fieldName: field.fieldName, - isTopValuesSampled: - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, - }; const topAggsPath = [...aggsPath, `${safeFieldName}_top`]; if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) { topAggsPath.push('top'); } - stats.topValues = _.get(aggregations, [...topAggsPath, 'buckets'], []); - stats.topValuesSampleSize = _.get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0); - stats.topValuesSamplerShardSize = - field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD - ? SAMPLER_TOP_TERMS_SHARD_SIZE - : samplerShardSize; - stats.topValues.forEach(bucket => { - stats.topValuesSampleSize += bucket.doc_count; - }); + const topValues: Bucket[] = _.get(aggregations, [...topAggsPath, 'buckets'], []); + + const stats = { + fieldName: field.fieldName, + isTopValuesSampled: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0, + topValues, + topValuesSampleSize: topValues.reduce( + (acc, curr) => acc + curr.doc_count, + _.get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0) + ), + topValuesSamplerShardSize: + field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD + ? SAMPLER_TOP_TERMS_SHARD_SIZE + : samplerShardSize, + }; batchStats.push(stats); }); @@ -630,19 +745,19 @@ export class DataVisualizer { } async getDateFieldsStats( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - const aggs = {}; + const aggs: Aggs = {}; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); aggs[`${safeFieldName}_field_stats`] = { @@ -664,10 +779,14 @@ export class DataVisualizer { aggs: buildSamplerAggregation(aggs, samplerShardSize), }; - const resp = await this.callWithRequest('search', { index, size, body }); + const resp = await this.callAsCurrentUser('search', { + index, + size, + body, + }); const aggregations = resp.aggregations; const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats = []; + const batchStats: DateFieldStats[] = []; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); const docCount = _.get( @@ -692,19 +811,19 @@ export class DataVisualizer { } async getBooleanFieldsStats( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs + indexPatternTitle: string, + query: object, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number ) { const index = indexPatternTitle; const size = 0; const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query); - const aggs = {}; + const aggs: Aggs = {}; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); aggs[`${safeFieldName}_value_count`] = { @@ -727,20 +846,24 @@ export class DataVisualizer { aggs: buildSamplerAggregation(aggs, samplerShardSize), }; - const resp = await this.callWithRequest('search', { index, size, body }); + const resp = await this.callAsCurrentUser('search', { + index, + size, + body, + }); const aggregations = resp.aggregations; const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const batchStats = []; + const batchStats: BooleanFieldStats[] = []; fields.forEach((field, i) => { const safeFieldName = getSafeAggregationName(field.fieldName, i); - const stats = { + const stats: BooleanFieldStats = { fieldName: field.fieldName, count: _.get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0), trueCount: 0, falseCount: 0, }; - const valueBuckets = _.get( + const valueBuckets: Array<{ [key: string]: number }> = _.get( aggregations, [...aggsPath, `${safeFieldName}_values`, 'buckets'], [] @@ -756,14 +879,14 @@ export class DataVisualizer { } async getFieldExamples( - indexPatternTitle, - query, - field, - timeFieldName, - earliestMs, - latestMs, - maxExamples - ) { + indexPatternTitle: string, + query: any, + field: string, + timeFieldName: string, + earliestMs: number, + latestMs: number, + maxExamples: number + ): Promise { const index = indexPatternTitle; // Request at least 100 docs so that we have a chance of obtaining @@ -785,7 +908,7 @@ export class DataVisualizer { }, }; - const resp = await this.callWithRequest('search', { + const resp = await this.callAsCurrentUser('search', { index, rest_total_hits_as_int: true, size, @@ -793,7 +916,7 @@ export class DataVisualizer { }); const stats = { fieldName: field, - examples: [], + examples: [] as any[], }; if (resp.hits.total !== 0) { const hits = resp.hits.hits; @@ -803,7 +926,7 @@ export class DataVisualizer { // field is populated using copy_to in the index mapping), // there will be no example to add. // Use lodash _.get() to support field names containing dots. - const example = _.get(hits[i]._source, field); + const example: any = _.get(hits[i]._source, field); if (example !== undefined && stats.examples.indexOf(example) === -1) { stats.examples.push(example); if (stats.examples.length === maxExamples) { @@ -816,13 +939,17 @@ export class DataVisualizer { return stats; } - processDistributionData(percentiles, percentileSpacing, minValue) { - const distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; + processDistributionData( + percentiles: Array<{ value: number }>, + percentileSpacing: number, + minValue: number + ): Distribution { + const distribution: Distribution = { percentiles: [], minPercentile: 0, maxPercentile: 100 }; if (percentiles.length === 0) { return distribution; } - let percentileBuckets = []; + let percentileBuckets: Array<{ value: number }> = []; let lowerBound = minValue; if (lowerBound >= 0) { // By default return results for 0 - 90% percentiles. @@ -853,7 +980,7 @@ export class DataVisualizer { // Add in 0-5 and 95-100% if they don't add more // than 25% to the value range at either end. - const lastValue = _.last(percentileBuckets).value; + const lastValue: number = _.last(percentileBuckets).value; const maxDiff = 0.25 * (lastValue - lowerBound); if (lowerBound - dataMin < maxDiff) { percentileBuckets.splice(0, 0, percentiles[0]); diff --git a/x-pack/legacy/plugins/ml/server/models/data_visualizer/index.js b/x-pack/legacy/plugins/ml/server/models/data_visualizer/index.ts similarity index 100% rename from x-pack/legacy/plugins/ml/server/models/data_visualizer/index.js rename to x-pack/legacy/plugins/ml/server/models/data_visualizer/index.ts diff --git a/x-pack/legacy/plugins/ml/server/new_platform/data_visualizer_schema.ts b/x-pack/legacy/plugins/ml/server/new_platform/data_visualizer_schema.ts new file mode 100644 index 0000000000000..0c10b2d5b4f16 --- /dev/null +++ b/x-pack/legacy/plugins/ml/server/new_platform/data_visualizer_schema.ts @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { schema } from '@kbn/config-schema'; + +export const dataVisualizerFieldStatsSchema = { + params: schema.object({ + indexPatternTitle: schema.string(), + }), + body: schema.object({ + query: schema.any(), + fields: schema.arrayOf(schema.any()), + samplerShardSize: schema.number(), + timeFieldName: schema.maybe(schema.string()), + earliest: schema.maybe(schema.number()), + latest: schema.maybe(schema.number()), + interval: schema.maybe(schema.string()), + maxExamples: schema.number(), + }), +}; + +export const dataVisualizerOverallStatsSchema = { + params: schema.object({ + indexPatternTitle: schema.string(), + }), + body: schema.object({ + query: schema.any(), + aggregatableFields: schema.arrayOf(schema.string()), + nonAggregatableFields: schema.arrayOf(schema.string()), + samplerShardSize: schema.number(), + timeFieldName: schema.maybe(schema.string()), + earliest: schema.maybe(schema.number()), + latest: schema.maybe(schema.number()), + }), +}; diff --git a/x-pack/legacy/plugins/ml/server/routes/apidoc.json b/x-pack/legacy/plugins/ml/server/routes/apidoc.json index 8292e946cd344..4a3b93b9b866f 100644 --- a/x-pack/legacy/plugins/ml/server/routes/apidoc.json +++ b/x-pack/legacy/plugins/ml/server/routes/apidoc.json @@ -16,6 +16,9 @@ "DeleteDataFrameAnalytics", "StartDataFrameAnalyticsJob", "StopsDataFrameAnalyticsJob", - "GetDataFrameAnalyticsMessages" + "GetDataFrameAnalyticsMessages", + "DataVisualizer", + "GetOverallStats", + "GetStatsForFields" ] } diff --git a/x-pack/legacy/plugins/ml/server/routes/data_visualizer.js b/x-pack/legacy/plugins/ml/server/routes/data_visualizer.js deleted file mode 100644 index fe11f7f4fd5b2..0000000000000 --- a/x-pack/legacy/plugins/ml/server/routes/data_visualizer.js +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ - -import { callWithRequestFactory } from '../client/call_with_request_factory'; -import { wrapError } from '../client/errors'; -import { DataVisualizer } from '../models/data_visualizer'; - -function getOverallStats( - callWithRequest, - indexPatternTitle, - query, - aggregatableFields, - nonAggregatableFields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs -) { - const dv = new DataVisualizer(callWithRequest); - return dv.getOverallStats( - indexPatternTitle, - query, - aggregatableFields, - nonAggregatableFields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs - ); -} - -function getStatsForFields( - callWithRequest, - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs, - interval, - maxExamples -) { - const dv = new DataVisualizer(callWithRequest); - return dv.getStatsForFields( - indexPatternTitle, - query, - fields, - samplerShardSize, - timeFieldName, - earliestMs, - latestMs, - interval, - maxExamples - ); -} - -export function dataVisualizerRoutes({ commonRouteConfig, elasticsearchPlugin, route }) { - route({ - method: 'POST', - path: '/api/ml/data_visualizer/get_field_stats/{indexPatternTitle}', - handler(request) { - const callWithRequest = callWithRequestFactory(elasticsearchPlugin, request); - const indexPatternTitle = request.params.indexPatternTitle; - const payload = request.payload; - return getStatsForFields( - callWithRequest, - indexPatternTitle, - payload.query, - payload.fields, - payload.samplerShardSize, - payload.timeFieldName, - payload.earliest, - payload.latest, - payload.interval, - payload.maxExamples - ).catch(resp => wrapError(resp)); - }, - config: { - ...commonRouteConfig, - }, - }); - - route({ - method: 'POST', - path: '/api/ml/data_visualizer/get_overall_stats/{indexPatternTitle}', - handler(request) { - const callWithRequest = callWithRequestFactory(elasticsearchPlugin, request); - const indexPatternTitle = request.params.indexPatternTitle; - const payload = request.payload; - return getOverallStats( - callWithRequest, - indexPatternTitle, - payload.query, - payload.aggregatableFields, - payload.nonAggregatableFields, - payload.samplerShardSize, - payload.timeFieldName, - payload.earliest, - payload.latest - ).catch(resp => wrapError(resp)); - }, - config: { - ...commonRouteConfig, - }, - }); -} diff --git a/x-pack/legacy/plugins/ml/server/routes/data_visualizer.ts b/x-pack/legacy/plugins/ml/server/routes/data_visualizer.ts new file mode 100644 index 0000000000000..235fc26d78441 --- /dev/null +++ b/x-pack/legacy/plugins/ml/server/routes/data_visualizer.ts @@ -0,0 +1,173 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { RequestHandlerContext } from 'kibana/server'; +import { wrapError } from '../client/error_wrapper'; +import { DataVisualizer } from '../models/data_visualizer'; +import { Field } from '../models/data_visualizer/data_visualizer'; +import { + dataVisualizerFieldStatsSchema, + dataVisualizerOverallStatsSchema, +} from '../new_platform/data_visualizer_schema'; +import { licensePreRoutingFactory } from '../new_platform/licence_check_pre_routing_factory'; +import { RouteInitialization } from '../new_platform/plugin'; + +function getOverallStats( + context: RequestHandlerContext, + indexPatternTitle: string, + query: object, + aggregatableFields: string[], + nonAggregatableFields: string[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number +) { + const dv = new DataVisualizer(context); + return dv.getOverallStats( + indexPatternTitle, + query, + aggregatableFields, + nonAggregatableFields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs + ); +} + +function getStatsForFields( + context: RequestHandlerContext, + indexPatternTitle: string, + query: any, + fields: Field[], + samplerShardSize: number, + timeFieldName: string, + earliestMs: number, + latestMs: number, + interval: number, + maxExamples: number +) { + const dv = new DataVisualizer(context); + return dv.getStatsForFields( + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliestMs, + latestMs, + interval, + maxExamples + ); +} + +/** + * Routes for the index data visualizer. + */ +export function dataVisualizerRoutes({ xpackMainPlugin, router }: RouteInitialization) { + /** + * @apiGroup DataVisualizer + * + * @api {post} /api/ml/data_visualizer/get_field_stats/:indexPatternTitle Get stats for fields + * @apiName GetStatsForFields + * @apiDescription Returns fields stats of the index pattern. + * + * @apiParam {String} indexPatternTitle Index pattern title. + */ + router.post( + { + path: '/api/ml/data_visualizer/get_field_stats/{indexPatternTitle}', + validate: dataVisualizerFieldStatsSchema, + }, + licensePreRoutingFactory(xpackMainPlugin, async (context, request, response) => { + try { + const { + params: { indexPatternTitle }, + body: { + query, + fields, + samplerShardSize, + timeFieldName, + earliest, + latest, + interval, + maxExamples, + }, + } = request; + + const results = await getStatsForFields( + context, + indexPatternTitle, + query, + fields, + samplerShardSize, + timeFieldName, + earliest, + latest, + interval, + maxExamples + ); + + return response.ok({ + body: results, + }); + } catch (e) { + return response.customError(wrapError(e)); + } + }) + ); + + /** + * @apiGroup DataVisualizer + * + * @api {post} /api/ml/data_visualizer/get_overall_stats/:indexPatternTitle Get overall stats + * @apiName GetOverallStats + * @apiDescription Returns overall stats of the index pattern. + * + * @apiParam {String} indexPatternTitle Index pattern title. + */ + router.post( + { + path: '/api/ml/data_visualizer/get_overall_stats/{indexPatternTitle}', + validate: dataVisualizerOverallStatsSchema, + }, + licensePreRoutingFactory(xpackMainPlugin, async (context, request, response) => { + try { + const { + params: { indexPatternTitle }, + body: { + query, + aggregatableFields, + nonAggregatableFields, + samplerShardSize, + timeFieldName, + earliest, + latest, + }, + } = request; + + const results = await getOverallStats( + context, + indexPatternTitle, + query, + aggregatableFields, + nonAggregatableFields, + samplerShardSize, + timeFieldName, + earliest, + latest + ); + + return response.ok({ + body: results, + }); + } catch (e) { + return response.customError(wrapError(e)); + } + }) + ); +}