Skip to content

Commit

Permalink
[ML] Explain log rate spikes: Add field histograms to analysis result. (
Browse files Browse the repository at this point in the history
#136295)

- Extends the analysis API to return histogram data.
- Adds a column with MiniHistogram components to the analysis results table.
- Moves/consolidates fetchHistogramsForFields to @kbn/ml-agg-utils (also used in Data Visualizer and Data Grid Mini Histograms).
- So far fetchHistogramsForFields auto-identified the necessary interval and min/max. To be able to generate histogram data for the log rate spikes charts, an option was added to supply that information up front when fetching the data. This allows the chart data for the overall (green bars) and the field/value-filtered (orange bars) histograms to have the exact same buckets.
  • Loading branch information
walterra authored Jul 20, 2022
1 parent 5aad18f commit 092fb35
Show file tree
Hide file tree
Showing 26 changed files with 672 additions and 314 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import { get } from 'lodash';

import type { Client } from '@elastic/elasticsearch';
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';

import { KBN_FIELD_TYPES } from '@kbn/field-types';
Expand All @@ -16,39 +15,14 @@ import { stringHash } from '@kbn/ml-string-hash';

import { buildSamplerAggregation } from './build_sampler_aggregation';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';

// TODO Temporary type definition until we can import from `@kbn/core`.
// Copied from src/core/server/elasticsearch/client/types.ts
// as these types aren't part of any package yet. Once they are, remove this completely

/**
* Client used to query the elasticsearch cluster.
* @deprecated At some point use the one from src/core/server/elasticsearch/client/types.ts when it is made into a package. If it never is, then keep using this one.
* @public
*/
type ElasticsearchClient = Omit<
Client,
'connectionPool' | 'serializer' | 'extend' | 'close' | 'diagnostic'
>;
import type { ElasticsearchClient, HistogramField, NumericColumnStatsMap } from './types';

const MAX_CHART_COLUMNS = 20;

interface HistogramField {
fieldName: string;
type: string;
}

interface NumericColumnStats {
interval: number;
min: number;
max: number;
}
type NumericColumnStatsMap = Record<string, NumericColumnStats>;

/**
* Returns aggregation intervals for the supplied document fields.
*/
export const getAggIntervals = async (
export const fetchAggIntervals = async (
client: ElasticsearchClient,
indexPattern: string,
query: estypes.QueryDslQueryContainer,
Expand Down
254 changes: 254 additions & 0 deletions x-pack/packages/ml/agg_utils/src/fetch_histograms_for_fields.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import get from 'lodash/get';

import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';

import { KBN_FIELD_TYPES } from '@kbn/field-types';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { stringHash } from '@kbn/ml-string-hash';

import { buildSamplerAggregation } from './build_sampler_aggregation';
import { fetchAggIntervals } from './fetch_agg_intervals';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
import type {
AggCardinality,
ElasticsearchClient,
HistogramField,
NumericColumnStats,
NumericColumnStatsMap,
} from './types';

// Maximum number of buckets requested via the `size` of the `terms` aggregation built per ordinal field.
const MAX_CHART_COLUMNS = 20;

/**
 * Request shape of the `histogram` aggregation that is built for numeric and
 * date fields.
 */
interface AggHistogram {
histogram: {
// Name of the field the histogram is computed for.
field: string;
// Bucket interval passed to the aggregation.
interval: number;
};
}

/**
 * Request shape of the `terms` aggregation that is built for ordinal
 * (string and boolean) fields.
 */
interface AggTerms {
terms: {
// Name of the field the terms are collected for.
field: string;
// Maximum number of term buckets to request.
size: number;
};
}

/**
 * A single entry of the `data` array of `NumericChartData`. The entries are
 * taken as-is from the `buckets` of the `histogram` aggregation response.
 */
interface NumericDataItem {
// Numeric bucket key.
key: number;
// Optional string representation of the key.
key_as_string?: string;
// Document count reported for the bucket.
doc_count: number;
}

/**
 * Interface to describe the data structure returned for numeric based charts,
 * i.e. for fields of type `date` or `number`.
 */
export interface NumericChartData {
// Histogram buckets; empty when no interval information is available for the field.
data: NumericDataItem[];
// The name of the field the chart data belongs to.
id: string;
// The histogram interval associated with the field.
interval: number;
// Tuple of `[min, max]` for the field.
stats: [number, number];
// Discriminator identifying numeric chart data.
type: 'numeric';
}

/**
 * Numeric based histogram field interface, limited to `date` and `number`.
 */
export interface NumericHistogramField extends HistogramField {
type: KBN_FIELD_TYPES.DATE | KBN_FIELD_TYPES.NUMBER;
}
// A numeric histogram field that additionally carries the attributes of
// `NumericColumnStats`, so they can be used directly instead of being fetched.
type NumericHistogramFieldWithColumnStats = NumericHistogramField & NumericColumnStats;

/**
 * Type guard for numeric histogram fields: a populated object that has the
 * attributes `fieldName` and `type`, with `type` being `date` or `number`.
 *
 * @param arg the value to check
 * @returns whether `arg` is a `NumericHistogramField`
 */
function isNumericHistogramField(arg: unknown): arg is NumericHistogramField {
  if (!isPopulatedObject(arg, ['fieldName', 'type'])) {
    return false;
  }

  if (arg.type === KBN_FIELD_TYPES.DATE) {
    return true;
  }

  return arg.type === KBN_FIELD_TYPES.NUMBER;
}
/**
 * Type guard for numeric histogram fields that already carry their column
 * stats: a populated object that has the attributes `fieldName`, `type`,
 * `min`, `max` and `interval`, with `type` being `date` or `number`.
 *
 * @param arg the value to check
 * @returns whether `arg` is a `NumericHistogramFieldWithColumnStats`
 */
function isNumericHistogramFieldWithColumnStats(
  arg: unknown
): arg is NumericHistogramFieldWithColumnStats {
  if (!isPopulatedObject(arg, ['fieldName', 'type', 'min', 'max', 'interval'])) {
    return false;
  }

  if (arg.type === KBN_FIELD_TYPES.DATE) {
    return true;
  }

  return arg.type === KBN_FIELD_TYPES.NUMBER;
}

/**
 * A single entry of the `data` array of `OrdinalChartData`. The entries are
 * taken as-is from the `buckets` of the `terms` aggregation response.
 */
interface OrdinalDataItem {
// Term bucket key.
key: string;
// Optional string representation of the key.
key_as_string?: string;
// Document count reported for the bucket.
doc_count: number;
}

/**
 * Data structure returned for ordinal based charts, i.e. for fields of type
 * `string` or `boolean`.
 */
interface OrdinalChartData {
// `'ordinal'` for string fields, `'boolean'` for boolean fields.
type: 'ordinal' | 'boolean';
// Result of the cardinality aggregation for string fields, fixed to 2 for boolean fields.
cardinality: number;
// Buckets of the terms aggregation.
data: OrdinalDataItem[];
// The name of the field the chart data belongs to.
id: string;
}

/**
 * Ordinal based histogram field interface, limited to `string` and `boolean`.
 */
interface OrdinalHistogramField extends HistogramField {
type: KBN_FIELD_TYPES.STRING | KBN_FIELD_TYPES.BOOLEAN;
}

/**
 * Type guard for ordinal histogram fields: a populated object that has the
 * attributes `fieldName` and `type`, with `type` being `string` or `boolean`.
 *
 * @param arg the value to check
 * @returns whether `arg` is an `OrdinalHistogramField`
 */
function isOrdinalHistogramField(arg: unknown): arg is OrdinalHistogramField {
  if (!isPopulatedObject(arg, ['fieldName', 'type'])) {
    return false;
  }

  if (arg.type === KBN_FIELD_TYPES.STRING) {
    return true;
  }

  return arg.type === KBN_FIELD_TYPES.BOOLEAN;
}

/**
 * Placeholder chart data returned for fields that are neither numeric
 * (`date`/`number`) nor ordinal (`string`/`boolean`).
 */
interface UnsupportedChartData {
// The name of the field the entry belongs to.
id: string;
// Discriminator identifying unsupported chart data.
type: 'unsupported';
}

/**
 * Histogram field interface covering every `KBN_FIELD_TYPES` member other than
 * `string`, `boolean`, `date` and `number`.
 */
interface UnsupportedHistogramField extends HistogramField {
type: Exclude<
KBN_FIELD_TYPES,
KBN_FIELD_TYPES.STRING | KBN_FIELD_TYPES.BOOLEAN | KBN_FIELD_TYPES.DATE | KBN_FIELD_TYPES.NUMBER
>;
}

// Union of the aggregation request shapes that can be part of the chart data request.
type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms;

/**
 * All types of histogram field definitions for fetching histogram data.
 * Numeric fields may optionally carry `interval`, `min` and `max` up front
 * (`NumericHistogramFieldWithColumnStats`).
 */
export type FieldsForHistograms = Array<
| NumericHistogramField
| NumericHistogramFieldWithColumnStats
| OrdinalHistogramField
| UnsupportedHistogramField
>;

/**
 * Fetches data to be used in mini histogram charts.
 *
 * For numeric and date fields the histogram interval and min/max values are
 * auto-identified via `fetchAggIntervals`. Fields that already carry
 * `interval`, `min` and `max` are excluded from that lookup and their supplied
 * values are used as-is, which allows callers to request several histograms
 * with identical buckets.
 *
 * String fields get a `cardinality` and a `terms` aggregation, boolean fields
 * a `terms` aggregation only. All other field types are reported as
 * `unsupported`.
 *
 * @param client Elasticsearch Client
 * @param indexPattern index pattern to be queried
 * @param query Elasticsearch query
 * @param fields the fields the histograms should be generated for
 * @param samplerShardSize shard_size parameter of the sampler aggregation
 * @param runtimeMappings optional runtime mappings
 * @returns an array with one chart data entry per supplied field, in the
 *          order of `fields`; an empty array if no aggregation could be
 *          built for any of the fields
 */
export const fetchHistogramsForFields = async (
  client: ElasticsearchClient,
  indexPattern: string,
  query: any,
  fields: FieldsForHistograms,
  samplerShardSize: number,
  runtimeMappings?: estypes.MappingRuntimeFields
) => {
  // Interval/min/max per field, keyed by the hash of the field name. Stats
  // supplied with the field definition are spread last so they take precedence
  // over fetched ones.
  const aggIntervals = {
    ...(await fetchAggIntervals(
      client,
      indexPattern,
      query,
      fields.filter((f) => !isNumericHistogramFieldWithColumnStats(f)),
      samplerShardSize,
      runtimeMappings
    )),
    ...fields.filter(isNumericHistogramFieldWithColumnStats).reduce((p, field) => {
      const { interval, min, max, fieldName } = field;
      p[stringHash(fieldName)] = { interval, min, max };

      return p;
    }, {} as NumericColumnStatsMap),
  };

  const chartDataAggs = fields.reduce((aggs, field) => {
    const id = stringHash(field.fieldName);
    if (isNumericHistogramField(field)) {
      if (aggIntervals[id] !== undefined) {
        aggs[`${id}_histogram`] = {
          histogram: {
            field: field.fieldName,
            // A histogram interval has to be positive, so fall back to 1
            // when the identified interval is 0.
            interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1,
          },
        };
      }
    } else if (isOrdinalHistogramField(field)) {
      // The cardinality is only requested for string fields; for boolean
      // fields a fixed cardinality is reported in the result mapping below.
      if (field.type === KBN_FIELD_TYPES.STRING) {
        aggs[`${id}_cardinality`] = {
          cardinality: {
            field: field.fieldName,
          },
        };
      }
      aggs[`${id}_terms`] = {
        terms: {
          field: field.fieldName,
          size: MAX_CHART_COLUMNS,
        },
      };
    }
    return aggs;
  }, {} as Record<string, ChartRequestAgg>);

  // Nothing to query for, skip the search request entirely.
  if (Object.keys(chartDataAggs).length === 0) {
    return [];
  }

  const body = await client.search(
    {
      index: indexPattern,
      size: 0,
      body: {
        query,
        aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
        size: 0,
        ...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
      },
    },
    { maxRetries: 0 }
  );

  const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
  // May be `undefined` if the response does not contain aggregation results.
  const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;

  return fields.map((field) => {
    const id = stringHash(field.fieldName);

    if (isNumericHistogramField(field)) {
      if (aggIntervals[id] === undefined) {
        return {
          type: 'numeric',
          data: [],
          interval: 0,
          stats: [0, 0],
          id: field.fieldName,
        } as NumericChartData;
      }

      return {
        // Fall back to an empty data set instead of throwing when the
        // aggregation result is missing from the response.
        data: aggregations?.[`${id}_histogram`]?.buckets ?? [],
        interval: aggIntervals[id].interval,
        stats: [aggIntervals[id].min, aggIntervals[id].max],
        type: 'numeric',
        id: field.fieldName,
      } as NumericChartData;
    } else if (isOrdinalHistogramField(field)) {
      return {
        type: field.type === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean',
        cardinality:
          field.type === KBN_FIELD_TYPES.STRING
            ? aggregations?.[`${id}_cardinality`]?.value ?? 0
            : 2,
        data: aggregations?.[`${id}_terms`]?.buckets ?? [],
        id: field.fieldName,
      } as OrdinalChartData;
    }

    return {
      type: 'unsupported',
      id: field.fieldName,
    } as UnsupportedChartData;
  });
};
16 changes: 14 additions & 2 deletions x-pack/packages/ml/agg_utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,19 @@
*/

export { buildSamplerAggregation } from './build_sampler_aggregation';
export { getAggIntervals } from './get_agg_intervals';
export { fetchAggIntervals } from './fetch_agg_intervals';
export { fetchHistogramsForFields } from './fetch_histograms_for_fields';
export { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
export type { NumberValidationResult } from './validate_number';
export { numberValidator } from './validate_number';

export type { FieldsForHistograms } from './fetch_histograms_for_fields';
export type {
AggCardinality,
ChangePoint,
ChangePointHistogram,
ChangePointHistogramItem,
HistogramField,
NumericColumnStats,
NumericColumnStatsMap,
} from './types';
export type { NumberValidationResult } from './validate_number';
Loading

0 comments on commit 092fb35

Please sign in to comment.