diff --git a/src/plugins/profiling/common/flamegraph.ts b/src/plugins/profiling/common/flamegraph.ts index 8ed0497fe6b84..3308ad16ad8a6 100644 --- a/src/plugins/profiling/common/flamegraph.ts +++ b/src/plugins/profiling/common/flamegraph.ts @@ -77,7 +77,6 @@ export class FlameGraph { private getExeFileName(exe: any, type: number) { if (exe?.FileName === undefined) { - this.logger.warn('missing executable FileName'); return ''; } if (exe.FileName !== '') { diff --git a/src/plugins/profiling/server/routes/compat.ts b/src/plugins/profiling/server/routes/compat.ts new file mode 100644 index 0000000000000..4dce7cd170b2b --- /dev/null +++ b/src/plugins/profiling/server/routes/compat.ts @@ -0,0 +1,57 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +// Code that works around incompatibilities between different +// versions of Kibana / ES. +// Currently, we work with 8.1 and 8.3 and thus this code only needs +// to address the incompatibilities between those two versions. + +import type { TransportResult } from '@elastic/transport/lib/types'; +import type { + SearchResponse, + SearchHitsMetadata, + SearchHit, + MgetResponse, + MgetResponseItem, + AggregationsAggregate, +} from '@elastic/elasticsearch/lib/api/types'; +import type { ElasticsearchClient } from 'kibana/server'; +import type { DataRequestHandlerContext } from '../../../data/server'; + +// Search results in 8.1 have 'body' but not in 8.3. +export function getHits( + res: TransportResult>, unknown> +): SearchHitsMetadata { + return 'body' in res ? res.body.hits : res.hits; +} + +export function getAggs( + res: TransportResult>, unknown> +): Record | undefined { + return 'body' in res ? res.body.aggregations : res.aggregations; +} + +export function getHitsItems( + res: TransportResult>, unknown> +): Array> { + return getHits(res)?.hits ?? []; +} + +// Mget results in 8.1 have 'body' but not in 8.3. +export function getDocs( + res: TransportResult, unknown> +): Array> { + return ('body' in res ? res.body.docs : res.docs) ?? []; +} + +// In 8.3, context.core is a Promise. +export async function getClient(context: DataRequestHandlerContext): Promise { + return typeof context.core.then === 'function' + ? (await context.core).elasticsearch.client.asCurrentUser + : context.core.elasticsearch.client.asCurrentUser; +} diff --git a/src/plugins/profiling/server/routes/downsampling.ts b/src/plugins/profiling/server/routes/downsampling.ts index 64be75569b099..dc33895333cb0 100644 --- a/src/plugins/profiling/server/routes/downsampling.ts +++ b/src/plugins/profiling/server/routes/downsampling.ts @@ -10,6 +10,7 @@ import seedrandom from 'seedrandom'; import type { ElasticsearchClient, Logger } from 'kibana/server'; import { ProjectTimeQuery } from './mappings'; import { StackTraceID } from '../../common/profiling'; +import { getHits } from './compat'; export interface DownsampledEventsIndex { name: string; @@ -97,7 +98,7 @@ export async function findDownsampledIndex( track_total_hits: true, }, }); - sampleCountFromInitialExp = resp.body.hits.total?.value as number; + sampleCountFromInitialExp = getHits(resp).total?.value as number; } catch (e) { logger.info(e.message); } diff --git a/src/plugins/profiling/server/routes/flamechart.ts b/src/plugins/profiling/server/routes/flamechart.ts index e39451f33c826..153d77f977849 100644 --- a/src/plugins/profiling/server/routes/flamechart.ts +++ b/src/plugins/profiling/server/routes/flamechart.ts @@ -15,6 +15,7 @@ import { logExecutionLatency } from './logger'; import { newProjectTimeQuery, ProjectTimeQuery } from './mappings'; import { downsampleEventsRandomly, findDownsampledIndex } from './downsampling'; import { mgetExecutables, mgetStackFrames, mgetStackTraces, searchStackTraces } from './stacktrace'; +import { getHitsItems, getAggs, getClient } from './compat'; export function parallelMget( nQueries: number, @@ -69,21 +70,18 @@ async function queryFlameGraph( return await client.search( { index: eventsIndex.name, - size: 0, + track_total_hits: false, query: filter, aggs: { group_by: { - composite: { - size: 100000, // This is the upper limit of entries per event index. - sources: [ - { - traceid: { - terms: { - field: 'StackTraceID', - }, - }, - }, - ], + terms: { + // 'size' should be max 100k, but might be slightly more. Better be on the safe side. + size: 150000, + field: 'StackTraceID', + // 'execution_hint: map' skips the slow building of ordinals that we don't need. + // Especially with high cardinality fields, this makes aggregations really slow. + // E.g. it reduces the latency from 70s to 0.7s on our 8.1. MVP cluster (as of 28.04.2022). + execution_hint: 'map', }, aggs: { count: { @@ -113,12 +111,12 @@ async function queryFlameGraph( } ); - let totalCount: number = resEvents.body.aggregations?.total_count.value; - let stackTraceEvents = new Map(); + let totalCount: number = getAggs(resEvents)?.total_count.value; + const stackTraceEvents = new Map(); await logExecutionLatency(logger, 'processing events data', async () => { - resEvents.body.aggregations?.group_by.buckets.forEach((item: any) => { - const traceid: StackTraceID = item.key.traceid; + getAggs(resEvents)?.group_by.buckets.forEach((item: any) => { + const traceid: StackTraceID = item.key; stackTraceEvents.set(traceid, item.count.value); }); }); @@ -180,7 +178,7 @@ export function registerFlameChartElasticSearchRoute( const targetSampleSize = 20000; // minimum number of samples to get statistically sound results try { - const esClient = context.core.elasticsearch.client.asCurrentUser; + const esClient = await getClient(context); const filter = newProjectTimeQuery(projectID!, timeFrom!, timeTo!); const flamegraph = await queryFlameGraph( @@ -231,7 +229,7 @@ export function registerFlameChartPixiSearchRoute( const targetSampleSize = 20000; // minimum number of samples to get statistically sound results try { - const esClient = context.core.elasticsearch.client.asCurrentUser; + const esClient = await getClient(context); const filter = newProjectTimeQuery(projectID!, timeFrom!, timeTo!); const flamegraph = await queryFlameGraph( diff --git a/src/plugins/profiling/server/routes/stacktrace.ts b/src/plugins/profiling/server/routes/stacktrace.ts index bca19691dc46e..957d701c1f85b 100644 --- a/src/plugins/profiling/server/routes/stacktrace.ts +++ b/src/plugins/profiling/server/routes/stacktrace.ts @@ -18,6 +18,7 @@ import { StackTraceID, } from '../../common/profiling'; import { logExecutionLatency } from './logger'; +import { getHitsItems, getDocs } from './compat'; const traceLRU = new LRUCache({ max: 20000 }); const frameIDToFileIDCache = new LRUCache({ max: 100000 }); @@ -105,7 +106,7 @@ export async function searchStackTraces( const executableDocIDs = new Set(); // Set of unique executable FileIDs. await logExecutionLatency(logger, 'processing data', async () => { - const traces = stackResponses.flatMap((response) => response.body.hits.hits); + const traces = stackResponses.flatMap((response) => getHitsItems(response)); for (const trace of traces) { const frameIDs = trace.fields.FrameID as string[]; const fileIDs = extractFileIDArrayFromFrameIDArray(frameIDs); @@ -149,7 +150,7 @@ export async function mgetStackTraces( const stackResponses = await logExecutionLatency( logger, - 'mget query for ' + events.size + ' stacktraces', + 'mget query (' + concurrency + ' parallel) for ' + events.size + ' stacktraces', async () => { return await Promise.all( chunks.map((ids) => { @@ -172,7 +173,7 @@ export async function mgetStackTraces( await logExecutionLatency(logger, 'processing data', async () => { // flatMap() is significantly slower than an explicit for loop for (const res of stackResponses) { - for (const trace of res.body.docs) { + for (const trace of getDocs(res)) { // Sometimes we don't find the trace. // This is due to ES delays writing (data is not immediately seen after write). // Also, ES doesn't know about transactions. @@ -220,6 +221,12 @@ export async function mgetStackFrames( client: ElasticsearchClient, stackFrameIDs: Set ): Promise> { + const stackFrames = new Map(); + + if (stackFrameIDs.size === 0) { + return stackFrames; + } + const resStackFrames = await logExecutionLatency( logger, 'mget query for ' + stackFrameIDs.size + ' stackframes', @@ -233,10 +240,9 @@ export async function mgetStackFrames( ); // Create a lookup map StackFrameID -> StackFrame. - const stackFrames = new Map(); let framesFound = 0; await logExecutionLatency(logger, 'processing data', async () => { - const docs = resStackFrames.body?.docs ?? []; + const docs = getDocs(resStackFrames); for (const frame of docs) { if (frame.found) { stackFrames.set(frame._id, frame._source); @@ -263,6 +269,12 @@ export async function mgetExecutables( client: ElasticsearchClient, executableIDs: Set ): Promise> { + const executables = new Map(); + + if (executableIDs.size === 0) { + return executables; + } + const resExecutables = await logExecutionLatency( logger, 'mget query for ' + executableIDs.size + ' executables', @@ -276,9 +288,8 @@ export async function mgetExecutables( ); // Create a lookup map StackFrameID -> StackFrame. - const executables = new Map(); await logExecutionLatency(logger, 'processing data', async () => { - const docs = resExecutables.body?.docs ?? []; + const docs = getDocs(resExecutables); for (const exe of docs) { if (exe.found) { executables.set(exe._id, exe._source); diff --git a/src/plugins/profiling/server/routes/topn.ts b/src/plugins/profiling/server/routes/topn.ts index 63965dc20c2e0..0b8cb9b80dc86 100644 --- a/src/plugins/profiling/server/routes/topn.ts +++ b/src/plugins/profiling/server/routes/topn.ts @@ -20,6 +20,7 @@ import { findDownsampledIndex } from './downsampling'; import { logExecutionLatency } from './logger'; import { autoHistogramSumCountOnGroupByField, newProjectTimeQuery } from './mappings'; import { mgetExecutables, mgetStackFrames, mgetStackTraces } from './stacktrace'; +import { getClient, getAggs } from './compat'; export async function topNElasticSearchQuery( client: ElasticsearchClient, @@ -67,7 +68,7 @@ export async function topNElasticSearchQuery( } ); - const histogram = resEvents.body.aggregations?.histogram as AggregationsHistogramAggregate; + const histogram = getAggs(resEvents)?.histogram as AggregationsHistogramAggregate; const topN = createTopNBucketsByDate(histogram); if (searchField !== 'StackTraceID') { @@ -136,7 +137,7 @@ export function queryTopNCommon( }, async (context, request, response) => { const { index, projectID, timeFrom, timeTo, n } = request.query; - const client = context.core.elasticsearch.client.asCurrentUser; + const client = await getClient(context); try { return await topNElasticSearchQuery(