From 932e4c822f5d72f6ece430b524a27e548397b396 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Mon, 15 Nov 2021 18:58:51 -0500 Subject: [PATCH] Handles ns to ms conversion for event loop delay metrics (#118447) (#118613) Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Co-authored-by: Christiane (Tina) Heiligers --- .../event_loop_delays_monitor.mocks.ts | 39 ++++++++++++++--- .../event_loop_delays_monitor.test.ts | 16 +++++-- .../event_loop_delays_monitor.ts | 42 ++++++++++++++----- src/core/server/metrics/types.ts | 2 +- .../collectors/event_loop_delays/constants.ts | 7 +--- .../collectors/event_loop_delays/schema.ts | 16 +++---- .../event_loop_delays/track_delays.ts | 1 + .../event_loop_delays/track_threshold.ts | 11 ++--- src/plugins/telemetry/schema/oss_plugins.json | 18 ++++---- .../schema/xpack_plugins.json | 16 +++---- 10 files changed, 109 insertions(+), 59 deletions(-) diff --git a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.mocks.ts b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.mocks.ts index 691bfc5030c93..5559a2be5b8d2 100644 --- a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.mocks.ts +++ b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.mocks.ts @@ -9,15 +9,17 @@ import moment from 'moment'; import type { EventLoopDelaysMonitor } from './event_loop_delays_monitor'; import type { IntervalHistogram } from '../types'; -function createMockHistogram(overwrites: Partial = {}): IntervalHistogram { +function createMockRawNsDataHistogram( + overwrites: Partial = {} +): IntervalHistogram { const now = Date.now(); - return { + const mockedRawCollectedDataInNs = { min: 9093120, max: 53247999, - mean: 11993238.600747818, + mean: 11993238, exceeds: 0, - stddev: 1168191.9357543814, + stddev: 1168191, fromTimestamp: moment(now).toISOString(), lastUpdatedAt: moment(now).toISOString(), percentiles: { @@ -28,6 +30,31 @@ function createMockHistogram(overwrites: Partial = {}): Inter }, ...overwrites, }; + return mockedRawCollectedDataInNs; +} + +function createMockMonitorDataMsHistogram( + overwrites: Partial = {} +): IntervalHistogram { + const now = Date.now(); + + const mockedRawCollectedDataInMs = { + min: 9.09312, + max: 53.247999, + mean: 11.993238, + exceeds: 0, + stddev: 1.168191, + fromTimestamp: moment(now).toISOString(), + lastUpdatedAt: moment(now).toISOString(), + percentiles: { + '50': 12.607487, + '75': 12.615679, + '95': 12.648447, + '99': 12.713983, + }, + ...overwrites, + }; + return mockedRawCollectedDataInMs; } function createMockEventLoopDelaysMonitor() { @@ -40,12 +67,12 @@ function createMockEventLoopDelaysMonitor() { stop: jest.fn(), }); - mockCollect.mockReturnValue(createMockHistogram()); + mockCollect.mockReturnValue(createMockMonitorDataMsHistogram()); // this must mock the return value of the public collect method from this monitor. return new MockEventLoopDelaysMonitor(); } export const mocked = { - createHistogram: createMockHistogram, + createHistogram: createMockRawNsDataHistogram, // raw data as received from Node.js perf_hooks.monitorEventLoopDelay([options]) createEventLoopDelaysMonitor: createMockEventLoopDelaysMonitor, }; diff --git a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.test.ts b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.test.ts index 3e88dbca8f4e7..052f291f76889 100644 --- a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.test.ts +++ b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.test.ts @@ -9,7 +9,7 @@ /* eslint-disable dot-notation */ jest.mock('perf_hooks'); import { monitorEventLoopDelay } from 'perf_hooks'; -import { EventLoopDelaysMonitor } from './event_loop_delays_monitor'; +import { EventLoopDelaysMonitor, nsToMs } from './event_loop_delays_monitor'; import { mocked } from './event_loop_delays_monitor.mocks'; describe('EventLoopDelaysMonitor', () => { @@ -39,10 +39,20 @@ describe('EventLoopDelaysMonitor', () => { expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(3, 95); expect(eventLoopDelaysMonitor['loopMonitor'].percentile).toHaveBeenNthCalledWith(4, 99); - // mocked perf_hook returns `mocked.createHistogram()`. + // mocked perf_hook returns `mocked.createNsHistogram()` that returns data in ns. + // The `collect` function returns the data in ms. // This ensures that the wiring of the `collect` function is correct. const mockedHistogram = mocked.createHistogram(); - expect(histogramData).toEqual(mockedHistogram); + + expect(histogramData.min).toEqual(nsToMs(mockedHistogram.min)); + expect(histogramData.max).toEqual(nsToMs(mockedHistogram.max)); + expect(histogramData.mean).toEqual(nsToMs(mockedHistogram.mean)); + expect(histogramData.stddev).toEqual(nsToMs(mockedHistogram.stddev)); + expect(histogramData.exceeds).toEqual(nsToMs(mockedHistogram.exceeds)); + expect(histogramData.percentiles['50']).toEqual(nsToMs(mockedHistogram.percentiles['50'])); + expect(histogramData.percentiles['75']).toEqual(nsToMs(mockedHistogram.percentiles['75'])); + expect(histogramData.percentiles['95']).toEqual(nsToMs(mockedHistogram.percentiles['95'])); + expect(histogramData.percentiles['99']).toEqual(nsToMs(mockedHistogram.percentiles['99'])); }); test('#reset resets histogram data', () => { diff --git a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.ts b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.ts index 0f3035c14a923..d36d5428be4d9 100644 --- a/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.ts +++ b/src/core/server/metrics/event_loop_delays/event_loop_delays_monitor.ts @@ -10,6 +10,18 @@ import type { IntervalHistogram as PerfIntervalHistogram } from 'perf_hooks'; import { monitorEventLoopDelay } from 'perf_hooks'; import type { IntervalHistogram } from '../types'; +/** + * Nanosecond to milisecond conversion unit + */ +export const ONE_MILLISECOND_AS_NANOSECONDS = 1_000_000; + +/** + * Converts time metric from ns to ms + **/ +export function nsToMs(metric: number) { + return metric / ONE_MILLISECOND_AS_NANOSECONDS; +} + export class EventLoopDelaysMonitor { private readonly loopMonitor: PerfIntervalHistogram; private fromTimestamp: Date; @@ -28,26 +40,36 @@ export class EventLoopDelaysMonitor { * Collect gathers event loop delays metrics from nodejs perf_hooks.monitorEventLoopDelay * the histogram calculations start from the last time `reset` was called or this * EventLoopDelaysMonitor instance was created. + * + * Returns metrics in milliseconds. + * @returns {IntervalHistogram} */ + public collect(): IntervalHistogram { const lastUpdated = new Date(); this.loopMonitor.disable(); - const { min, max, mean, exceeds, stddev } = this.loopMonitor; + const { + min: minNs, + max: maxNs, + mean: meanNs, + exceeds: exceedsNs, + stddev: stddevNs, + } = this.loopMonitor; const collectedData: IntervalHistogram = { - min, - max, - mean, - exceeds, - stddev, + min: nsToMs(minNs), + max: nsToMs(maxNs), + mean: nsToMs(meanNs), + exceeds: nsToMs(exceedsNs), + stddev: nsToMs(stddevNs), fromTimestamp: this.fromTimestamp.toISOString(), lastUpdatedAt: lastUpdated.toISOString(), percentiles: { - 50: this.loopMonitor.percentile(50), - 75: this.loopMonitor.percentile(75), - 95: this.loopMonitor.percentile(95), - 99: this.loopMonitor.percentile(99), + 50: nsToMs(this.loopMonitor.percentile(50)), + 75: nsToMs(this.loopMonitor.percentile(75)), + 95: nsToMs(this.loopMonitor.percentile(95)), + 99: nsToMs(this.loopMonitor.percentile(99)), }, }; diff --git a/src/core/server/metrics/types.ts b/src/core/server/metrics/types.ts index 550a60d0d295a..20594f5122f08 100644 --- a/src/core/server/metrics/types.ts +++ b/src/core/server/metrics/types.ts @@ -70,7 +70,7 @@ export interface OpsMetrics { /** * an IntervalHistogram object that samples and reports the event loop delay over time. - * The delays will be reported in nanoseconds. + * The delays will be reported in milliseconds. * * @public */ diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts index 4661441a15a6b..c6e7604723a7a 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/constants.ts @@ -32,7 +32,7 @@ export const MONITOR_EVENT_LOOP_DELAYS_RESET = 24 * 60 * 60 * 1000; export const MONITOR_EVENT_LOOP_DELAYS_START = 1 * 60 * 1000; /** - * Mean event loop delay threshold for logging a warning. + * Mean event loop delay threshold in ms for logging a warning. */ export const MONITOR_EVENT_LOOP_WARN_THRESHOLD = 350; @@ -45,8 +45,3 @@ export const MONITOR_EVENT_LOOP_THRESHOLD_START = 1 * 60 * 1000; * Check the event loop utilization every 30 seconds */ export const MONITOR_EVENT_LOOP_THRESHOLD_INTERVAL = 30 * 1000; - -/** - * Nanosecond to milisecond conversion unit - */ -export const ONE_MILLISECOND_AS_NANOSECONDS = 1000000; diff --git a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/schema.ts b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/schema.ts index 757e96e5602f0..7ac0bd408c49b 100644 --- a/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/schema.ts +++ b/src/plugins/kibana_usage_collection/server/collectors/event_loop_delays/schema.ts @@ -59,19 +59,19 @@ export const eventLoopDelaysUsageSchema: MakeSchemaFrom eventLoopDelaysMonitor.stop()) ) .subscribe(async () => { - const { mean } = eventLoopDelaysMonitor.collect(); - const meanDurationMs = moment - .duration(mean / ONE_MILLISECOND_AS_NANOSECONDS) - .asMilliseconds(); + const { mean: meanMS } = eventLoopDelaysMonitor.collect(); - if (meanDurationMs > warnThreshold) { + if (meanMS > warnThreshold) { logger.warn( - `Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanDurationMs}ms. ` + + `Average event loop delay threshold exceeded ${warnThreshold}ms. Received ${meanMS}ms. ` + `See https://ela.st/kibana-scaling-considerations for more information about scaling Kibana.` ); diff --git a/src/plugins/telemetry/schema/oss_plugins.json b/src/plugins/telemetry/schema/oss_plugins.json index e0991d5e984b7..54032badcd8e0 100644 --- a/src/plugins/telemetry/schema/oss_plugins.json +++ b/src/plugins/telemetry/schema/oss_plugins.json @@ -7075,19 +7075,19 @@ "min": { "type": "long", "_meta": { - "description": "The minimum recorded event loop delay." + "description": "The minimum recorded event loop delay in ms." } }, "max": { "type": "long", "_meta": { - "description": "The maximum recorded event loop delay." + "description": "The maximum recorded event loop delay in ms." } }, "mean": { "type": "long", "_meta": { - "description": "The mean of the recorded event loop delays." + "description": "The mean of the recorded event loop delays in ms." } }, "exceeds": { @@ -7099,7 +7099,7 @@ "stddev": { "type": "long", "_meta": { - "description": "The standard deviation of the recorded event loop delays." + "description": "The standard deviation of the recorded event loop delays in ms." } }, "percentiles": { @@ -7107,25 +7107,25 @@ "50": { "type": "long", "_meta": { - "description": "The 50th accumulated percentile distribution" + "description": "The 50th accumulated percentile distribution in ms" } }, "75": { "type": "long", "_meta": { - "description": "The 75th accumulated percentile distribution" + "description": "The 75th accumulated percentile distribution in ms" } }, "95": { "type": "long", "_meta": { - "description": "The 95th accumulated percentile distribution" + "description": "The 95th accumulated percentile distribution in ms" } }, "99": { "type": "long", "_meta": { - "description": "The 99th accumulated percentile distribution" + "description": "The 99th accumulated percentile distribution in ms" } } } @@ -9308,4 +9308,4 @@ } } } -} \ No newline at end of file +} diff --git a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json index f4158fe8fbf1e..639782bdc9766 100644 --- a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json +++ b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json @@ -154,29 +154,29 @@ } } }, - "throttle_time_number_s": { + "schedule_time": { "properties": { "min": { - "type": "long" + "type": "keyword" }, "avg": { - "type": "float" + "type": "keyword" }, "max": { - "type": "long" + "type": "keyword" } } }, - "schedule_time": { + "throttle_time_number_s": { "properties": { "min": { - "type": "keyword" + "type": "long" }, "avg": { - "type": "keyword" + "type": "float" }, "max": { - "type": "keyword" + "type": "long" } } },