From 5f850dbf74a6fa0601a57a368373767b3acd7006 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 18 Sep 2020 11:21:39 +0100 Subject: [PATCH 01/67] moved TM integration tests to TS --- .../task_manager/{index.js => index.ts} | 6 +- ...ager_integration.js => task_management.ts} | 67 +++++++++++-------- 2 files changed, 44 insertions(+), 29 deletions(-) rename x-pack/test/plugin_api_integration/test_suites/task_manager/{index.js => index.ts} (64%) rename x-pack/test/plugin_api_integration/test_suites/task_manager/{task_manager_integration.js => task_management.ts} (91%) diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.js b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts similarity index 64% rename from x-pack/test/plugin_api_integration/test_suites/task_manager/index.js rename to x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts index 93350ad4d2c8..c6d817119d41 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.js +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts @@ -4,9 +4,11 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -export default function ({ loadTestFile }) { +import { FtrProviderContext } from '../../ftr_provider_context'; + +export default function ({ loadTestFile }: FtrProviderContext) { describe('task_manager', function taskManagerSuite() { this.tags('ciGroup2'); - loadTestFile(require.resolve('./task_manager_integration')); + loadTestFile(require.resolve('./task_management')); }); } diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_manager_integration.js b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts similarity index 91% rename from x-pack/test/plugin_api_integration/test_suites/task_manager/task_manager_integration.js rename to x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts index c87a5039360b..fcf2d5b23512 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_manager_integration.js +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts @@ -8,19 +8,33 @@ import _ from 'lodash'; import expect from '@kbn/expect'; import url from 'url'; import supertestAsPromised from 'supertest-as-promised'; +import { FtrProviderContext } from '../../ftr_provider_context'; +import TaskManagerMapping from '../../../../plugins/task_manager/server/saved_objects/mappings.json'; +import { + DEFAULT_MAX_WORKERS, + DEFAULT_POLL_INTERVAL, +} from '../../../../plugins/task_manager/server/config'; +import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; +import { SavedObjectsRawDoc } from '../../../../../src/core/server'; const { task: { properties: taskManagerIndexMapping }, -} = require('../../../../plugins/task_manager/server/saved_objects/mappings.json'); +} = TaskManagerMapping; -const { - DEFAULT_MAX_WORKERS, - DEFAULT_POLL_INTERVAL, -} = require('../../../../plugins/task_manager/server/config.ts'); +const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); -const delay = (ms) => new 
Promise((resolve) => setTimeout(resolve, ms)); +export interface RawDoc { + _id: string; + _source: any; + _type?: string; +} +export interface SearchResults { + hits: { + hits: RawDoc[]; + }; +} -export default function ({ getService }) { +export default function ({ getService }: FtrProviderContext) { const es = getService('legacyEs'); const log = getService('log'); const retry = getService('retry'); @@ -53,14 +67,16 @@ export default function ({ getService }) { } }); - function currentTasks() { + function currentTasks(): Promise<{ + docs: ConcreteTaskInstance[]; + }> { return supertest .get('/api/sample_tasks') .expect(200) .then((response) => response.body); } - function currentTask(task) { + function currentTask(task: string): ConcreteTaskInstance { return supertest .get(`/api/sample_tasks/task/${task}`) .send({ task }) @@ -68,30 +84,26 @@ export default function ({ getService }) { .then((response) => response.body); } - function ensureTasksIndexRefreshed() { - return supertest - .get(`/api/ensure_tasks_index_refreshed`) - .send({}) - .expect(200) - .then((response) => response.body); + function ensureTasksIndexRefreshed(): Promise { + return supertest.get(`/api/ensure_tasks_index_refreshed`).send({}).expect(200); } - function historyDocs(taskId) { + function historyDocs(taskId: string) { return es .search({ index: testHistoryIndex, q: taskId ? 
`taskId:${taskId}` : 'type:task', }) - .then((result) => result.hits.hits); + .then((result: SearchResults) => result.hits.hits); } - function scheduleTask(task) { + function scheduleTask(task: string): ConcreteTaskInstance { return supertest .post('/api/sample_tasks/schedule') .set('kbn-xsrf', 'xxx') .send({ task }) .expect(200) - .then((response) => response.body); + .then((response: { body: ConcreteTaskInstance }) => response.body); } function runTaskNow(task) { @@ -109,7 +121,7 @@ export default function ({ getService }) { .set('kbn-xsrf', 'xxx') .send({ task }) .expect(200) - .then((response) => response.body); + .then((response: { body: ConcreteTaskInstance }) => response.body); } function releaseTasksWaitingForEventToComplete(event) { @@ -120,11 +132,14 @@ export default function ({ getService }) { .expect(200); } - function getTaskById(tasks, id) { + function getTaskById(tasks: ConcreteTaskInstance[], id: string) { return tasks.filter((task) => task.id === id)[0]; } - async function provideParamsToTasksWaitingForParams(taskId, data = {}) { + async function provideParamsToTasksWaitingForParams( + taskId: string, + data: Record = {} + ) { // wait for task to start running and stall on waitForParams await retry.try(async () => { const tasks = (await currentTasks()).docs; @@ -564,12 +579,10 @@ export default function ({ getService }) { expect(await runNowResultWithExpectedFailure).to.eql({ id: taskThatFailsBeforeRunNow.id }); }); - async function expectReschedule(originalRunAt, currentTask, expectedDiff) { + async function expectReschedule(originalRunAt: number, task: Task, expectedDiff: number) { const buffer = 10000; - expect(Date.parse(currentTask.runAt) - originalRunAt).to.be.greaterThan( - expectedDiff - buffer - ); - expect(Date.parse(currentTask.runAt) - originalRunAt).to.be.lessThan(expectedDiff + buffer); + expect(Date.parse(task.runAt) - originalRunAt).to.be.greaterThan(expectedDiff - buffer); + expect(Date.parse(task.runAt) - 
originalRunAt).to.be.lessThan(expectedDiff + buffer); } it('should run tasks in parallel, allowing for long running tasks along side faster tasks', async () => { From c0a7038f94f8c3a6ca74ded9c8c84561d50ed51a Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 22 Sep 2020 12:36:07 +0100 Subject: [PATCH 02/67] introduce health endpoint in TM --- .../server/action_type_registry.test.ts | 4 +- .../actions/server/actions_client.test.ts | 4 +- .../server/builtin_action_types/index.test.ts | 4 +- .../server/create_execute_function.test.ts | 2 +- .../usage/actions_usage_collector.test.ts | 4 +- .../alerts/server/alert_type_registry.test.ts | 4 +- .../alerts/server/alerts_client.test.ts | 4 +- .../server/alerts_client_factory.test.ts | 4 +- .../usage/alerts_usage_collector.test.ts | 4 +- .../task_manager/server/config.test.ts | 1 + x-pack/plugins/task_manager/server/config.ts | 9 + .../task_manager/server/monitoring/index.ts | 18 ++ .../runtime_statistics_aggregator.ts | 14 ++ .../monitoring/workload_statistics.test.ts | 118 +++++++++++ .../server/monitoring/workload_statistics.ts | 65 ++++++ x-pack/plugins/task_manager/server/plugin.ts | 16 +- .../server/queries/aggregation_clauses.ts | 84 ++++++++ .../server/routes/_mock_handler_arguments.ts | 33 +++ .../task_manager/server/routes/health.test.ts | 188 ++++++++++++++++++ .../task_manager/server/routes/health.ts | 108 ++++++++++ .../task_manager/server/routes/index.ts | 7 + .../task_manager/server/task_manager.mock.ts | 39 ++-- .../task_manager/server/task_manager.ts | 15 ++ .../task_manager/server/task_store.mock.ts | 1 + .../plugins/task_manager/server/task_store.ts | 38 ++++ x-pack/test/plugin_api_integration/config.ts | 1 + .../test_suites/task_manager/health_route.ts | 86 ++++++++ .../test_suites/task_manager/index.ts | 1 + .../task_manager/task_management.ts | 84 +++++--- 29 files changed, 891 insertions(+), 69 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/monitoring/index.ts create mode 
100644 x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts create mode 100644 x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts create mode 100644 x-pack/plugins/task_manager/server/routes/_mock_handler_arguments.ts create mode 100644 x-pack/plugins/task_manager/server/routes/health.test.ts create mode 100644 x-pack/plugins/task_manager/server/routes/health.ts create mode 100644 x-pack/plugins/task_manager/server/routes/index.ts create mode 100644 x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts diff --git a/x-pack/plugins/actions/server/action_type_registry.test.ts b/x-pack/plugins/actions/server/action_type_registry.test.ts index b25e33400df5..52cf6b6473f7 100644 --- a/x-pack/plugins/actions/server/action_type_registry.test.ts +++ b/x-pack/plugins/actions/server/action_type_registry.test.ts @@ -4,7 +4,7 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { ActionTypeRegistry, ActionTypeRegistryOpts } from './action_type_registry'; import { ActionType, ExecutorType } from './types'; import { ActionExecutor, ExecutorError, ILicenseState, TaskRunnerFactory } from './lib'; @@ -12,7 +12,7 @@ import { actionsConfigMock } from './actions_config.mock'; import { licenseStateMock } from './lib/license_state.mock'; import { ActionsConfigurationUtilities } from './actions_config'; -const mockTaskManager = taskManagerMock.setup(); +const mockTaskManager = taskManagerMock.createSetup(); let mockedLicenseState: jest.Mocked; let mockedActionsConfig: jest.Mocked; let actionTypeRegistryParams: ActionTypeRegistryOpts; diff --git a/x-pack/plugins/actions/server/actions_client.test.ts b/x-pack/plugins/actions/server/actions_client.test.ts index adef12454f2d..7ea215c0a592 100644 --- a/x-pack/plugins/actions/server/actions_client.test.ts +++ b/x-pack/plugins/actions/server/actions_client.test.ts @@ -10,7 +10,7 @@ import { ActionTypeRegistry, ActionTypeRegistryOpts } from './action_type_regist import { ActionsClient } from './actions_client'; import { ExecutorType } from './types'; import { ActionExecutor, TaskRunnerFactory, ILicenseState } from './lib'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { actionsConfigMock } from './actions_config.mock'; import { getActionsConfigurationUtilities } from './actions_config'; import { licenseStateMock } from './lib/license_state.mock'; @@ -33,7 +33,7 @@ const authorization = actionsAuthorizationMock.create(); const executionEnqueuer = jest.fn(); const request = {} as KibanaRequest; -const mockTaskManager = taskManagerMock.setup(); +const mockTaskManager = taskManagerMock.createSetup(); let actionsClient: ActionsClient; let 
mockedLicenseState: jest.Mocked; diff --git a/x-pack/plugins/actions/server/builtin_action_types/index.test.ts b/x-pack/plugins/actions/server/builtin_action_types/index.test.ts index acab6dd41b4b..1088a4e0d906 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/index.test.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/index.test.ts @@ -6,7 +6,7 @@ import { ActionExecutor, TaskRunnerFactory } from '../lib'; import { ActionTypeRegistry } from '../action_type_registry'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../task_manager/server/mocks'; import { registerBuiltInActionTypes } from './index'; import { Logger } from '../../../../../src/core/server'; import { loggingSystemMock } from '../../../../../src/core/server/mocks'; @@ -21,7 +21,7 @@ export function createActionTypeRegistry(): { } { const logger = loggingSystemMock.create().get() as jest.Mocked; const actionTypeRegistry = new ActionTypeRegistry({ - taskManager: taskManagerMock.setup(), + taskManager: taskManagerMock.createSetup(), taskRunnerFactory: new TaskRunnerFactory( new ActionExecutor({ isESOUsingEphemeralEncryptionKey: false }) ), diff --git a/x-pack/plugins/actions/server/create_execute_function.test.ts b/x-pack/plugins/actions/server/create_execute_function.test.ts index 7682f01ed769..cfbc68879ae0 100644 --- a/x-pack/plugins/actions/server/create_execute_function.test.ts +++ b/x-pack/plugins/actions/server/create_execute_function.test.ts @@ -6,7 +6,7 @@ import { KibanaRequest } from 'src/core/server'; import uuid from 'uuid'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { createExecutionEnqueuerFunction } from './create_execute_function'; import { savedObjectsClientMock } from '../../../../src/core/server/mocks'; import { actionTypeRegistryMock } from './action_type_registry.mock'; diff 
--git a/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts b/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts index 2e2944aab425..0e6c2ff37eb0 100644 --- a/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts +++ b/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts @@ -6,9 +6,9 @@ import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { registerActionsUsageCollector } from './actions_usage_collector'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../task_manager/server/mocks'; -const mockTaskManagerStart = taskManagerMock.start(); +const mockTaskManagerStart = taskManagerMock.createStart(); beforeEach(() => jest.resetAllMocks()); diff --git a/x-pack/plugins/alerts/server/alert_type_registry.test.ts b/x-pack/plugins/alerts/server/alert_type_registry.test.ts index 048cc3d5a444..020b4f55619b 100644 --- a/x-pack/plugins/alerts/server/alert_type_registry.test.ts +++ b/x-pack/plugins/alerts/server/alert_type_registry.test.ts @@ -7,9 +7,9 @@ import { TaskRunnerFactory } from './task_runner'; import { AlertTypeRegistry } from './alert_type_registry'; import { AlertType } from './types'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; -const taskManager = taskManagerMock.setup(); +const taskManager = taskManagerMock.createSetup(); const alertTypeRegistryParams = { taskManager, taskRunnerFactory: new TaskRunnerFactory(), diff --git a/x-pack/plugins/alerts/server/alerts_client.test.ts b/x-pack/plugins/alerts/server/alerts_client.test.ts index a6cffb028481..250adb83dad4 100644 --- a/x-pack/plugins/alerts/server/alerts_client.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client.test.ts @@ -9,7 +9,7 @@ import { AlertsClient, CreateOptions, ConstructorOptions } from './alerts_client import { 
savedObjectsClientMock, loggingSystemMock } from '../../../../src/core/server/mocks'; import { nodeTypes } from '../../../../src/plugins/data/common'; import { esKuery } from '../../../../src/plugins/data/server'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { alertTypeRegistryMock } from './alert_type_registry.mock'; import { alertsAuthorizationMock } from './authorization/alerts_authorization.mock'; import { TaskStatus } from '../../task_manager/server'; @@ -24,7 +24,7 @@ import { QueryEventsBySavedObjectResult } from '../../event_log/server'; import { SavedObject } from 'kibana/server'; import { EventsFactory } from './lib/alert_instance_summary_from_event_log.test'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const eventLogClient = eventLogClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client_factory.test.ts b/x-pack/plugins/alerts/server/alerts_client_factory.test.ts index ac91d689798c..770658fdde10 100644 --- a/x-pack/plugins/alerts/server/alerts_client_factory.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client_factory.test.ts @@ -7,7 +7,7 @@ import { Request } from 'hapi'; import { AlertsClientFactory, AlertsClientFactoryOpts } from './alerts_client_factory'; import { alertTypeRegistryMock } from './alert_type_registry.mock'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { KibanaRequest } from '../../../../src/core/server'; import { savedObjectsClientMock, @@ -35,7 +35,7 @@ const features = featuresPluginMock.createStart(); const securityPluginSetup = securityMock.createSetup(); const alertsClientFactoryParams: jest.Mocked = { logger: 
loggingSystemMock.create().get(), - taskManager: taskManagerMock.start(), + taskManager: taskManagerMock.createStart(), alertTypeRegistry: alertTypeRegistryMock.create(), getSpaceId: jest.fn(), getSpace: jest.fn(), diff --git a/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts b/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts index b48d173ba36d..a5f83bc393d4 100644 --- a/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts +++ b/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts @@ -6,8 +6,8 @@ import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { registerAlertsUsageCollector } from './alerts_usage_collector'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; -const taskManagerStart = taskManagerMock.start(); +import { taskManagerMock } from '../../../task_manager/server/mocks'; +const taskManagerStart = taskManagerMock.createStart(); beforeEach(() => jest.resetAllMocks()); diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index d5bbbe65582f..2eb132185ff7 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -15,6 +15,7 @@ describe('config validation', () => { "max_attempts": 3, "max_poll_inactivity_cycles": 10, "max_workers": 10, + "monitored_aggregated_stats_refresh_rate": 60000, "poll_interval": 3000, "request_capacity": 1000, } diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index aa78cf3baa96..1b79c17220f4 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -10,6 +10,9 @@ export const DEFAULT_MAX_WORKERS = 10; export const DEFAULT_POLL_INTERVAL = 3000; export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; +// Refresh "pull based" monitored stats at a default rate of once a minute +export 
const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000; + export const configSchema = schema.object({ enabled: schema.boolean({ defaultValue: true }), /* The maximum number of times a task will be attempted before being abandoned as failed */ @@ -48,6 +51,12 @@ export const configSchema = schema.object({ // disable the task manager rather than trying to specify it with 0 workers min: 1, }), + /* The rate at which we refresh monitored stats that require aggregation queries against ES. */ + monitored_aggregated_stats_refresh_rate: schema.number({ + defaultValue: DEFAULT_MONITORING_REFRESH_RATE, + /* don't run monitored stat aggregations any faster than once every 5 seconds */ + min: 5000, + }), }); export type TaskManagerConfig = TypeOf; diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts new file mode 100644 index 000000000000..cf75294be126 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +import { TaskManager } from '../task_manager'; +import { AggregatedStatProvider } from './runtime_statistics_aggregator'; +import { createWorkloadAggregator } from './workload_statistics'; +import { TaskManagerConfig } from '../config'; + +export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; + +export function createAggregatedStatsStream( + taskManager: TaskManager, + config: TaskManagerConfig +): AggregatedStatProvider { + return createWorkloadAggregator(taskManager, config.monitored_aggregated_stats_refresh_rate); +} diff --git a/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts b/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts new file mode 100644 index 000000000000..f895bf2b02e6 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts @@ -0,0 +1,14 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +import { Observable } from 'rxjs'; +import { JsonObject, JsonValue } from 'src/plugins/kibana_utils/common'; + +export interface AggregatedStat { + key: string; + value: JsonObject | JsonValue; +} + +export type AggregatedStatProvider = Observable; diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts new file mode 100644 index 000000000000..32e8c2111239 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { createWorkloadAggregator } from './workload_statistics'; +import { taskManagerMock } from '../task_manager.mock'; +import { first } from 'rxjs/operators'; +import { AggregationResult } from '../queries/aggregation_clauses'; + +describe('Workload Statistics Aggregator', () => { + test('queries the Task Store at a fixed interval for the current workload', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(({ + task: { + doc_count: 0, + taskType: { + buckets: [], + }, + }, + } as unknown) as AggregationResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 10); + + return new Promise((resolve) => { + workloadAggregator.pipe(first()).subscribe(() => { + expect(taskManager.aggregate).toHaveBeenCalledWith({ + aggs: { + taskType: { + terms: { field: 'task.taskType' }, + aggs: { + status: { + terms: { field: 'task.status' }, + }, + }, + }, + }, + }); + resolve(); + }); + }); + }); + + test('returns a summary of the workload by task type', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(({ + task: { + doc_count: 4, + taskType: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'actions_telemetry', + doc_count: 2, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 2, + }, + ], + }, + }, + { + key: 'alerting_telemetry', + doc_count: 1, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, + { + key: 'session_cleanup', + doc_count: 1, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, + ], + }, + }, + } as unknown) as AggregationResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 10); + + return new Promise((resolve) => { + 
workloadAggregator.pipe(first()).subscribe((result) => { + expect(result.key).toEqual('workload'); + expect(result.value).toMatchObject({ + sum: 4, + types: { + actions_telemetry: { sum: 2, status: { idle: 2 } }, + alerting_telemetry: { sum: 1, status: { idle: 1 } }, + session_cleanup: { sum: 1, status: { idle: 1 } }, + }, + }); + resolve(); + }); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts new file mode 100644 index 000000000000..8e73d88bea25 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { interval } from 'rxjs'; +import { concatMap, map } from 'rxjs/operators'; +import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { keyBy, mapValues } from 'lodash'; +import { AggregatedStatProvider } from './runtime_statistics_aggregator'; +import { TaskManager } from '../task_manager'; +import { + AggregationResult, + AggregationBucketWithSubAgg, + AggregationBucket, +} from '../queries/aggregation_clauses'; + +export function createWorkloadAggregator( + taskManager: TaskManager, + refreshInterval: number +): AggregatedStatProvider { + return interval(refreshInterval).pipe( + concatMap(() => + taskManager.aggregate({ + aggs: { + taskType: { + terms: { field: 'task.taskType' }, + aggs: { + status: { + terms: { field: 'task.status' }, + }, + }, + }, + }, + }) + ), + map( + ({ + task: { + doc_count: sum, + taskType: { buckets: types }, + }, + }: AggregationResult<'task' | 'taskType' | 'status'>) => { + const summary: JsonObject = { + sum, + types: mapValues( + keyBy>( + types as Array>, + 'key' + ), + ({ doc_count: docCount, 
status }) => ({ + sum: docCount, + status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), + }) + ), + }; + return { + key: 'workload', + value: summary, + }; + } + ) + ); +} diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index d7dcf779376b..715d8cf1b4d0 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -11,6 +11,8 @@ import { TaskManager } from './task_manager'; import { TaskManagerConfig } from './config'; import { Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; +import { healthRoute } from './routes'; +import { createAggregatedStatsStream } from './monitoring'; export type TaskManagerSetupContract = Pick< TaskManager, @@ -36,14 +38,24 @@ export class TaskManagerPlugin } public async setup(core: CoreSetup): Promise { - this.config = await this.initContext.config + const config = (this.config = await this.initContext.config .create() .pipe(first()) - .toPromise(); + .toPromise()); setupSavedObjects(core.savedObjects, this.config); this.taskManagerId = this.initContext.env.instanceUuid; + // Routes + const router = core.http.createRouter(); + healthRoute( + router, + config, + this.taskManager.then((tm) => createAggregatedStatsStream(tm, config)), + // if health is any more stale than the pollInterval (+1s buffer) consider the system unhealthy + config.poll_interval + 1000 + ); + return { addMiddleware: (middleware: Middleware) => { this.taskManager.then((tm) => tm.addMiddleware(middleware)); diff --git a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts new file mode 100644 index 000000000000..84cd9d6ae2b5 --- /dev/null +++ b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { TermFilter } from './query_clauses'; + +/** + * Terminology + * =========== + * The terms for the different clauses in an Elasticsearch query aggregation can be confusing, here are some + * clarifications that might help you understand the Typescript types we use here. + * + * Given the following Aggregation: + * { + * "size": 0, + * "aggs": { (1) + * "task": { + * "filter": { + * "term": { + * "type": "task" + * } + * }, + * "aggs": { (1) + * "taskType": { (2) + * "terms": { "field": "task.taskType" }, + * "aggs": { + * "status": { (2) + * "terms": { "field": "task.status" } + * } + * } + * } + * } + * } + * } + * } + * + * These are referred to as: + * (1). AggregationQuery + * (2). TermAggregation + * + */ + +export interface AggregationQuery { + [aggregationName: string]: (TermAggregation | { aggs: AggregationQuery }) & { + filter?: TermFilter; + }; +} + +interface TermAggregation { + terms: { + field: string; + }; +} + +/** + * Results of an Aggregation + */ +type ReservedNames = 'doc_count'; +type AggregationNames = Exclude; +export type Aggregation = { + doc_count: number; +} & { + [innerAggregation in Name]: AggregationBuckets; +}; + +export interface AggregationBucket { + key: string; + doc_count: number; +} + +export type AggregationBucketWithSubAgg = AggregationBucket & + { + [innerAggregation in Name]: AggregationBuckets; + }; + +export interface AggregationBuckets { + buckets: AggregationBucket[] | Array>; +} + +export type AggregationResult = { + [aggregationName in Name]: Aggregation; +}; diff --git a/x-pack/plugins/task_manager/server/routes/_mock_handler_arguments.ts b/x-pack/plugins/task_manager/server/routes/_mock_handler_arguments.ts new file mode 100644 index 000000000000..c9f4de25afaf --- /dev/null +++ 
b/x-pack/plugins/task_manager/server/routes/_mock_handler_arguments.ts @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { RequestHandlerContext, KibanaRequest, KibanaResponseFactory } from 'kibana/server'; +import { identity } from 'lodash'; +import { httpServerMock } from '../../../../../src/core/server/mocks'; + +export function mockHandlerArguments( + {}: {}, + req: unknown, + res?: Array> +): [RequestHandlerContext, KibanaRequest, KibanaResponseFactory] { + return [ + ({} as unknown) as RequestHandlerContext, + req as KibanaRequest, + mockResponseFactory(res), + ]; +} + +export const mockResponseFactory = (resToMock: Array> = []) => { + const factory: jest.Mocked = httpServerMock.createResponseFactory(); + resToMock.forEach((key: string) => { + if (key in factory) { + Object.defineProperty(factory, key, { + value: jest.fn(identity), + }); + } + }); + return (factory as unknown) as KibanaResponseFactory; +}; diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts new file mode 100644 index 000000000000..4fc7b9d6b352 --- /dev/null +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -0,0 +1,188 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { healthRoute } from './health'; +import { httpServiceMock } from 'src/core/server/mocks'; +import { mockHandlerArguments } from './_mock_handler_arguments'; +import { TaskManagerConfig } from '../config'; +import { of, Subject } from 'rxjs'; +import { get } from 'lodash'; +import { sleep } from '../test_utils'; +import { AggregatedStat } from '../monitoring'; + +beforeEach(() => { + jest.resetAllMocks(); +}); + +const configuration: TaskManagerConfig = { + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, +}; + +describe('healthRoute', () => { + it('registers the route', async () => { + const router = httpServiceMock.createRouter(); + + healthRoute(router, configuration, Promise.resolve(of()), 1000); + + const [config] = router.get.mock.calls[0]; + + expect(config.path).toMatchInlineSnapshot(`"/api/task_manager/_health"`); + }); + + it('returns the initial config used to configure Task Manager', async () => { + const router = httpServiceMock.createRouter(); + + healthRoute(router, configuration, Promise.resolve(of()), 1000); + + const [, handler] = router.get.mock.calls[0]; + + const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); + + expect(get(await handler(context, req, res), 'body.stats')).toMatchObject({ + configuration: { + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + }, + }, + }); + }); + + it('returns an error response if the stats are no longer fresh', async () => { + const router = httpServiceMock.createRouter(); + + healthRoute(router, configuration, Promise.resolve(of()), 1000); + + const [, handler] = router.get.mock.calls[0]; + + const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); + + await sleep(2000); + + 
expect(await handler(context, req, res)).toMatchObject({ + body: { + attributes: { + lastUpdate: expect.any(String), + stats: { + configuration: { + timestamp: expect.any(String), + value: { + max_poll_inactivity_cycles: 10, + max_workers: 10, + poll_interval: 6000000, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + }, + }, + }, + }, + message: new Error('Task Manager monitored stats are out of date'), + }, + }); + }); + + it('incrementally updates the stats returned by the endpoint', async () => { + const router = httpServiceMock.createRouter(); + + const aggregatedStats = Promise.resolve(new Subject()); + + healthRoute(router, configuration, Promise.resolve(aggregatedStats), 1000); + + const [, handler] = router.get.mock.calls[0]; + + const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); + + return aggregatedStats.then(async (aggregatedStats$) => { + aggregatedStats$.next({ + key: 'newAggregatedStat', + value: { + some: { + complex: { + value: 123, + }, + }, + }, + }); + + expect(await handler(context, req, res)).toMatchObject({ + body: { + lastUpdate: expect.any(String), + stats: { + newAggregatedStat: { + timestamp: expect.any(String), + value: { + some: { + complex: { + value: 123, + }, + }, + }, + }, + configuration: { + timestamp: expect.any(String), + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + }, + }, + }, + }, + }); + + aggregatedStats$.next({ + key: 'newAggregatedStat', + value: { + some: { + updated: { + value: 456, + }, + }, + }, + }); + + expect(await handler(context, req, res)).toMatchObject({ + body: { + lastUpdate: expect.any(String), + stats: { + newAggregatedStat: { + timestamp: expect.any(String), + value: { + some: { + updated: { + value: 456, + }, + }, + }, + }, + configuration: { + timestamp: expect.any(String), + value: { + max_workers: 10, + poll_interval: 6000000, 
+ max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + }, + }, + }, + }, + }); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts new file mode 100644 index 000000000000..cf73c9314391 --- /dev/null +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { + IRouter, + RequestHandlerContext, + KibanaRequest, + IKibanaResponse, + KibanaResponseFactory, +} from 'kibana/server'; +import { pick } from 'lodash'; +import { set } from '@elastic/safer-lodash-set'; +import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { map } from 'rxjs/operators'; +import { TaskManagerConfig } from '../config'; +import { AggregatedStatProvider } from '../monitoring'; + +const CONFIG_FIELDS_TO_EXPOSE = [ + 'max_workers', + 'poll_interval', + 'request_capacity', + 'max_poll_inactivity_cycles', + 'monitored_aggregated_stats_refresh_rate', +]; + +interface MonitoredStat { + timestamp: string; + value: JsonObject; +} + +interface MonitoringStats { + lastUpdate: string; + stats: Record; +} + +export function healthRoute( + router: IRouter, + initialConfig: TaskManagerConfig, + aggregatedStats: Promise, + requiredFreshness: number +) { + const initialisationTimestamp = new Date().toISOString(); + const monitoringStats: MonitoringStats = { + lastUpdate: initialisationTimestamp, + stats: { + configuration: { + timestamp: initialisationTimestamp, + value: pick<{ + max_workers: number; + poll_interval: number; + request_capacity: number; + max_poll_inactivity_cycles: number; + monitored_aggregated_stats_refresh_rate: number; + }>(initialConfig, ...CONFIG_FIELDS_TO_EXPOSE) as 
JsonObject, + }, + }, + }; + + aggregatedStats.then((aggregatedStats$) => { + aggregatedStats$ + .pipe( + map(({ key, value }) => { + return { + value: { timestamp: new Date().toISOString(), value }, + key, + }; + }) + ) + .subscribe(({ key, value }) => { + set(monitoringStats.stats, key, value); + monitoringStats.lastUpdate = new Date().toISOString(); + }); + }); + + router.get( + { + path: '/api/task_manager/_health', + validate: false, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest, + res: KibanaResponseFactory + ): Promise { + const lastUpdate = Date.parse(monitoringStats.lastUpdate); + + /** + * If the monitored stats aren't fresh, return an `500 internalError` with + * the stats in the body of the api call. This makes it easier for monitoring + * services to mark the service as broken + */ + if (Date.now() - lastUpdate > requiredFreshness) { + return res.internalError({ + body: { + message: new Error('Task Manager monitored stats are out of date'), + attributes: monitoringStats, + }, + }); + } + return res.ok({ + body: monitoringStats, + }); + } + ); +} diff --git a/x-pack/plugins/task_manager/server/routes/index.ts b/x-pack/plugins/task_manager/server/routes/index.ts new file mode 100644 index 000000000000..4fa1aa6cb7a9 --- /dev/null +++ b/x-pack/plugins/task_manager/server/routes/index.ts @@ -0,0 +1,7 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +export { healthRoute } from './health'; diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/task_manager.mock.ts index 1fc626e7d58d..8afaa2251566 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/task_manager.mock.ts @@ -4,27 +4,24 @@ * you may not use this file except in compliance with the Elastic License. */ -import { TaskManagerSetupContract, TaskManagerStartContract } from './plugin'; +import { TaskManager } from './task_manager'; + +const createTaskManagerMock = () => { + return { + registerTaskDefinitions: jest.fn(), + addMiddleware: jest.fn(), + ensureScheduled: jest.fn(), + schedule: jest.fn(), + fetch: jest.fn(), + aggregate: jest.fn(), + get: jest.fn(), + runNow: jest.fn(), + remove: jest.fn(), + start: jest.fn(), + stop: jest.fn(), + } as jest.Mocked; +}; export const taskManagerMock = { - setup(overrides: Partial> = {}) { - const mocked: jest.Mocked = { - registerTaskDefinitions: jest.fn(), - addMiddleware: jest.fn(), - ...overrides, - }; - return mocked; - }, - start(overrides: Partial> = {}) { - const mocked: jest.Mocked = { - ensureScheduled: jest.fn(), - schedule: jest.fn(), - fetch: jest.fn(), - get: jest.fn(), - runNow: jest.fn(), - remove: jest.fn(), - ...overrides, - }; - return mocked; - }, + create: createTaskManagerMock, }; diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index fb2d5e07030a..7df3186ca8ec 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -60,10 +60,12 @@ import { OwnershipClaimingOpts, ClaimOwnershipResult, SearchOpts, + AggregationOpts, } from './task_store'; import { identifyEsError } from './lib/identify_es_error'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; 
+import { AggregationResult } from './queries/aggregation_clauses'; const VERSION_CONFLICT_STATUS = 409; @@ -372,6 +374,19 @@ export class TaskManager { return this.store.fetch(opts); } + /** + * Fetches a list of scheduled tasks. + * + * @param opts - The query options used to filter tasks + * @returns {Promise} + */ + public async aggregate( + opts: AggregationOpts + ): Promise> { + await this.waitUntilStarted(); + return this.store.aggregate(opts); + } + /** * Get the current state of a specified task. * diff --git a/x-pack/plugins/task_manager/server/task_store.mock.ts b/x-pack/plugins/task_manager/server/task_store.mock.ts index 86db695bc5e2..a960b52cf659 100644 --- a/x-pack/plugins/task_manager/server/task_store.mock.ts +++ b/x-pack/plugins/task_manager/server/task_store.mock.ts @@ -22,6 +22,7 @@ export const taskStoreMock = { get: jest.fn(), getLifecycle: jest.fn(), fetch: jest.fn(), + aggregate: jest.fn(), maxAttempts, index, taskManagerId, diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index acd19bd75f7a..17523ee9efb6 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -60,6 +60,7 @@ import { SortByRunAtAndRetryAt, tasksClaimedByOwner, } from './queries/mark_available_tasks_as_claimed'; +import { AggregationQuery, AggregationResult } from './queries/aggregation_clauses'; export interface StoreOpts { callCluster: ElasticJs; @@ -79,6 +80,11 @@ export interface SearchOpts { search_after?: unknown[]; } +export interface AggregationOpts { + aggs: AggregationQuery; + size?: number; +} + export interface UpdateByQuerySearchOpts extends SearchOpts { script?: object; } @@ -458,6 +464,22 @@ export class TaskStore { }; } + public async aggregate({ + aggs, + size = 0, + }: AggregationOpts): Promise> { + const result = await this.callCluster('search', { + index: this.index, + ignoreUnavailable: true, + body: { + aggs: 
ensureAggregationOnlyReturnsTaskObjects(aggs), + size, + }, + }); + + return (result as { aggregations: AggregationResult }).aggregations; + } + private async updateByQuery( opts: UpdateByQuerySearchOpts = {}, // eslint-disable-next-line @typescript-eslint/naming-convention @@ -537,6 +559,22 @@ function ensureQueryOnlyReturnsTaskObjects(opts: SearchOpts): SearchOpts { }; } +function ensureAggregationOnlyReturnsTaskObjects( + aggs: AggregationOpts['aggs'] +): AggregationOpts['aggs'] { + const filteredAgg: AggregationQuery = { + task: { + filter: { + term: { + type: 'task', + }, + }, + aggs, + }, + }; + return filteredAgg; +} + function isSavedObjectsUpdateResponse( result: SavedObjectsUpdateResponse | Error ): result is SavedObjectsUpdateResponse { diff --git a/x-pack/test/plugin_api_integration/config.ts b/x-pack/test/plugin_api_integration/config.ts index b89ed6ad550a..30a361ea2a37 100644 --- a/x-pack/test/plugin_api_integration/config.ts +++ b/x-pack/test/plugin_api_integration/config.ts @@ -43,6 +43,7 @@ export default async function ({ readConfigFile }: FtrConfigProviderContext) { '--xpack.eventLog.enabled=true', '--xpack.eventLog.logEntries=true', '--xpack.eventLog.indexEntries=true', + '--xpack.task_manager.monitored_aggregated_stats_refresh_rate=5000', ...plugins.map( (pluginDir) => `--plugin-path=${path.resolve(__dirname, 'plugins', pluginDir)}` ), diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts new file mode 100644 index 000000000000..9cc7b6174443 --- /dev/null +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import expect from '@kbn/expect'; +import url from 'url'; +import supertestAsPromised from 'supertest-as-promised'; +import { FtrProviderContext } from '../../ftr_provider_context'; +import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; + +interface MonitoringStats { + lastUpdate: string; + stats: { + configuration: { + timestamp: string; + value: Record; + }; + workload: { + timestamp: string; + value: Record; + }; + }; +} + +export default function ({ getService }: FtrProviderContext) { + const config = getService('config'); + const retry = getService('retry'); + const supertest = supertestAsPromised(url.format(config.get('servers.kibana'))); + + function getHealthRequest() { + return supertest.get('/api/task_manager/_health').set('kbn-xsrf', 'foo'); + } + + function getHealth(): Promise { + return getHealthRequest() + .expect(200) + .then((response) => response.body); + } + + function scheduleTask(task: Partial): Promise { + return supertest + .post('/api/sample_tasks/schedule') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: ConcreteTaskInstance }) => response.body); + } + + const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + describe('health', () => { + it('should return basic configuration of task manager', async () => { + expect((await getHealth()).stats.configuration.value).to.eql({ + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + max_workers: 10, + }); + }); + + it('should return the task manager workload', async () => { + const sumSampleTaskInWorkload = + ((await getHealth()).stats.workload.value.types as { + sampleTask?: { sum: number }; + }).sampleTask?.sum ?? 
0; + + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '1m' }, + }); + + await retry.try(async () => { + // workload is configured to refresh every 5s in FTs + await delay(5000); + + const workloadAfterScheduling = (await getHealth()).stats.workload.value; + + expect( + (workloadAfterScheduling.types as { sampleTask: { sum: number } }).sampleTask.sum + ).to.eql(sumSampleTaskInWorkload + 1); + }); + }); + }); +} diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts index c6d817119d41..5eb1353dd129 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts @@ -10,5 +10,6 @@ export default function ({ loadTestFile }: FtrProviderContext) { describe('task_manager', function taskManagerSuite() { this.tags('ciGroup2'); loadTestFile(require.resolve('./task_management')); + loadTestFile(require.resolve('./health_route')); }); } diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts index fcf2d5b23512..2434f05b5403 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts @@ -15,7 +15,6 @@ import { DEFAULT_POLL_INTERVAL, } from '../../../../plugins/task_manager/server/config'; import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; -import { SavedObjectsRawDoc } from '../../../../../src/core/server'; const { task: { properties: taskManagerIndexMapping }, @@ -34,6 +33,22 @@ export interface SearchResults { }; } +type DeprecatedConcreteTaskInstance = Omit & { + interval: string; +}; + +type SerializedConcreteTaskInstance = Omit< + ConcreteTaskInstance, + 'state' | 'params' | 'scheduledAt' | 'startedAt' | 
'retryAt' | 'runAt' +> & { + state: State; + params: Params; + scheduledAt: string; + startedAt: string | null; + retryAt: string | null; + runAt: string; +}; + export default function ({ getService }: FtrProviderContext) { const es = getService('legacyEs'); const log = getService('log'); @@ -67,8 +82,8 @@ export default function ({ getService }: FtrProviderContext) { } }); - function currentTasks(): Promise<{ - docs: ConcreteTaskInstance[]; + function currentTasks(): Promise<{ + docs: Array>; }> { return supertest .get('/api/sample_tasks') @@ -76,7 +91,9 @@ export default function ({ getService }: FtrProviderContext) { .then((response) => response.body); } - function currentTask(task: string): ConcreteTaskInstance { + function currentTask( + task: string + ): Promise> { return supertest .get(`/api/sample_tasks/task/${task}`) .send({ task }) @@ -84,11 +101,11 @@ export default function ({ getService }: FtrProviderContext) { .then((response) => response.body); } - function ensureTasksIndexRefreshed(): Promise { + function ensureTasksIndexRefreshed() { return supertest.get(`/api/ensure_tasks_index_refreshed`).send({}).expect(200); } - function historyDocs(taskId: string) { + function historyDocs(taskId?: string): Promise { return es .search({ index: testHistoryIndex, @@ -97,16 +114,18 @@ export default function ({ getService }: FtrProviderContext) { .then((result: SearchResults) => result.hits.hits); } - function scheduleTask(task: string): ConcreteTaskInstance { + function scheduleTask( + task: Partial + ): Promise { return supertest .post('/api/sample_tasks/schedule') .set('kbn-xsrf', 'xxx') .send({ task }) .expect(200) - .then((response: { body: ConcreteTaskInstance }) => response.body); + .then((response: { body: SerializedConcreteTaskInstance }) => response.body); } - function runTaskNow(task) { + function runTaskNow(task: { id: string }) { return supertest .post('/api/sample_tasks/run_now') .set('kbn-xsrf', 'xxx') @@ -115,7 +134,7 @@ export default function ({ 
getService }: FtrProviderContext) { .then((response) => response.body); } - function scheduleTaskIfNotExists(task) { + function scheduleTaskIfNotExists(task: Partial) { return supertest .post('/api/sample_tasks/ensure_scheduled') .set('kbn-xsrf', 'xxx') @@ -124,7 +143,7 @@ export default function ({ getService }: FtrProviderContext) { .then((response: { body: ConcreteTaskInstance }) => response.body); } - function releaseTasksWaitingForEventToComplete(event) { + function releaseTasksWaitingForEventToComplete(event: string) { return supertest .post('/api/sample_tasks/event') .set('kbn-xsrf', 'xxx') @@ -132,7 +151,10 @@ export default function ({ getService }: FtrProviderContext) { .expect(200); } - function getTaskById(tasks: ConcreteTaskInstance[], id: string) { + function getTaskById( + tasks: Array>, + id: string + ) { return tasks.filter((task) => task.id === id)[0]; } @@ -166,7 +188,7 @@ export default function ({ getService }: FtrProviderContext) { await retry.try(async () => { expect((await historyDocs()).length).to.eql(1); - const [task] = (await currentTasks()).docs; + const [task] = (await currentTasks<{ count: number }>()).docs; log.debug(`Task found: ${task.id}`); log.debug(`Task status: ${task.status}`); log.debug(`Task state: ${JSON.stringify(task.state, null, 2)}`); @@ -251,7 +273,7 @@ export default function ({ getService }: FtrProviderContext) { await retry.try(async () => { expect((await historyDocs(originalTask.id)).length).to.eql(1); - const [task] = (await currentTasks()).docs; + const [task] = (await currentTasks<{ count: number }>()).docs; expect(task.attempts).to.eql(0); expect(task.state.count).to.eql(count + 1); @@ -272,7 +294,7 @@ export default function ({ getService }: FtrProviderContext) { await retry.try(async () => { expect((await historyDocs()).length).to.eql(1); - const [task] = (await currentTasks()).docs; + const [task] = (await currentTasks<{ count: number }>()).docs; expect(task.attempts).to.eql(0); 
expect(task.state.count).to.eql(1); @@ -293,7 +315,7 @@ export default function ({ getService }: FtrProviderContext) { await retry.try(async () => { expect((await historyDocs()).length).to.eql(1); - const [task] = (await currentTasks()).docs; + const [task] = (await currentTasks<{ count: number }>()).docs; expect(task.attempts).to.eql(0); expect(task.state.count).to.eql(1); @@ -314,7 +336,7 @@ export default function ({ getService }: FtrProviderContext) { 1 ); - const [task] = (await currentTasks()).docs.filter( + const [task] = (await currentTasks<{ count: number }>()).docs.filter( (taskDoc) => taskDoc.id === originalTask.id ); @@ -337,7 +359,7 @@ export default function ({ getService }: FtrProviderContext) { .length ).to.eql(2); - const [task] = (await currentTasks()).docs.filter( + const [task] = (await currentTasks<{ count: number }>()).docs.filter( (taskDoc) => taskDoc.id === originalTask.id ); expect(task.state.count).to.eql(2); @@ -358,7 +380,7 @@ export default function ({ getService }: FtrProviderContext) { const docs = await historyDocs(originalTask.id); expect(docs.length).to.eql(1); - const task = await currentTask(originalTask.id); + const task = await currentTask<{ count: number }>(originalTask.id); expect(task.state.count).to.eql(1); @@ -408,16 +430,16 @@ export default function ({ getService }: FtrProviderContext) { expect(await runNowResult).to.eql({ id: originalTask.id }); await retry.try(async () => { - const task = await currentTask(originalTask.id); + const task = await currentTask<{ count: number }>(originalTask.id); expect(task.state.count).to.eql(2); }); // drain tasks, othrwise they'll keep Task Manager stalled await retry.try(async () => { await releaseTasksWaitingForEventToComplete('releaseTheOthers'); - const tasks = (await currentTasks()).docs.filter( - (task) => task.params.originalParams.waitForEvent === 'releaseTheOthers' - ); + const tasks = ( + await currentTasks<{}, { originalParams: { waitForEvent: string } }>() + 
).docs.filter((task) => task.params.originalParams.waitForEvent === 'releaseTheOthers'); expect(tasks.length).to.eql(0); }); }); @@ -435,7 +457,7 @@ export default function ({ getService }: FtrProviderContext) { 1 ); - const task = await currentTask(originalTask.id); + const task = await currentTask<{ count: number }>(originalTask.id); expect(task.state.count).to.eql(1); expect(task.status).to.eql('idle'); @@ -452,7 +474,7 @@ export default function ({ getService }: FtrProviderContext) { expect(successfulRunNowResult).to.eql({ id: originalTask.id }); await retry.try(async () => { - const task = await currentTask(originalTask.id); + const task = await currentTask<{ count: number }>(originalTask.id); expect(task.state.count).to.eql(2); expect(task.status).to.eql('idle'); }); @@ -530,7 +552,7 @@ export default function ({ getService }: FtrProviderContext) { // finish first run by emitting 'runNowHasBeenAttempted' event await releaseTasksWaitingForEventToComplete('runNowHasBeenAttempted'); await retry.try(async () => { - const tasks = (await currentTasks()).docs; + const tasks = (await currentTasks<{ count: number }>()).docs; expect(getTaskById(tasks, longRunningTask.id).state.count).to.eql(1); const task = await currentTask(longRunningTask.id); @@ -579,7 +601,11 @@ export default function ({ getService }: FtrProviderContext) { expect(await runNowResultWithExpectedFailure).to.eql({ id: taskThatFailsBeforeRunNow.id }); }); - async function expectReschedule(originalRunAt: number, task: Task, expectedDiff: number) { + async function expectReschedule( + originalRunAt: number, + task: SerializedConcreteTaskInstance, + expectedDiff: number + ) { const buffer = 10000; expect(Date.parse(task.runAt) - originalRunAt).to.be.greaterThan(expectedDiff - buffer); expect(Date.parse(task.runAt) - originalRunAt).to.be.lessThan(expectedDiff + buffer); @@ -607,14 +633,14 @@ export default function ({ getService }: FtrProviderContext) { }); await retry.try(async () => { - const tasks = 
(await currentTasks()).docs; + const tasks = (await currentTasks<{ count: number }>()).docs; expect(getTaskById(tasks, fastTask.id).state.count).to.eql(2); }); await releaseTasksWaitingForEventToComplete('rescheduleHasHappened'); await retry.try(async () => { - const tasks = (await currentTasks()).docs; + const tasks = (await currentTasks<{ count: number }>()).docs; expect(getTaskById(tasks, fastTask.id).state.count).to.greaterThan(2); expect(getTaskById(tasks, longRunningTask.id).state.count).to.eql(1); From 7e27d7b9645f3696bd045b52eaa1d2729e1cf80b Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 23 Sep 2020 13:53:23 +0100 Subject: [PATCH 03/67] added shceduling to health endpoint --- .../server/lib/bulk_operation_buffer.ts | 2 +- .../server/lib/correct_deprecated_fields.ts | 2 +- .../task_manager/server/lib/intervals.test.ts | 37 +-- .../task_manager/server/lib/intervals.ts | 94 +++----- .../task_manager/server/monitoring/index.ts | 10 +- .../monitoring/workload_statistics.test.ts | 210 +++++++++++++----- .../server/monitoring/workload_statistics.ts | 69 +++--- x-pack/plugins/task_manager/server/plugin.ts | 9 +- .../task_manager/server/task_manager.ts | 2 +- .../plugins/task_manager/server/task_pool.ts | 2 +- .../task_manager/server/task_runner.test.ts | 4 +- .../task_manager/server/task_runner.ts | 2 +- .../task_manager/server/test_utils/index.ts | 9 +- x-pack/plugins/task_manager/server/types.ts | 7 - .../test_suites/task_manager/health_route.ts | 37 ++- .../test_suites/task_manager/index.ts | 2 +- 16 files changed, 295 insertions(+), 203 deletions(-) diff --git a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts index 57a14c2f8a56..4de92ffc7703 100644 --- a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts +++ b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts @@ -4,11 +4,11 @@ * you may not use this file except in compliance with the Elastic 
License. */ +import { Logger } from 'src/core/server'; import { map } from 'lodash'; import { Subject, race, from } from 'rxjs'; import { bufferWhen, filter, bufferCount, flatMap, mapTo, first } from 'rxjs/operators'; import { either, Result, asOk, asErr, Ok, Err } from './result_type'; -import { Logger } from '../types'; export interface BufferOptions { bufferMaxDuration?: number; diff --git a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts index 2de95cbb8c2f..a15682a9d3f3 100644 --- a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts +++ b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts @@ -4,8 +4,8 @@ * you may not use this file except in compliance with the Elastic License. */ +import { Logger } from 'src/core/server'; import { TaskInstance, TaskInstanceWithDeprecatedFields } from '../task'; -import { Logger } from '../types'; export function ensureDeprecatedFieldsAreCorrected( { id, taskType, interval, schedule, ...taskInstance }: TaskInstanceWithDeprecatedFields, diff --git a/x-pack/plugins/task_manager/server/lib/intervals.test.ts b/x-pack/plugins/task_manager/server/lib/intervals.test.ts index ac28b81eaf49..3554f8d8294f 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.test.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.test.ts @@ -7,11 +7,9 @@ import _ from 'lodash'; import sinon from 'sinon'; import { - assertValidInterval, + parseIntervalAsSecond, intervalFromNow, intervalFromDate, - minutesFromNow, - minutesFromDate, secondsFromNow, secondsFromDate, } from './intervals'; @@ -25,29 +23,29 @@ beforeAll(() => { afterAll(() => fakeTimer.restore()); describe('taskIntervals', () => { - describe('assertValidInterval', () => { + describe('parseIntervalAsSecond', () => { test('it accepts intervals in the form `Nm`', () => { - expect(() => assertValidInterval(`${_.random(1, 1000)}m`)).not.toThrow(); + expect(() => 
parseIntervalAsSecond(`${_.random(1, 1000)}m`)).not.toThrow(); }); test('it accepts intervals in the form `Ns`', () => { - expect(() => assertValidInterval(`${_.random(1, 1000)}s`)).not.toThrow(); + expect(() => parseIntervalAsSecond(`${_.random(1, 1000)}s`)).not.toThrow(); }); test('it rejects 0 based intervals', () => { - expect(() => assertValidInterval('0m')).toThrow( + expect(() => parseIntervalAsSecond('0m')).toThrow( /Invalid interval "0m"\. Intervals must be of the form {number}m. Example: 5m/ ); - expect(() => assertValidInterval('0s')).toThrow( + expect(() => parseIntervalAsSecond('0s')).toThrow( /Invalid interval "0s"\. Intervals must be of the form {number}m. Example: 5m/ ); }); test('it rejects intervals are not of the form `Nm` or `Ns`', () => { - expect(() => assertValidInterval(`5m 2s`)).toThrow( + expect(() => parseIntervalAsSecond(`5m 2s`)).toThrow( /Invalid interval "5m 2s"\. Intervals must be of the form {number}m. Example: 5m/ ); - expect(() => assertValidInterval(`hello`)).toThrow( + expect(() => parseIntervalAsSecond(`hello`)).toThrow( /Invalid interval "hello"\. Intervals must be of the form {number}m. 
Example: 5m/ ); }); @@ -125,25 +123,6 @@ describe('taskIntervals', () => { }); }); - describe('minutesFromNow', () => { - test('it returns the current date plus a number of minutes', () => { - const mins = _.random(1, 100); - const expected = Date.now() + mins * 60 * 1000; - const nextRun = minutesFromNow(mins).getTime(); - expect(nextRun).toEqual(expected); - }); - }); - - describe('minutesFromDate', () => { - test('it returns the given date plus a number of minutes', () => { - const originalDate = new Date(2019, 1, 1); - const mins = _.random(1, 100); - const expected = originalDate.valueOf() + mins * 60 * 1000; - const nextRun = minutesFromDate(originalDate, mins).getTime(); - expect(expected).toEqual(nextRun); - }); - }); - describe('secondsFromNow', () => { test('it returns the current date plus a number of seconds', () => { const secs = _.random(1, 100); diff --git a/x-pack/plugins/task_manager/server/lib/intervals.ts b/x-pack/plugins/task_manager/server/lib/intervals.ts index 9009be5f7822..967251e6d717 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.ts @@ -4,6 +4,22 @@ * you may not use this file except in compliance with the Elastic License. */ +import { memoize } from 'lodash'; + +export enum IntervalCadence { + Minute = 'm', + Second = 's', +} +const VALID_CADENCE = new Set(Object.values(IntervalCadence)); +const CADENCE_IN_SECONDS: Record = { + [IntervalCadence.Second]: 1, + [IntervalCadence.Minute]: 60, +}; + +function isCadence(cadence: IntervalCadence | string): cadence is IntervalCadence { + return VALID_CADENCE.has(cadence as IntervalCadence); +} + /** * Returns a date that is the specified interval from now. Currently, * only minute-intervals and second-intervals are supported. 
@@ -14,14 +30,7 @@ export function intervalFromNow(interval?: string): Date | undefined { if (interval === undefined) { return; } - - assertValidInterval(interval); - - if (isSeconds(interval)) { - return secondsFromNow(parseInterval(interval)); - } - - return minutesFromNow(parseInterval(interval)); + return secondsFromNow(parseIntervalAsSecond(interval)); } /** @@ -35,37 +44,7 @@ export function intervalFromDate(date: Date, interval?: string): Date | undefine if (interval === undefined) { return; } - - assertValidInterval(interval); - - if (isSeconds(interval)) { - return secondsFromDate(date, parseInterval(interval)); - } - - return minutesFromDate(date, parseInterval(interval)); -} - -/** - * Returns a date that is mins minutes from now. - * - * @param mins The number of mintues from now - */ -export function minutesFromNow(mins: number): Date { - return minutesFromDate(new Date(), mins); -} - -/** - * Returns a date that is mins minutes from given date. - * - * @param date The date to add minutes to - * @param mins The number of mintues from given date - */ -export function minutesFromDate(date: Date, mins: number): Date { - const result = new Date(date.valueOf()); - - result.setMinutes(result.getMinutes() + mins); - - return result; + return secondsFromDate(date, parseIntervalAsSecond(interval)); } /** @@ -85,9 +64,7 @@ export function secondsFromNow(secs: number): Date { */ export function secondsFromDate(date: Date, secs: number): Date { const result = new Date(date.valueOf()); - result.setSeconds(result.getSeconds() + secs); - return result; } @@ -95,29 +72,18 @@ export function secondsFromDate(date: Date, secs: number): Date { * Verifies that the specified interval matches our expected format. 
* * @param {string} interval - An interval such as `5m` or `10s` + * @returns {number} The interval as seconds */ -export function assertValidInterval(interval: string) { - if (isMinutes(interval)) { - return interval; +export const parseIntervalAsSecond = memoize((interval: string): number => { + const numericAsStr: string = interval.slice(0, -1); + const numeric: number = parseInt(numericAsStr, 10); + const cadence: IntervalCadence | string = interval.slice(-1); + if (!isCadence(cadence) || isNaN(numeric) || numeric <= 0 || !isNumeric(numericAsStr)) { + throw new Error( + `Invalid interval "${interval}". Intervals must be of the form {number}m. Example: 5m.` + ); } + return numeric * CADENCE_IN_SECONDS[cadence]; +}); - if (isSeconds(interval)) { - return interval; - } - - throw new Error( - `Invalid interval "${interval}". Intervals must be of the form {number}m. Example: 5m.` - ); -} - -function parseInterval(interval: string) { - return parseInt(interval, 10); -} - -function isMinutes(interval: string) { - return /^[1-9][0-9]*m$/.test(interval); -} - -function isSeconds(interval: string) { - return /^[1-9][0-9]*s$/.test(interval); -} +const isNumeric = (numAsStr: string) => /^\d+$/.test(numAsStr); diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts index cf75294be126..347731752d85 100644 --- a/x-pack/plugins/task_manager/server/monitoring/index.ts +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -3,6 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. 
*/ +import { Logger } from 'src/core/server'; import { TaskManager } from '../task_manager'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { createWorkloadAggregator } from './workload_statistics'; @@ -12,7 +13,12 @@ export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_agg export function createAggregatedStatsStream( taskManager: TaskManager, - config: TaskManagerConfig + config: TaskManagerConfig, + logger: Logger ): AggregatedStatProvider { - return createWorkloadAggregator(taskManager, config.monitored_aggregated_stats_refresh_rate); + return createWorkloadAggregator( + taskManager, + config.monitored_aggregated_stats_refresh_rate, + logger + ); } diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 32e8c2111239..f85a6571899e 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -6,8 +6,9 @@ import { createWorkloadAggregator } from './workload_statistics'; import { taskManagerMock } from '../task_manager.mock'; -import { first } from 'rxjs/operators'; +import { first, take, bufferCount } from 'rxjs/operators'; import { AggregationResult } from '../queries/aggregation_clauses'; +import { mockLogger } from '../test_utils'; describe('Workload Statistics Aggregator', () => { test('queries the Task Store at a fixed interval for the current workload', async () => { @@ -18,10 +19,13 @@ describe('Workload Statistics Aggregator', () => { taskType: { buckets: [], }, + schedule: { + buckets: [], + }, }, } as unknown) as AggregationResult); - const workloadAggregator = createWorkloadAggregator(taskManager, 10); + const workloadAggregator = createWorkloadAggregator(taskManager, 10, mockLogger()); return new Promise((resolve) => { workloadAggregator.pipe(first()).subscribe(() => { @@ -35,6 
+39,11 @@ describe('Workload Statistics Aggregator', () => { }, }, }, + schedule: { + terms: { + field: 'task.schedule.interval', + }, + }, }, }); resolve(); @@ -42,70 +51,122 @@ describe('Workload Statistics Aggregator', () => { }); }); - test('returns a summary of the workload by task type', async () => { - const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(({ + const mockAggregatedResult = ({ + task: { + doc_count: 4, + schedule: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: '3600s', + doc_count: 1, + }, + { + key: '60s', + doc_count: 1, + }, + { + key: '720m', + doc_count: 1, + }, + ], + }, + taskType: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'actions_telemetry', + doc_count: 2, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 2, + }, + ], + }, + }, + { + key: 'alerting_telemetry', + doc_count: 1, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, + { + key: 'session_cleanup', + doc_count: 1, + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, + ], + }, + }, + } as unknown) as AggregationResult; + + function setTaskTypeCount( + result: AggregationResult, + taskType: string, + status: Record + ) { + const buckets = [ + ...result.task.taskType.buckets.filter(({ key }) => key !== taskType), + { + key: taskType, + doc_count: Object.values(status).reduce((sum, count) => sum + count, 0), + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: Object.entries(status).map(([key, count]) => ({ + key, + doc_count: count, + })), + }, + }, + ]; + return ({ task: { - doc_count: 4, + doc_count: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), taskType: { 
doc_count_error_upper_bound: 0, sum_other_doc_count: 0, - buckets: [ - { - key: 'actions_telemetry', - doc_count: 2, - status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, - buckets: [ - { - key: 'idle', - doc_count: 2, - }, - ], - }, - }, - { - key: 'alerting_telemetry', - doc_count: 1, - status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, - buckets: [ - { - key: 'idle', - doc_count: 1, - }, - ], - }, - }, - { - key: 'session_cleanup', - doc_count: 1, - status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, - buckets: [ - { - key: 'idle', - doc_count: 1, - }, - ], - }, - }, - ], + buckets, }, }, - } as unknown) as AggregationResult); + } as unknown) as AggregationResult; + } - const workloadAggregator = createWorkloadAggregator(taskManager, 10); + test('returns a summary of the workload by task type', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 10, mockLogger()); return new Promise((resolve) => { workloadAggregator.pipe(first()).subscribe((result) => { expect(result.key).toEqual('workload'); expect(result.value).toMatchObject({ sum: 4, - types: { + taskTypes: { actions_telemetry: { sum: 2, status: { idle: 2 } }, alerting_telemetry: { sum: 1, status: { idle: 1 } }, session_cleanup: { sum: 1, status: { idle: 1 } }, @@ -115,4 +176,47 @@ describe('Workload Statistics Aggregator', () => { }); }); }); + + test('recovers from errors fetching the workload', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate + .mockResolvedValueOnce( + setTaskTypeCount(mockAggregatedResult, 'alerting_telemetry', { + idle: 2, + }) + ) + .mockRejectedValueOnce(new Error('Elasticsearch has gone poof')) + .mockResolvedValueOnce( + setTaskTypeCount(mockAggregatedResult, 'alerting_telemetry', { + idle: 1, + failed: 1, + }) + ); + const logger = mockLogger(); + 
const workloadAggregator = createWorkloadAggregator(taskManager, 10, logger); + + return new Promise((resolve) => { + workloadAggregator.pipe(take(2), bufferCount(2)).subscribe((results) => { + expect(results[0].key).toEqual('workload'); + expect(results[0].value).toMatchObject({ + sum: 5, + taskTypes: { + actions_telemetry: { sum: 2, status: { idle: 2 } }, + alerting_telemetry: { sum: 2, status: { idle: 2 } }, + session_cleanup: { sum: 1, status: { idle: 1 } }, + }, + }); + expect(results[1].key).toEqual('workload'); + expect(results[1].value).toMatchObject({ + sum: 5, + taskTypes: { + actions_telemetry: { sum: 2, status: { idle: 2 } }, + alerting_telemetry: { sum: 2, status: { idle: 1, failed: 1 } }, + session_cleanup: { sum: 1, status: { idle: 1 } }, + }, + }); + resolve(); + }); + }); + }); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 8e73d88bea25..6cb6be979780 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -5,7 +5,8 @@ */ import { interval } from 'rxjs'; -import { concatMap, map } from 'rxjs/operators'; +import { concatMap, map, catchError } from 'rxjs/operators'; +import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { keyBy, mapValues } from 'lodash'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; @@ -15,10 +16,12 @@ import { AggregationBucketWithSubAgg, AggregationBucket, } from '../queries/aggregation_clauses'; +import { parseIntervalAsSecond } from '../lib/intervals'; export function createWorkloadAggregator( taskManager: TaskManager, - refreshInterval: number + refreshInterval: number, + logger: Logger ): AggregatedStatProvider { return interval(refreshInterval).pipe( concatMap(() => @@ -32,34 +35,46 @@ export function createWorkloadAggregator( 
}, }, }, + schedule: { + terms: { field: 'task.schedule.interval' }, + }, }, }) ), - map( - ({ - task: { - doc_count: sum, - taskType: { buckets: types }, - }, - }: AggregationResult<'task' | 'taskType' | 'status'>) => { - const summary: JsonObject = { - sum, - types: mapValues( - keyBy>( - types as Array>, - 'key' - ), - ({ doc_count: docCount, status }) => ({ - sum: docCount, - status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), - }) + map(({ task }: AggregationResult<'task' | 'taskType' | 'schedule' | 'status'>) => { + const { + doc_count: sum = 0, + taskType: { buckets: taskTypes = [] } = {}, + schedule: { buckets: schedules = [] } = {}, + } = task; + const summary: JsonObject = { + sum, + taskTypes: mapValues( + keyBy>( + taskTypes as Array>, + 'key' ), - }; - return { - key: 'workload', - value: summary, - }; - } - ) + ({ doc_count: docCount, status }) => ({ + sum: docCount, + status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), + }) + ), + schedule: (schedules as AggregationBucket[]) + .sort( + ({ key: scheduleLeft }, { key: scheduleRight }) => + parseIntervalAsSecond(scheduleLeft) - parseIntervalAsSecond(scheduleRight) + ) + .map(({ key: schedule, doc_count: count }) => [schedule, count]), + }; + return { + key: 'workload', + value: summary, + }; + }), + catchError((ex: Error, caught) => { + logger.error(`[WorkloadAggregator]: ${ex}`); + // continue to pull values from the same observable + return caught; + }) ); } diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 715d8cf1b4d0..3a4577db01b4 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -3,7 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. 
*/ -import { PluginInitializerContext, Plugin, CoreSetup, CoreStart } from 'src/core/server'; +import { PluginInitializerContext, Plugin, CoreSetup, CoreStart, Logger } from 'src/core/server'; import { Subject } from 'rxjs'; import { first } from 'rxjs/operators'; import { TaskDictionary, TaskDefinition } from './task'; @@ -31,13 +31,16 @@ export class TaskManagerPlugin currentConfig: TaskManagerConfig; taskManagerId?: string; config?: TaskManagerConfig; + logger: Logger; constructor(private readonly initContext: PluginInitializerContext) { this.initContext = initContext; this.currentConfig = {} as TaskManagerConfig; + this.logger = initContext.logger.get('taskManager'); } public async setup(core: CoreSetup): Promise { + const { logger } = this; const config = (this.config = await this.initContext.config .create() .pipe(first()) @@ -51,7 +54,7 @@ export class TaskManagerPlugin healthRoute( router, config, - this.taskManager.then((tm) => createAggregatedStatsStream(tm, config)), + this.taskManager.then((tm) => createAggregatedStatsStream(tm, config, logger)), // if health is any more stale than the pollInterval (+1s buffer) consider the system unhealthy config.poll_interval + 1000 ); @@ -67,7 +70,7 @@ export class TaskManagerPlugin } public start({ savedObjects, elasticsearch }: CoreStart): TaskManagerStartContract { - const logger = this.initContext.logger.get('taskManager'); + const { logger } = this; const savedObjectsRepository = savedObjects.createInternalRepository(['task']); this.legacyTaskManager$.next( diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 7df3186ca8ec..44e409a2aec3 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -3,6 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. 
*/ +import { Logger } from 'src/core/server'; import { Subject, Observable, Subscription } from 'rxjs'; import { filter } from 'rxjs/operators'; @@ -19,7 +20,6 @@ import { import { Result, asOk, asErr, either, map, mapErr, promiseResult } from './lib/result_type'; import { TaskManagerConfig } from './config'; -import { Logger } from './types'; import { TaskMarkRunning, TaskRun, diff --git a/x-pack/plugins/task_manager/server/task_pool.ts b/x-pack/plugins/task_manager/server/task_pool.ts index 92374908c60f..ce7cd2bba92d 100644 --- a/x-pack/plugins/task_manager/server/task_pool.ts +++ b/x-pack/plugins/task_manager/server/task_pool.ts @@ -11,7 +11,7 @@ import moment, { Duration } from 'moment'; import { performance } from 'perf_hooks'; import { padStart } from 'lodash'; -import { Logger } from './types'; +import { Logger } from 'src/core/server'; import { TaskRunner } from './task_runner'; import { isTaskSavedObjectNotFoundError } from './lib/is_task_not_found_error'; diff --git a/x-pack/plugins/task_manager/server/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_runner.test.ts index c3191dbb349e..81fe097f4369 100644 --- a/x-pack/plugins/task_manager/server/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_runner.test.ts @@ -6,7 +6,7 @@ import _ from 'lodash'; import sinon from 'sinon'; -import { minutesFromNow } from './lib/intervals'; +import { secondsFromNow } from './lib/intervals'; import { asOk, asErr } from './lib/result_type'; import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; import { ConcreteTaskInstance, TaskStatus, TaskDictionary, TaskDefinition } from './task'; @@ -15,6 +15,8 @@ import { mockLogger } from './test_utils'; import { SavedObjectsErrorHelpers } from '../../../../src/core/server'; import moment from 'moment'; +const minutesFromNow = (mins: number): Date => secondsFromNow(mins * 60); + let fakeTimer: sinon.SinonFakeTimers; beforeAll(() => { diff --git 
a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index ebf13fac2f31..87d1938393f6 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -10,6 +10,7 @@ * rescheduling, middleware application, etc. */ +import { Logger } from 'src/core/server'; import apm from 'elastic-apm-node'; import { performance } from 'perf_hooks'; import Joi from 'joi'; @@ -18,7 +19,6 @@ import { identity, defaults, flow } from 'lodash'; import { asOk, asErr, mapErr, eitherAsync, unwrap, mapOk, Result } from './lib/result_type'; import { TaskRun, TaskMarkRunning, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; import { intervalFromDate, intervalFromNow } from './lib/intervals'; -import { Logger } from './types'; import { BeforeRunFunction, BeforeMarkRunningFunction } from './lib/middleware'; import { CancelFunction, diff --git a/x-pack/plugins/task_manager/server/test_utils/index.ts b/x-pack/plugins/task_manager/server/test_utils/index.ts index 6f43a60ff42d..a732aaf88466 100644 --- a/x-pack/plugins/task_manager/server/test_utils/index.ts +++ b/x-pack/plugins/task_manager/server/test_utils/index.ts @@ -3,6 +3,8 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ +import { Logger } from 'src/core/server'; +import { loggingSystemMock } from 'src/core/server/mocks'; /* * A handful of helper functions for testing the task manager. @@ -15,12 +17,7 @@ const nativeTimeout = setTimeout; * Creates a mock task manager Logger. 
*/ export function mockLogger() { - return { - info: jest.fn(), - debug: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - }; + return loggingSystemMock.create().get() as jest.Mocked; } export interface Resolvable { diff --git a/x-pack/plugins/task_manager/server/types.ts b/x-pack/plugins/task_manager/server/types.ts index a38730ad7f76..c86ae1c3fd98 100644 --- a/x-pack/plugins/task_manager/server/types.ts +++ b/x-pack/plugins/task_manager/server/types.ts @@ -7,10 +7,3 @@ import { TaskManager as TaskManagerClass } from './task_manager'; export type TaskManager = PublicMethodsOf; - -export interface Logger { - info(message: string): void; - debug(message: string): void; - warn(message: string): void; - error(message: string): void; -} diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 9cc7b6174443..c3c15c7ba481 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -6,6 +6,7 @@ import expect from '@kbn/expect'; import url from 'url'; +import { keyBy, mapValues } from 'lodash'; import supertestAsPromised from 'supertest-as-promised'; import { FtrProviderContext } from '../../ftr_provider_context'; import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; @@ -50,36 +51,62 @@ export default function ({ getService }: FtrProviderContext) { const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + const monitoredAggregatedStatsRefreshRate = 5000; + describe('health', () => { it('should return basic configuration of task manager', async () => { expect((await getHealth()).stats.configuration.value).to.eql({ poll_interval: 3000, max_poll_inactivity_cycles: 10, + monitored_aggregated_stats_refresh_rate: monitoredAggregatedStatsRefreshRate, request_capacity: 1000, max_workers: 10, }); }); 
it('should return the task manager workload', async () => { + const workload = (await getHealth()).stats.workload; const sumSampleTaskInWorkload = - ((await getHealth()).stats.workload.value.types as { + (workload.value.taskTypes as { sampleTask?: { sum: number }; }).sampleTask?.sum ?? 0; + const schedulesWorkload = (mapValues( + keyBy(workload.value.schedule as Array<[string, number]>, ([interval, count]) => interval), + ([, count]) => count + ) as unknown) as { '37m': number | undefined; '37s': number | undefined }; + + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '37s' }, + }); await scheduleTask({ taskType: 'sampleTask', - schedule: { interval: '1m' }, + schedule: { interval: '37m' }, }); await retry.try(async () => { // workload is configured to refresh every 5s in FTs - await delay(5000); + await delay(monitoredAggregatedStatsRefreshRate); const workloadAfterScheduling = (await getHealth()).stats.workload.value; expect( - (workloadAfterScheduling.types as { sampleTask: { sum: number } }).sampleTask.sum - ).to.eql(sumSampleTaskInWorkload + 1); + (workloadAfterScheduling.taskTypes as { sampleTask: { sum: number } }).sampleTask.sum + ).to.eql(sumSampleTaskInWorkload + 2); + + const schedulesWorkloadAfterScheduling = (mapValues( + keyBy( + workloadAfterScheduling.schedule as Array<[string, number]>, + ([interval]) => interval + ), + ([, count]) => count + ) as unknown) as { + '37m': number; + '37s': number; + }; + expect(schedulesWorkloadAfterScheduling['37s']).to.eql(schedulesWorkload['37s'] ?? 0 + 1); + expect(schedulesWorkloadAfterScheduling['37m']).to.eql(schedulesWorkload['37m'] ?? 
0 + 1); }); }); }); diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts index 5eb1353dd129..b542bff3a4aa 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/index.ts @@ -9,7 +9,7 @@ import { FtrProviderContext } from '../../ftr_provider_context'; export default function ({ loadTestFile }: FtrProviderContext) { describe('task_manager', function taskManagerSuite() { this.tags('ciGroup2'); - loadTestFile(require.resolve('./task_management')); loadTestFile(require.resolve('./health_route')); + loadTestFile(require.resolve('./task_management')); }); } From 0a22c432f28ab03b9aa17b62a8d33cca903e169c Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 23 Sep 2020 14:46:00 +0100 Subject: [PATCH 04/67] fixed tests --- x-pack/plugins/actions/server/create_execute_function.test.ts | 2 +- x-pack/plugins/task_manager/server/task_manager.test.ts | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugins/actions/server/create_execute_function.test.ts b/x-pack/plugins/actions/server/create_execute_function.test.ts index cfbc68879ae0..d0500e37ceed 100644 --- a/x-pack/plugins/actions/server/create_execute_function.test.ts +++ b/x-pack/plugins/actions/server/create_execute_function.test.ts @@ -15,7 +15,7 @@ import { asSavedObjectExecutionSource, } from './lib/action_execution_source'; -const mockTaskManager = taskManagerMock.start(); +const mockTaskManager = taskManagerMock.createStart(); const savedObjectsClient = savedObjectsClientMock.create(); const request = {} as KibanaRequest; diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index cf7f9e2a7cff..017540a2dcc5 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ 
b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -41,6 +41,7 @@ describe('TaskManager', () => { max_attempts: 9, poll_interval: 6000000, max_poll_inactivity_cycles: 10, + monitored_aggregated_stats_refresh_rate: 5000, request_capacity: 1000, }; const taskManagerOpts = { From 7c226e968808a7833e43a4889f8425209599e542 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 23 Sep 2020 16:17:49 +0100 Subject: [PATCH 05/67] fixed typing --- x-pack/plugins/task_manager/server/task_manager.mock.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/task_manager.mock.ts index 8afaa2251566..ae71ea5c2379 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/task_manager.mock.ts @@ -18,6 +18,7 @@ const createTaskManagerMock = () => { runNow: jest.fn(), remove: jest.fn(), start: jest.fn(), + isStarted: jest.fn(() => true), stop: jest.fn(), } as jest.Mocked; }; From e1ee96774de6edbbb54b785f1d9020278614df3d Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 23 Sep 2020 17:08:47 +0100 Subject: [PATCH 06/67] fixed typing again --- .../plugins/task_manager/server/task_manager.mock.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/task_manager.mock.ts index ae71ea5c2379..e5325274024d 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/task_manager.mock.ts @@ -6,8 +6,8 @@ import { TaskManager } from './task_manager'; -const createTaskManagerMock = () => { - return { +const createTaskManagerMock = (isStarted: boolean = true) => { + return ({ registerTaskDefinitions: jest.fn(), addMiddleware: jest.fn(), ensureScheduled: jest.fn(), @@ -18,9 +18,11 @@ const createTaskManagerMock = () => { runNow: jest.fn(), remove: jest.fn(), start: jest.fn(), 
- isStarted: jest.fn(() => true), + get isStarted() { + return isStarted; + }, stop: jest.fn(), - } as jest.Mocked; + } as unknown) as jest.Mocked; }; export const taskManagerMock = { From abec231aee9fdca0d543cbe10537d8eaca90bdda Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 28 Sep 2020 14:50:10 +0100 Subject: [PATCH 07/67] added task runtime stats to health endpoint --- .../task_manager/server/config.test.ts | 1 + x-pack/plugins/task_manager/server/config.ts | 7 + .../task_manager/server/monitoring/index.ts | 27 ++- .../monitoring_stats_stream.test.ts | 155 ++++++++++++++ .../monitoring/monitoring_stats_stream.ts | 127 ++++++++++++ .../runtime_statistics_aggregator.ts | 10 +- .../monitoring/task_run_statistics.test.ts | 193 ++++++++++++++++++ .../server/monitoring/task_run_statistics.ts | 166 +++++++++++++++ .../monitoring/workload_statistics.test.ts | 2 +- .../server/monitoring/workload_statistics.ts | 24 ++- x-pack/plugins/task_manager/server/plugin.ts | 6 +- .../task_manager/server/routes/health.test.ts | 144 ++----------- .../task_manager/server/routes/health.ts | 97 +++------ .../task_manager/server/task_events.ts | 20 +- .../task_manager/server/task_manager.mock.ts | 14 +- .../task_manager/server/task_manager.test.ts | 1 + .../task_manager/server/task_manager.ts | 51 +++-- .../test_suites/task_manager/health_route.ts | 35 +++- 18 files changed, 837 insertions(+), 243 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index 2eb132185ff7..f0c193763899 100644 --- 
a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -16,6 +16,7 @@ describe('config validation', () => { "max_poll_inactivity_cycles": 10, "max_workers": 10, "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_running_average_window": 50, "poll_interval": 3000, "request_capacity": 1000, } diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index 1b79c17220f4..a530cb2d44f4 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -9,6 +9,7 @@ import { schema, TypeOf } from '@kbn/config-schema'; export const DEFAULT_MAX_WORKERS = 10; export const DEFAULT_POLL_INTERVAL = 3000; export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; +export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50; // Refresh "pull based" monitored stats at a default rate of once a minute export const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000; @@ -57,6 +58,12 @@ export const configSchema = schema.object({ /* don't run monitored stat aggregations any faster than once every 5 seconds */ min: 5000, }), + /* The size of the running average window for monitored stats. */ + monitored_stats_running_average_window: schema.number({ + defaultValue: DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW, + max: 100, + min: 10, + }), }); export type TaskManagerConfig = TypeOf; diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts index 347731752d85..ef447d6ef062 100644 --- a/x-pack/plugins/task_manager/server/monitoring/index.ts +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -3,22 +3,29 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. 
*/ + import { Logger } from 'src/core/server'; +import { Observable } from 'rxjs'; import { TaskManager } from '../task_manager'; -import { AggregatedStatProvider } from './runtime_statistics_aggregator'; -import { createWorkloadAggregator } from './workload_statistics'; import { TaskManagerConfig } from '../config'; +import { + MonitoringStats, + createAggregators, + createMonitoringStatsStream, +} from './monitoring_stats_stream'; -export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; +export { + MonitoringStats, + RawMonitoringStats, + summarizeMonitoringStats, + createAggregators, + createMonitoringStatsStream, +} from './monitoring_stats_stream'; -export function createAggregatedStatsStream( +export function createMonitoringStats( taskManager: TaskManager, config: TaskManagerConfig, logger: Logger -): AggregatedStatProvider { - return createWorkloadAggregator( - taskManager, - config.monitored_aggregated_stats_refresh_rate, - logger - ); +): Observable { + return createMonitoringStatsStream(createAggregators(taskManager, config, logger), config); } diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts new file mode 100644 index 000000000000..063947f2ecad --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { TaskManagerConfig } from '../config'; +import { of, Subject } from 'rxjs'; +import { take, bufferCount } from 'rxjs/operators'; +import { createMonitoringStatsStream, AggregatedStat } from './monitoring_stats_stream'; +import { JsonValue } from 'src/plugins/kibana_utils/common'; + +beforeEach(() => { + jest.resetAllMocks(); +}); + +describe('createMonitoringStatsStream', () => { + const configuration: TaskManagerConfig = { + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }; + + it('returns the initial config used to configure Task Manager', async () => { + return new Promise((resolve) => { + createMonitoringStatsStream(of(), configuration) + .pipe(take(1)) + .subscribe((firstValue) => { + expect(firstValue.stats).toMatchObject({ + configuration: { + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }, + }, + }); + resolve(); + }); + }); + }); + + it('incrementally updates the stats returned by the endpoint', async () => { + const aggregatedStats$ = new Subject(); + + return new Promise((resolve) => { + createMonitoringStatsStream(aggregatedStats$, configuration) + .pipe(take(3), bufferCount(3)) + .subscribe(([initialValue, secondValue, thirdValue]) => { + expect(initialValue.stats).toMatchObject({ + lastUpdate: expect.any(String), + stats: { + configuration: { + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }, + }, + }, + }); + + expect(secondValue).toMatchObject({ + lastUpdate: expect.any(String), + stats: { + 
newAggregatedStat: { + timestamp: expect.any(String), + value: { + some: { + complex: { + value: 123, + }, + }, + }, + }, + configuration: { + timestamp: expect.any(String), + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }, + }, + }, + }); + + expect(thirdValue).toMatchObject({ + lastUpdate: expect.any(String), + stats: { + newAggregatedStat: { + timestamp: expect.any(String), + value: { + some: { + updated: { + value: 456, + }, + }, + }, + }, + configuration: { + timestamp: expect.any(String), + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }, + }, + }, + }); + }); + + aggregatedStats$.next({ + key: 'newAggregatedStat', + value: { + some: { + complex: { + value: 123, + }, + }, + } as JsonValue, + }); + + aggregatedStats$.next({ + key: 'newAggregatedStat', + value: { + some: { + updated: { + value: 456, + }, + }, + } as JsonValue, + }); + + resolve(); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts new file mode 100644 index 000000000000..03fa889fb732 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -0,0 +1,127 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +import { merge, of, Observable } from 'rxjs'; +import { map, scan } from 'rxjs/operators'; +import { set } from '@elastic/safer-lodash-set'; +import { pick } from 'lodash'; +import { Logger } from 'src/core/server'; +import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { TaskManager } from '../task_manager'; +import { createWorkloadAggregator, WorkloadStat } from './workload_statistics'; +import { createTaskRunAggregator, summarizeTaskRunStat, TaskRunStat } from './task_run_statistics'; +import { TaskManagerConfig } from '../config'; +import { AggregatedStatProvider } from './runtime_statistics_aggregator'; + +export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; + +const CONFIG_FIELDS_TO_EXPOSE = [ + 'max_workers', + 'poll_interval', + 'request_capacity', + 'max_poll_inactivity_cycles', + 'monitored_aggregated_stats_refresh_rate', + 'monitored_stats_running_average_window', +] as const; + +type ConfigStat = Pick; + +export interface MonitoringStats { + lastUpdate: string; + stats: { + configuration: { + timestamp: string; + value: ConfigStat; + }; + workload?: { + timestamp: string; + value: WorkloadStat; + }; + runtime?: { + timestamp: string; + value: TaskRunStat; + }; + }; +} + +interface MonitoredStat { + timestamp: string; + value: JsonObject; +} + +export interface RawMonitoringStats { + lastUpdate: string; + stats: Record; +} + +export function createAggregators( + taskManager: TaskManager, + config: TaskManagerConfig, + logger: Logger +): AggregatedStatProvider { + return merge( + createTaskRunAggregator(taskManager, config.monitored_stats_running_average_window, logger), + createWorkloadAggregator(taskManager, config.monitored_aggregated_stats_refresh_rate, logger) + ); +} + +export function createMonitoringStatsStream( + provider$: AggregatedStatProvider, + config: TaskManagerConfig +): Observable { + const initialStats = initializeStats(new Date().toISOString(), config); + return merge( + // 
emit the initial stats + of(initialStats), + // emit updated stats whenever a provider updates a specific key on the stats + provider$.pipe( + map(({ key, value }) => { + return { + value: { timestamp: new Date().toISOString(), value }, + key, + }; + }), + scan((monitoringStats: MonitoringStats, { key, value }) => { + // incrementally merge stats as they come in + set(monitoringStats.stats, key, value); + monitoringStats.lastUpdate = new Date().toISOString(); + return monitoringStats; + }, initialStats) + ) + ); +} + +export function summarizeMonitoringStats({ + lastUpdate, + stats: { runtime, ...otherStats }, +}: MonitoringStats): RawMonitoringStats { + return { + lastUpdate, + stats: { + ...((otherStats as unknown) as RawMonitoringStats['stats']), + ...(runtime + ? { + runtime: { + ...runtime, + value: summarizeTaskRunStat(runtime.value), + }, + } + : {}), + }, + }; +} + +const initializeStats = ( + initialisationTimestamp: string, + config: TaskManagerConfig +): MonitoringStats => ({ + lastUpdate: initialisationTimestamp, + stats: { + configuration: { + timestamp: initialisationTimestamp, + value: pick(config, ...CONFIG_FIELDS_TO_EXPOSE) as ConfigStat, + }, + }, +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts b/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts index f895bf2b02e6..bd2b3845f252 100644 --- a/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts +++ b/x-pack/plugins/task_manager/server/monitoring/runtime_statistics_aggregator.ts @@ -4,11 +4,13 @@ * you may not use this file except in compliance with the Elastic License. 
*/ import { Observable } from 'rxjs'; -import { JsonObject, JsonValue } from 'src/plugins/kibana_utils/common'; +import { JsonValue } from 'src/plugins/kibana_utils/common'; -export interface AggregatedStat { +export interface AggregatedStat { key: string; - value: JsonObject | JsonValue; + value: Stat; } -export type AggregatedStatProvider = Observable; +export type AggregatedStatProvider = Observable< + AggregatedStat +>; diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts new file mode 100644 index 000000000000..365b8962146d --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -0,0 +1,193 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import uuid from 'uuid'; +import { Subject } from 'rxjs'; +import stats from 'stats-lite'; +import sinon from 'sinon'; +import { take, tap, bufferCount, startWith, map } from 'rxjs/operators'; + +import { ConcreteTaskInstance, TaskStatus } from '../task'; +import { asTaskRunEvent, asTaskPollingCycleEvent } from '../task_events'; +import { asOk } from '../lib/result_type'; +import { TaskLifecycleEvent } from '../task_manager'; +import { + createTaskRunAggregator, + summarizeTaskRunStat, + TaskRunStat, + SummarizedTaskRunStat, +} from './task_run_statistics'; +import { taskManagerMock } from '../task_manager.mock'; +import { mockLogger } from '../test_utils'; +import { AggregatedStat } from './runtime_statistics_aggregator'; +import { FillPoolResult } from '../lib/fill_pool'; + +describe('Task Run Statistics', () => { + let fakeTimer: sinon.SinonFakeTimers; + + beforeAll(() => { + fakeTimer = sinon.useFakeTimers(); + }); + + afterAll(() => fakeTimer.restore()); + + test('returns 
a running average of task drift', async () => { + const runAtDrift = [1000, 2000, 500, 300, 400, 15000, 20000, 200]; + const taskManager = taskManagerMock.create({ + events: new Subject().pipe( + startWith( + ...runAtDrift.map((drift) => mockTaskRunEvent({ runAt: runAtMillisecondsAgo(drift) })) + ) + ), + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator( + taskManager, + runningAverageWindowSize, + mockLogger() + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.drift).toMatchObject({ + mean: stats.mean(window), + median: stats.median(window), + mode: stats.mode(window), + }); + } + + return new Promise((resolve) => { + taskRunAggregator + .pipe( + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeTaskRunStat(value), + })), + take(runAtDrift.length), + bufferCount(runAtDrift.length) + ) + .subscribe((taskStats: Array>) => { + expectWindowEqualsUpdate(taskStats[0], runAtDrift.slice(0, 1)); + expectWindowEqualsUpdate(taskStats[1], runAtDrift.slice(0, 2)); + expectWindowEqualsUpdate(taskStats[2], runAtDrift.slice(0, 3)); + expectWindowEqualsUpdate(taskStats[3], runAtDrift.slice(0, 4)); + expectWindowEqualsUpdate(taskStats[4], runAtDrift.slice(0, 5)); + // from the 6th value, begin to drop old values as out window is 5 + expectWindowEqualsUpdate(taskStats[5], runAtDrift.slice(1, 6)); + expectWindowEqualsUpdate(taskStats[6], runAtDrift.slice(2, 7)); + expectWindowEqualsUpdate(taskStats[7], runAtDrift.slice(3, 8)); + resolve(); + }); + }); + }); + + test('returns polling stats', async () => { + const expectedTimestamp: string[] = []; + const taskManager = taskManagerMock.create({ + events: new Subject().pipe( + startWith( + asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), + asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), + asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), + 
asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), + asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), + asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), + asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity)), + asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity)), + asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), + asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)) + ) + ), + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator( + taskManager, + runningAverageWindowSize, + mockLogger() + ); + + return new Promise((resolve) => { + taskRunAggregator + .pipe( + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeTaskRunStat(value), + })), + tap(() => { + expectedTimestamp.push(new Date().toISOString()); + // each event is a second after the previous one + fakeTimer.tick(1000); + }), + take(10), + bufferCount(10) + ) + .subscribe((taskStats: Array>) => { + expect(taskStats.map((taskStat) => taskStat.value.polling.lastSuccessfulPoll)).toEqual( + expectedTimestamp + ); + + /** + * At any given time we only keep track of the last X Polling Results + * In the tests this is ocnfiugured to a window size of 5 + */ + expect(taskStats.map((taskStat) => taskStat.value.polling.resultFrequency)).toEqual([ + // NoTasksClaimed + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled + { NoTasksClaimed: 75, RanOutOfCapacity: 0, PoolFilled: 25 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled + { NoTasksClaimed: 60, RanOutOfCapacity: 0, PoolFilled: 40 }, + // NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled + { NoTasksClaimed: 40, 
RanOutOfCapacity: 0, PoolFilled: 60 }, + // NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity + { NoTasksClaimed: 20, RanOutOfCapacity: 20, PoolFilled: 60 }, + // PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity + { NoTasksClaimed: 0, RanOutOfCapacity: 40, PoolFilled: 60 }, + // PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed + { NoTasksClaimed: 20, RanOutOfCapacity: 40, PoolFilled: 40 }, + // PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed, NoTasksClaimed + { NoTasksClaimed: 40, RanOutOfCapacity: 40, PoolFilled: 20 }, + ]); + resolve(); + }); + }); + }); +}); + +function runAtMillisecondsAgo(ms: number): Date { + return new Date(Date.now() - ms); +} + +const mockTaskRunEvent = (overrides: Partial = {}) => { + const task = mockTaskInstance(overrides); + return asTaskRunEvent(task.id, asOk(task)); +}; + +const mockTaskInstance = (overrides: Partial = {}): ConcreteTaskInstance => ({ + id: uuid.v4(), + attempts: 0, + status: TaskStatus.Running, + version: '123', + runAt: new Date(), + scheduledAt: new Date(), + startedAt: new Date(), + retryAt: new Date(Date.now() + 5 * 60 * 1000), + state: {}, + taskType: 'alerting:test', + params: { + alertId: '1', + }, + ownerId: null, + ...overrides, +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts new file mode 100644 index 000000000000..ca224fc28199 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -0,0 +1,166 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { Logger } from 'src/core/server'; +import { of, empty } from 'rxjs'; +import { filter, flatMap } from 'rxjs/operators'; +import { isUndefined, countBy, mapValues } from 'lodash'; +import stats from 'stats-lite'; +import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; +import { TaskManager, TaskLifecycleEvent } from '../task_manager'; +import { isTaskRunEvent, isTaskPollingCycleEvent } from '../task_events'; +import { isOk } from '../lib/result_type'; +import { ConcreteTaskInstance } from '../task'; +import { FillPoolResult } from '../lib/fill_pool'; + +interface AveragedStat extends JsonObject { + mean: number; + median: number; + mode: number; +} + +interface FillPoolStat extends JsonObject { + lastSuccessfulPoll: string; + resultFrequency: FillPoolResult[]; +} + +export interface TaskRunStat extends JsonObject { + drift: number[]; + polling: FillPoolStat | Omit; +} + +interface FillPoolRawStat extends JsonObject { + lastSuccessfulPoll: string; + resultFrequency: { + [FillPoolResult.NoTasksClaimed]: number; + [FillPoolResult.RanOutOfCapacity]: number; + [FillPoolResult.PoolFilled]: number; + }; +} + +export interface SummarizedTaskRunStat extends JsonObject { + drift: AveragedStat; + polling: FillPoolRawStat | Omit; +} + +export function createTaskRunAggregator( + taskManager: TaskManager, + runningAverageWindowSize: number, + logger: Logger +): AggregatedStatProvider { + const runningStats: { + runtime: { + polling: { + lastSuccessfulPoll: (value?: string) => string | undefined; + resultFrequency: (value?: FillPoolResult) => FillPoolResult[]; + }; + drift: (value?: number) => number[]; + }; + } = { + runtime: { + polling: { + lastSuccessfulPoll: createLastValueStat(), + resultFrequency: createRunningAveragedStat(runningAverageWindowSize), + }, + drift: createRunningAveragedStat(runningAverageWindowSize), + }, + }; + return taskManager.events.pipe( + 
filter( + (taskEvent: TaskLifecycleEvent) => + (isTaskRunEvent(taskEvent) || isTaskPollingCycleEvent(taskEvent)) && + isOk(taskEvent.event) + ), + flatMap((taskEvent: TaskLifecycleEvent) => { + if (isTaskRunEvent(taskEvent) && isOk(taskEvent.event)) { + const task = taskEvent.event.value; + const now = Date.now(); + return of({ + key: 'runtime', + value: { + polling: { + lastSuccessfulPoll: runningStats.runtime.polling.lastSuccessfulPoll(), + resultFrequency: runningStats.runtime.polling.resultFrequency(), + }, + drift: runningStats.runtime.drift(now - task.runAt.getTime()), + }, + } as AggregatedStat); + } else if (isTaskPollingCycleEvent(taskEvent) && isOk(taskEvent.event)) { + return of({ + key: 'runtime', + value: { + polling: { + lastSuccessfulPoll: runningStats.runtime.polling.lastSuccessfulPoll( + new Date().toISOString() + ), + resultFrequency: runningStats.runtime.polling.resultFrequency(taskEvent.event.value), + }, + drift: runningStats.runtime.drift(), + }, + } as AggregatedStat); + } + return empty(); + }) + ); +} + +export function summarizeTaskRunStat({ + polling: { lastSuccessfulPoll, resultFrequency }, + drift, +}: TaskRunStat): SummarizedTaskRunStat { + return { + polling: { + ...(lastSuccessfulPoll ? 
{ lastSuccessfulPoll } : {}), + resultFrequency: { + [FillPoolResult.NoTasksClaimed]: 0, + [FillPoolResult.RanOutOfCapacity]: 0, + [FillPoolResult.PoolFilled]: 0, + ...calculateFrequency(resultFrequency as FillPoolResult[]), + }, + }, + drift: calculateRunningAverage(drift), + }; +} + +function calculateRunningAverage(values: number[]): AveragedStat { + return { + mean: stats.mean(values), + median: stats.median(values), + mode: stats.mode(values), + }; +} + +function calculateFrequency(values: T[]): JsonObject { + return mapValues(countBy(values), (count) => Math.round((count * 100) / values.length)); +} + +function createLastValueStat() { + let lastValue: T; + return (value?: T) => { + if (isUndefined(value)) { + return lastValue; + } else { + lastValue = value; + return lastValue; + } + }; +} + +function createRunningAveragedStat(runningAverageWindowSize: number) { + const queue = new Array(); + return (value?: T) => { + if (isUndefined(value)) { + return queue; + } else { + if (queue.length === runningAverageWindowSize) { + queue.shift(); + } + queue.push(value); + return [...queue]; + } + }; +} diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index f85a6571899e..0bcf3abfc760 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -4,9 +4,9 @@ * you may not use this file except in compliance with the Elastic License. 
*/ +import { first, take, bufferCount } from 'rxjs/operators'; import { createWorkloadAggregator } from './workload_statistics'; import { taskManagerMock } from '../task_manager.mock'; -import { first, take, bufferCount } from 'rxjs/operators'; import { AggregationResult } from '../queries/aggregation_clauses'; import { mockLogger } from '../test_utils'; diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 6cb6be979780..669e6af16ea0 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -4,7 +4,7 @@ * you may not use this file except in compliance with the Elastic License. */ -import { interval } from 'rxjs'; +import { timer } from 'rxjs'; import { concatMap, map, catchError } from 'rxjs/operators'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; @@ -18,12 +18,28 @@ import { } from '../queries/aggregation_clauses'; import { parseIntervalAsSecond } from '../lib/intervals'; +interface StatusStat extends JsonObject { + [status: string]: number; +} +interface TaskTypeStat extends JsonObject { + [taskType: string]: { + sum: number; + status: StatusStat; + }; +} + +export interface WorkloadStat extends JsonObject { + sum: number; + taskTypes: TaskTypeStat; + schedule: Array<[string, number]>; +} + export function createWorkloadAggregator( taskManager: TaskManager, refreshInterval: number, logger: Logger -): AggregatedStatProvider { - return interval(refreshInterval).pipe( +): AggregatedStatProvider { + return timer(0, refreshInterval).pipe( concatMap(() => taskManager.aggregate({ aggs: { @@ -47,7 +63,7 @@ export function createWorkloadAggregator( taskType: { buckets: taskTypes = [] } = {}, schedule: { buckets: schedules = [] } = {}, } = task; - const summary: JsonObject = { + const summary: WorkloadStat = { 
sum, taskTypes: mapValues( keyBy>( diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 3a4577db01b4..f53418aec05a 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -12,7 +12,7 @@ import { TaskManagerConfig } from './config'; import { Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; import { healthRoute } from './routes'; -import { createAggregatedStatsStream } from './monitoring'; +import { createMonitoringStats } from './monitoring'; export type TaskManagerSetupContract = Pick< TaskManager, @@ -53,8 +53,8 @@ export class TaskManagerPlugin const router = core.http.createRouter(); healthRoute( router, - config, - this.taskManager.then((tm) => createAggregatedStatsStream(tm, config, logger)), + this.taskManager.then((tm) => createMonitoringStats(tm, config, logger)), + logger, // if health is any more stale than the pollInterval (+1s buffer) consider the system unhealthy config.poll_interval + 1000 ); diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 4fc7b9d6b352..1ea33794a279 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -7,64 +7,39 @@ import { healthRoute } from './health'; import { httpServiceMock } from 'src/core/server/mocks'; import { mockHandlerArguments } from './_mock_handler_arguments'; -import { TaskManagerConfig } from '../config'; -import { of, Subject } from 'rxjs'; -import { get } from 'lodash'; -import { sleep } from '../test_utils'; -import { AggregatedStat } from '../monitoring'; - -beforeEach(() => { - jest.resetAllMocks(); -}); - -const configuration: TaskManagerConfig = { - enabled: true, - max_workers: 10, - index: 'foo', - max_attempts: 9, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - 
request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, -}; +import { of } from 'rxjs'; +import { sleep, mockLogger } from '../test_utils'; describe('healthRoute', () => { + beforeEach(() => { + jest.resetAllMocks(); + }); + it('registers the route', async () => { const router = httpServiceMock.createRouter(); - healthRoute(router, configuration, Promise.resolve(of()), 1000); + healthRoute(router, Promise.resolve(of()), mockLogger(), 1000); const [config] = router.get.mock.calls[0]; expect(config.path).toMatchInlineSnapshot(`"/api/task_manager/_health"`); }); - it('returns the initial config used to configure Task Manager', async () => { + it('logs the Task Manager stats at a fixed interval', async () => { const router = httpServiceMock.createRouter(); + const logger = mockLogger(); - healthRoute(router, configuration, Promise.resolve(of()), 1000); + healthRoute(router, Promise.resolve(of()), logger, 1000); - const [, handler] = router.get.mock.calls[0]; + await sleep(1000); - const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); - - expect(get(await handler(context, req, res), 'body.stats')).toMatchObject({ - configuration: { - value: { - max_workers: 10, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, - }, - }, - }); + expect(logger.debug).toHaveBeenCalledWith(''); }); it('returns an error response if the stats are no longer fresh', async () => { const router = httpServiceMock.createRouter(); - healthRoute(router, configuration, Promise.resolve(of()), 1000); + healthRoute(router, Promise.resolve(of()), mockLogger(), 1000); const [, handler] = router.get.mock.calls[0]; @@ -85,6 +60,7 @@ describe('healthRoute', () => { poll_interval: 6000000, request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, }, }, }, @@ -93,96 +69,4 @@ describe('healthRoute', () => { }, }); }); - - 
it('incrementally updates the stats returned by the endpoint', async () => { - const router = httpServiceMock.createRouter(); - - const aggregatedStats = Promise.resolve(new Subject()); - - healthRoute(router, configuration, Promise.resolve(aggregatedStats), 1000); - - const [, handler] = router.get.mock.calls[0]; - - const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); - - return aggregatedStats.then(async (aggregatedStats$) => { - aggregatedStats$.next({ - key: 'newAggregatedStat', - value: { - some: { - complex: { - value: 123, - }, - }, - }, - }); - - expect(await handler(context, req, res)).toMatchObject({ - body: { - lastUpdate: expect.any(String), - stats: { - newAggregatedStat: { - timestamp: expect.any(String), - value: { - some: { - complex: { - value: 123, - }, - }, - }, - }, - configuration: { - timestamp: expect.any(String), - value: { - max_workers: 10, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, - }, - }, - }, - }, - }); - - aggregatedStats$.next({ - key: 'newAggregatedStat', - value: { - some: { - updated: { - value: 456, - }, - }, - }, - }); - - expect(await handler(context, req, res)).toMatchObject({ - body: { - lastUpdate: expect.any(String), - stats: { - newAggregatedStat: { - timestamp: expect.any(String), - value: { - some: { - updated: { - value: 456, - }, - }, - }, - }, - configuration: { - timestamp: expect.any(String), - value: { - max_workers: 10, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, - }, - }, - }, - }, - }); - }); - }); }); diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index cf73c9314391..e99c1298363a 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -11,68 +11,23 @@ import { 
IKibanaResponse, KibanaResponseFactory, } from 'kibana/server'; -import { pick } from 'lodash'; -import { set } from '@elastic/safer-lodash-set'; -import { JsonObject } from 'src/plugins/kibana_utils/common'; -import { map } from 'rxjs/operators'; -import { TaskManagerConfig } from '../config'; -import { AggregatedStatProvider } from '../monitoring'; - -const CONFIG_FIELDS_TO_EXPOSE = [ - 'max_workers', - 'poll_interval', - 'request_capacity', - 'max_poll_inactivity_cycles', - 'monitored_aggregated_stats_refresh_rate', -]; - -interface MonitoredStat { - timestamp: string; - value: JsonObject; -} - -interface MonitoringStats { - lastUpdate: string; - stats: Record; -} +import { Logger } from 'src/core/server'; +import { Observable } from 'rxjs'; +import { take } from 'rxjs/operators'; +import { debounceTime } from 'rxjs/operators'; +import { MonitoringStats, RawMonitoringStats, summarizeMonitoringStats } from '../monitoring'; export function healthRoute( router: IRouter, - initialConfig: TaskManagerConfig, - aggregatedStats: Promise, + monitoringStats: Promise>, + logger: Logger, requiredFreshness: number ) { - const initialisationTimestamp = new Date().toISOString(); - const monitoringStats: MonitoringStats = { - lastUpdate: initialisationTimestamp, - stats: { - configuration: { - timestamp: initialisationTimestamp, - value: pick<{ - max_workers: number; - poll_interval: number; - request_capacity: number; - max_poll_inactivity_cycles: number; - monitored_aggregated_stats_refresh_rate: number; - }>(initialConfig, ...CONFIG_FIELDS_TO_EXPOSE) as JsonObject, - }, - }, - }; - - aggregatedStats.then((aggregatedStats$) => { - aggregatedStats$ - .pipe( - map(({ key, value }) => { - return { - value: { timestamp: new Date().toISOString(), value }, - key, - }; - }) - ) - .subscribe(({ key, value }) => { - set(monitoringStats.stats, key, value); - monitoringStats.lastUpdate = new Date().toISOString(); - }); + /* Log Task Manager stats as a Debug log line at a fixed interval 
*/ + monitoringStats.then((monitoringStats$) => { + monitoringStats$ + .pipe(debounceTime(requiredFreshness)) + .subscribe((stats) => logger.debug(JSON.stringify(summarizeMonitoringStats(stats)))); }); router.get( @@ -85,24 +40,32 @@ export function healthRoute( req: KibanaRequest, res: KibanaResponseFactory ): Promise { - const lastUpdate = Date.parse(monitoringStats.lastUpdate); + const { lastUpdate, stats } = await getLatestStats(await monitoringStats); + const now = Date.now(); + const timestamp = new Date(now).toISOString(); /** * If the monitored stats aren't fresh, return an `500 internalError` with * the stats in the body of the api call. This makes it easier for monitoring * services to mark the service as broken */ - if (Date.now() - lastUpdate > requiredFreshness) { - return res.internalError({ - body: { - message: new Error('Task Manager monitored stats are out of date'), - attributes: monitoringStats, - }, - }); - } + // if (now - Date.parse(lastUpdate) > requiredFreshness) { + // return res.internalError({ + // body: { + // message: new Error('Task Manager monitored stats are out of date'), + // attributes: { lastUpdate, timestamp, stats }, + // }, + // }); + // } return res.ok({ - body: monitoringStats, + body: { lastUpdate, timestamp, stats }, }); } ); } + +async function getLatestStats(monitoringStats$: Observable) { + return new Promise((resolve) => + monitoringStats$.pipe(take(1)).subscribe((stats) => resolve(summarizeMonitoringStats(stats))) + ); +} diff --git a/x-pack/plugins/task_manager/server/task_events.ts b/x-pack/plugins/task_manager/server/task_events.ts index e1dd85f868cd..6dd0c1546733 100644 --- a/x-pack/plugins/task_manager/server/task_events.ts +++ b/x-pack/plugins/task_manager/server/task_events.ts @@ -9,16 +9,19 @@ import { Option } from 'fp-ts/lib/Option'; import { ConcreteTaskInstance } from './task'; import { Result, Err } from './lib/result_type'; +import { FillPoolResult } from './lib/fill_pool'; +import { PollingError } from 
'./polling'; export enum TaskEventType { TASK_CLAIM = 'TASK_CLAIM', TASK_MARK_RUNNING = 'TASK_MARK_RUNNING', TASK_RUN = 'TASK_RUN', TASK_RUN_REQUEST = 'TASK_RUN_REQUEST', + TASK_POLLING_CYCLE = 'TASK_POLLING_CYCLE', } export interface TaskEvent { - id: string; + id?: string; type: TaskEventType; event: Result; } @@ -26,6 +29,7 @@ export type TaskMarkRunning = TaskEvent; export type TaskRun = TaskEvent; export type TaskClaim = TaskEvent>; export type TaskRunRequest = TaskEvent; +export type TaskPollingCycle = TaskEvent>; export function asTaskMarkRunningEvent( id: string, @@ -69,6 +73,15 @@ export function asTaskRunRequestEvent( }; } +export function asTaskPollingCycleEvent( + event: Result> +): TaskPollingCycle { + return { + type: TaskEventType.TASK_POLLING_CYCLE, + event, + }; +} + export function isTaskMarkRunningEvent( taskEvent: TaskEvent ): taskEvent is TaskMarkRunning { @@ -85,3 +98,8 @@ export function isTaskRunRequestEvent( ): taskEvent is TaskRunRequest { return taskEvent.type === TaskEventType.TASK_RUN_REQUEST; } +export function isTaskPollingCycleEvent( + taskEvent: TaskEvent +): taskEvent is TaskPollingCycle { + return taskEvent.type === TaskEventType.TASK_POLLING_CYCLE; +} diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/task_manager.mock.ts index e5325274024d..edd56b63e480 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/task_manager.mock.ts @@ -4,9 +4,16 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { TaskManager } from './task_manager'; +import { TaskManager, TaskLifecycleEvent } from './task_manager'; +import { of, Observable } from 'rxjs'; -const createTaskManagerMock = (isStarted: boolean = true) => { +const createTaskManagerMock = ({ + isStarted = true, + events = of(), +}: { + isStarted?: boolean; + events?: Observable; +} = {}) => { return ({ registerTaskDefinitions: jest.fn(), addMiddleware: jest.fn(), @@ -21,6 +28,9 @@ const createTaskManagerMock = (isStarted: boolean = true) => { get isStarted() { return isStarted; }, + get events() { + return events; + }, stop: jest.fn(), } as unknown) as jest.Mocked; }; diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index 017540a2dcc5..decd7291bc0c 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -42,6 +42,7 @@ describe('TaskManager', () => { poll_interval: 6000000, max_poll_inactivity_cycles: 10, monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, request_capacity: 1000, }; const taskManagerOpts = { diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 44e409a2aec3..7fcf496e0d11 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -5,7 +5,7 @@ */ import { Logger } from 'src/core/server'; import { Subject, Observable, Subscription } from 'rxjs'; -import { filter } from 'rxjs/operators'; +import { filter, tap } from 'rxjs/operators'; import { performance } from 'perf_hooks'; @@ -25,10 +25,12 @@ import { TaskRun, TaskClaim, TaskRunRequest, + TaskPollingCycle, isTaskRunEvent, isTaskClaimEvent, isTaskRunRequestEvent, asTaskRunRequestEvent, + asTaskPollingCycleEvent, } from './task_events'; import { fillPool, FillPoolResult } from './lib/fill_pool'; import { 
addMiddlewareToChain, BeforeSaveMiddlewareParams, Middleware } from './lib/middleware'; @@ -52,7 +54,7 @@ import { PollingErrorType, createObservableMonitor, } from './polling'; -import { TaskPool } from './task_pool'; +import { TaskPool, TaskPoolRunResult } from './task_pool'; import { TaskManagerRunner, TaskRunner } from './task_runner'; import { FetchResult, @@ -82,7 +84,12 @@ interface RunNowResult { id: string; } -export type TaskLifecycleEvent = TaskMarkRunning | TaskRun | TaskClaim | TaskRunRequest; +export type TaskLifecycleEvent = + | TaskMarkRunning + | TaskRun + | TaskClaim + | TaskRunRequest + | TaskPollingCycle; /* * The TaskManager is the public interface into the task manager system. This glues together @@ -195,6 +202,10 @@ export class TaskManager { ); } + public get events(): Observable { + return this.events$; + } + private emitEvent = (event: TaskLifecycleEvent) => { this.events$.next(event); }; @@ -245,17 +256,23 @@ export class TaskManager { this.startQueue.forEach((fn) => fn()); this.startQueue = []; - this.pollingSubscription = this.poller$.subscribe( - mapErr((error: PollingError) => { - if (error.type === PollingErrorType.RequestCapacityReached) { - pipe( - error.data, - mapOptional((id) => this.emitEvent(asTaskRunRequestEvent(id, asErr(error)))) - ); - } - this.logger.error(error.message); - }) - ); + this.pollingSubscription = this.poller$ + .pipe( + tap( + mapErr((error: PollingError) => { + if (error.type === PollingErrorType.RequestCapacityReached) { + pipe( + error.data, + mapOptional((id) => this.emitEvent(asTaskRunRequestEvent(id, asErr(error)))) + ); + } + this.logger.error(error.message); + }) + ) + ) + .subscribe((event: Result>) => { + this.emitEvent(asTaskPollingCycleEvent(event)); + }); } } @@ -522,13 +539,13 @@ export async function awaitTaskRunResult( ); }, taskEvent.event); } else { - either>( + either>( taskEvent.event, - (taskInstance: ConcreteTaskInstance) => { + (taskInstance: ConcreteTaskInstance | FillPoolResult) => { 
// resolve if the task has run sucessfully if (isTaskRunEvent(taskEvent)) { subscription.unsubscribe(); - resolve({ id: taskInstance.id }); + resolve({ id: (taskInstance as ConcreteTaskInstance).id }); } }, async (error: Error | Option) => { diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index c3c15c7ba481..3c792966e868 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -22,6 +22,16 @@ interface MonitoringStats { timestamp: string; value: Record; }; + runtime: { + timestamp: string; + value: { + drift: Record; + polling: { + lastSuccessfulPoll: string; + resultFrequency: Record; + }; + }; + }; }; } @@ -59,18 +69,19 @@ export default function ({ getService }: FtrProviderContext) { poll_interval: 3000, max_poll_inactivity_cycles: 10, monitored_aggregated_stats_refresh_rate: monitoredAggregatedStatsRefreshRate, + monitored_stats_running_average_window: 50, request_capacity: 1000, max_workers: 10, }); }); it('should return the task manager workload', async () => { - const workload = (await getHealth()).stats.workload; + const { workload } = (await getHealth()).stats; const sumSampleTaskInWorkload = (workload.value.taskTypes as { sampleTask?: { sum: number }; }).sampleTask?.sum ?? 0; - const schedulesWorkload = (mapValues( + const scheduledWorkload = (mapValues( keyBy(workload.value.schedule as Array<[string, number]>, ([interval, count]) => interval), ([, count]) => count ) as unknown) as { '37m': number | undefined; '37s': number | undefined }; @@ -105,9 +116,25 @@ export default function ({ getService }: FtrProviderContext) { '37m': number; '37s': number; }; - expect(schedulesWorkloadAfterScheduling['37s']).to.eql(schedulesWorkload['37s'] ?? 
0 + 1); - expect(schedulesWorkloadAfterScheduling['37m']).to.eql(schedulesWorkload['37m'] ?? 0 + 1); + expect(schedulesWorkloadAfterScheduling['37s']).to.eql(1 + (scheduledWorkload['37s'] ?? 0)); + expect(schedulesWorkloadAfterScheduling['37m']).to.eql(1 + (scheduledWorkload['37m'] ?? 0)); }); }); + + it('should return the task manager runtime stats', async () => { + const { + runtime: { + value: { drift, polling }, + }, + } = (await getHealth()).stats; + + expect(isNaN(Date.parse(polling.lastSuccessfulPoll as string))).to.eql(false); + expect(typeof polling.resultFrequency.NoTasksClaimed).to.eql('number'); + expect(typeof polling.resultFrequency.RanOutOfCapacity).to.eql('number'); + expect(typeof polling.resultFrequency.PoolFilled).to.eql('number'); + + expect(typeof drift.mean).to.eql('number'); + expect(typeof drift.median).to.eql('number'); + }); }); } From bf0c3b443ac214733917a028f1b2b66b4f77add0 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 28 Sep 2020 15:11:40 +0100 Subject: [PATCH 08/67] removed unused import --- x-pack/plugins/task_manager/server/task_manager.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 7fcf496e0d11..de182ea02a67 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -54,7 +54,7 @@ import { PollingErrorType, createObservableMonitor, } from './polling'; -import { TaskPool, TaskPoolRunResult } from './task_pool'; +import { TaskPool } from './task_pool'; import { TaskManagerRunner, TaskRunner } from './task_runner'; import { FetchResult, From acae863dc5e951eaee59f1cc356d096f9b517d55 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 28 Sep 2020 17:25:55 +0100 Subject: [PATCH 09/67] made task run stats reactive --- .../monitoring/monitoring_stats_stream.ts | 2 +- .../monitoring/task_run_statistics.test.ts | 61 +++++---- 
.../server/monitoring/task_run_statistics.ts | 117 ++++++++++-------- 3 files changed, 94 insertions(+), 86 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 03fa889fb732..02ed298a047e 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -62,7 +62,7 @@ export function createAggregators( logger: Logger ): AggregatedStatProvider { return merge( - createTaskRunAggregator(taskManager, config.monitored_stats_running_average_window, logger), + createTaskRunAggregator(taskManager, config.monitored_stats_running_average_window), createWorkloadAggregator(taskManager, config.monitored_aggregated_stats_refresh_rate, logger) ); } diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index 365b8962146d..ee5940d84bb5 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -8,7 +8,7 @@ import uuid from 'uuid'; import { Subject } from 'rxjs'; import stats from 'stats-lite'; import sinon from 'sinon'; -import { take, tap, bufferCount, startWith, map } from 'rxjs/operators'; +import { take, tap, bufferCount, skip, map } from 'rxjs/operators'; import { ConcreteTaskInstance, TaskStatus } from '../task'; import { asTaskRunEvent, asTaskPollingCycleEvent } from '../task_events'; @@ -21,7 +21,6 @@ import { SummarizedTaskRunStat, } from './task_run_statistics'; import { taskManagerMock } from '../task_manager.mock'; -import { mockLogger } from '../test_utils'; import { AggregatedStat } from './runtime_statistics_aggregator'; import { FillPoolResult } from '../lib/fill_pool'; @@ -36,20 +35,13 @@ describe('Task Run Statistics', () 
=> { test('returns a running average of task drift', async () => { const runAtDrift = [1000, 2000, 500, 300, 400, 15000, 20000, 200]; + const events = new Subject(); const taskManager = taskManagerMock.create({ - events: new Subject().pipe( - startWith( - ...runAtDrift.map((drift) => mockTaskRunEvent({ runAt: runAtMillisecondsAgo(drift) })) - ) - ), + events, }); const runningAverageWindowSize = 5; - const taskRunAggregator = createTaskRunAggregator( - taskManager, - runningAverageWindowSize, - mockLogger() - ); + const taskRunAggregator = createTaskRunAggregator(taskManager, runningAverageWindowSize); function expectWindowEqualsUpdate( taskStat: AggregatedStat, @@ -65,6 +57,10 @@ describe('Task Run Statistics', () => { return new Promise((resolve) => { taskRunAggregator .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, value: summarizeTaskRunStat(value), @@ -84,38 +80,30 @@ describe('Task Run Statistics', () => { expectWindowEqualsUpdate(taskStats[7], runAtDrift.slice(3, 8)); resolve(); }); + + for (const drift of runAtDrift) { + events.next(mockTaskRunEvent({ runAt: runAtMillisecondsAgo(drift) })); + } }); }); test('returns polling stats', async () => { const expectedTimestamp: string[] = []; + const events = new Subject(); const taskManager = taskManagerMock.create({ - events: new Subject().pipe( - startWith( - asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), - asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), - asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), - asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), - asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), - asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled)), - asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity)), - 
asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity)), - asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)), - asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed)) - ) - ), + events, }); const runningAverageWindowSize = 5; - const taskRunAggregator = createTaskRunAggregator( - taskManager, - runningAverageWindowSize, - mockLogger() - ); + const taskRunAggregator = createTaskRunAggregator(taskManager, runningAverageWindowSize); return new Promise((resolve) => { taskRunAggregator .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, value: summarizeTaskRunStat(value), @@ -161,6 +149,17 @@ describe('Task Run Statistics', () => { ]); resolve(); }); + + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.PoolFilled))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.RanOutOfCapacity))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); + events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); }); }); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index ca224fc28199..1c07dfa8e39d 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -4,16 +4,15 @@ * you 
may not use this file except in compliance with the Elastic License. */ -import { Logger } from 'src/core/server'; -import { of, empty } from 'rxjs'; -import { filter, flatMap } from 'rxjs/operators'; +import { combineLatest, Observable } from 'rxjs'; +import { filter, startWith, map } from 'rxjs/operators'; import { isUndefined, countBy, mapValues } from 'lodash'; import stats from 'stats-lite'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; import { TaskManager, TaskLifecycleEvent } from '../task_manager'; import { isTaskRunEvent, isTaskPollingCycleEvent } from '../task_events'; -import { isOk } from '../lib/result_type'; +import { isOk, Ok } from '../lib/result_type'; import { ConcreteTaskInstance } from '../task'; import { FillPoolResult } from '../lib/fill_pool'; @@ -49,61 +48,60 @@ export interface SummarizedTaskRunStat extends JsonObject { export function createTaskRunAggregator( taskManager: TaskManager, - runningAverageWindowSize: number, - logger: Logger + runningAverageWindowSize: number ): AggregatedStatProvider { - const runningStats: { - runtime: { - polling: { - lastSuccessfulPoll: (value?: string) => string | undefined; - resultFrequency: (value?: FillPoolResult) => FillPoolResult[]; - }; - drift: (value?: number) => number[]; - }; - } = { - runtime: { - polling: { - lastSuccessfulPoll: createLastValueStat(), - resultFrequency: createRunningAveragedStat(runningAverageWindowSize), - }, - drift: createRunningAveragedStat(runningAverageWindowSize), - }, + const driftQueue = createRunningAveragedStat(runningAverageWindowSize); + const taskRunEvents$: Observable = taskManager.events.pipe( + filter( + (taskEvent: TaskLifecycleEvent) => + isTaskRunEvent(taskEvent) && isOk(taskEvent.event) + ), + map((taskEvent: TaskLifecycleEvent) => { + const task = (taskEvent.event as Ok).value; + const now = Date.now(); + return driftQueue(now - 
task.runAt.getTime()); + }) + ); + + const pollingQueue = { + lastSuccessfulPoll: createLastValueStat(), + resultFrequency: createRunningAveragedStat(runningAverageWindowSize), }; - return taskManager.events.pipe( + const taskPollingEvents$: Observable = taskManager.events.pipe( filter( (taskEvent: TaskLifecycleEvent) => - (isTaskRunEvent(taskEvent) || isTaskPollingCycleEvent(taskEvent)) && - isOk(taskEvent.event) + isTaskPollingCycleEvent(taskEvent) && isOk(taskEvent.event) ), - flatMap((taskEvent: TaskLifecycleEvent) => { - if (isTaskRunEvent(taskEvent) && isOk(taskEvent.event)) { - const task = taskEvent.event.value; - const now = Date.now(); - return of({ - key: 'runtime', - value: { - polling: { - lastSuccessfulPoll: runningStats.runtime.polling.lastSuccessfulPoll(), - resultFrequency: runningStats.runtime.polling.resultFrequency(), - }, - drift: runningStats.runtime.drift(now - task.runAt.getTime()), - }, - } as AggregatedStat); - } else if (isTaskPollingCycleEvent(taskEvent) && isOk(taskEvent.event)) { - return of({ - key: 'runtime', - value: { - polling: { - lastSuccessfulPoll: runningStats.runtime.polling.lastSuccessfulPoll( - new Date().toISOString() - ), - resultFrequency: runningStats.runtime.polling.resultFrequency(taskEvent.event.value), - }, - drift: runningStats.runtime.drift(), - }, - } as AggregatedStat); - } - return empty(); + map((taskEvent: TaskLifecycleEvent) => { + return { + lastSuccessfulPoll: pollingQueue.lastSuccessfulPoll(new Date().toISOString()), + resultFrequency: pollingQueue.resultFrequency( + (taskEvent.event as Ok).value + ), + }; + }) + ); + + return combineLatest( + taskRunEvents$.pipe(startWith([])), + taskPollingEvents$.pipe( + startWith({ + resultFrequency: { + [FillPoolResult.NoTasksClaimed]: 0, + [FillPoolResult.RanOutOfCapacity]: 0, + [FillPoolResult.PoolFilled]: 0, + }, + }) + ) + ).pipe( + map(([drift, polling]) => { + return { + key: 'runtime', + value: { + drift, + polling, + }, + } as AggregatedStat; }) ); } @@ 
-134,10 +132,17 @@ function calculateRunningAverage(values: number[]): AveragedStat { }; } +/** + * Calculate the frequency of each term in a list of terms. + * @param values + */ function calculateFrequency(values: T[]): JsonObject { return mapValues(countBy(values), (count) => Math.round((count * 100) / values.length)); } +/** + * Utility to keep track of one value which might change over time + */ function createLastValueStat() { let lastValue: T; return (value?: T) => { @@ -150,6 +155,10 @@ function createLastValueStat() { }; } +/** + * Utility to keep track of a limited queue of values which changes over time + * dropping older values as they slide out of the window we wish to track + */ function createRunningAveragedStat(runningAverageWindowSize: number) { const queue = new Array(); return (value?: T) => { From 4d34dac7528d8bef47462ef1a8c8c11c890a39fc Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 28 Sep 2020 20:13:30 +0100 Subject: [PATCH 10/67] fixed unit tests in health route --- .../task_manager/server/routes/health.test.ts | 159 ++++++++++++++++-- .../task_manager/server/routes/health.ts | 48 ++++-- 2 files changed, 173 insertions(+), 34 deletions(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 1ea33794a279..b0533e6a52ee 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -4,11 +4,13 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { healthRoute } from './health'; +import { of, Subject } from 'rxjs'; +import { merge } from 'lodash'; import { httpServiceMock } from 'src/core/server/mocks'; +import { healthRoute } from './health'; import { mockHandlerArguments } from './_mock_handler_arguments'; -import { of } from 'rxjs'; import { sleep, mockLogger } from '../test_utils'; +import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; describe('healthRoute', () => { beforeEach(() => { @@ -29,17 +31,39 @@ describe('healthRoute', () => { const router = httpServiceMock.createRouter(); const logger = mockLogger(); - healthRoute(router, Promise.resolve(of()), logger, 1000); + const mockStat = mockHealthStats(); + await sleep(10); + const skippedMockStat = mockHealthStats(); + await sleep(10); + const nextMockStat = mockHealthStats(); + + const stats = Promise.resolve(new Subject()); + + healthRoute(router, stats, logger, 1000); + + const stats$ = await stats; - await sleep(1000); + stats$.next(mockStat); + await sleep(500); + stats$.next(skippedMockStat); + await sleep(600); + stats$.next(nextMockStat); - expect(logger.debug).toHaveBeenCalledWith(''); + expect(logger.debug).toHaveBeenCalledWith(JSON.stringify(summarizeMonitoringStats(mockStat))); + expect(logger.debug).not.toHaveBeenCalledWith( + JSON.stringify(summarizeMonitoringStats(skippedMockStat)) + ); + expect(logger.debug).toHaveBeenCalledWith( + JSON.stringify(summarizeMonitoringStats(nextMockStat)) + ); + expect(logger.debug).toHaveBeenCalledTimes(2); }); it('returns an error response if the stats are no longer fresh', async () => { const router = httpServiceMock.createRouter(); - healthRoute(router, Promise.resolve(of()), mockLogger(), 1000); + const mockStat = mockHealthStats(); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000); const [, handler] = router.get.mock.calls[0]; @@ -49,24 +73,123 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - 
attributes: { - lastUpdate: expect.any(String), - stats: { - configuration: { - timestamp: expect.any(String), - value: { - max_poll_inactivity_cycles: 10, - max_workers: 10, - poll_interval: 6000000, - request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, - monitored_stats_running_average_window: 50, + attributes: summarizeMonitoringStats( + mockHealthStats({ + lastUpdate: expect.any(String), + stats: { + configuration: { + timestamp: expect.any(String), + }, + workload: { + timestamp: expect.any(String), + }, + runtime: { + timestamp: expect.any(String), + value: { + polling: { + lastSuccessfulPoll: expect.any(String), + }, + }, }, }, + }) + ), + message: new Error('Task Manager monitored stats are out of date'), + }, + }); + }); + + it('returns an error response if the poller hasnt polled within the required freshness', async () => { + const router = httpServiceMock.createRouter(); + + const lastSuccessfulPoll = new Date(Date.now() - 2000).toISOString(); + const mockStat = mockHealthStats({ + stats: { + runtime: { + value: { + polling: { + lastSuccessfulPoll, + }, }, }, + }, + }); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000); + + const [, handler] = router.get.mock.calls[0]; + + const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); + + expect(await handler(context, req, res)).toMatchObject({ + body: { + attributes: summarizeMonitoringStats( + mockHealthStats({ + lastUpdate: expect.any(String), + stats: { + configuration: { + timestamp: expect.any(String), + }, + workload: { + timestamp: expect.any(String), + }, + runtime: { + timestamp: expect.any(String), + value: { + polling: { + lastSuccessfulPoll, + }, + }, + }, + }, + }) + ), message: new Error('Task Manager monitored stats are out of date'), }, }); }); }); + +function mockHealthStats(overrides = {}) { + return (merge( + { + lastUpdate: new Date().toISOString(), + stats: { + configuration: { + timestamp: new Date().toISOString(), + 
value: { + value: { + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + }, + }, + }, + workload: { + timestamp: new Date().toISOString(), + value: { + sum: 4, + taskTypes: { + actions_telemetry: { sum: 2, status: { idle: 2 } }, + alerting_telemetry: { sum: 1, status: { idle: 1 } }, + session_cleanup: { sum: 1, status: { idle: 1 } }, + }, + }, + }, + runtime: { + timestamp: new Date().toISOString(), + value: { + drift: [1000, 1000], + polling: { + lastSuccessfulPoll: new Date().toISOString(), + resultFrequency: ['NoTasksClaimed', 'NoTasksClaimed', 'NoTasksClaimed'], + }, + }, + }, + }, + }, + overrides + ) as unknown) as MonitoringStats; +} diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index e99c1298363a..46797b02740c 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -14,7 +14,8 @@ import { import { Logger } from 'src/core/server'; import { Observable } from 'rxjs'; import { take } from 'rxjs/operators'; -import { debounceTime } from 'rxjs/operators'; +import { throttleTime } from 'rxjs/operators'; +import { isString } from 'lodash'; import { MonitoringStats, RawMonitoringStats, summarizeMonitoringStats } from '../monitoring'; export function healthRoute( @@ -25,9 +26,9 @@ export function healthRoute( ) { /* Log Task Manager stats as a Debug log line at a fixed interval */ monitoringStats.then((monitoringStats$) => { - monitoringStats$ - .pipe(debounceTime(requiredFreshness)) - .subscribe((stats) => logger.debug(JSON.stringify(summarizeMonitoringStats(stats)))); + monitoringStats$.pipe(throttleTime(requiredFreshness)).subscribe((stats) => { + logger.debug(JSON.stringify(summarizeMonitoringStats(stats))); + }); }); router.get( @@ -40,7 +41,7 @@ export function healthRoute( 
req: KibanaRequest, res: KibanaResponseFactory ): Promise { - const { lastUpdate, stats } = await getLatestStats(await monitoringStats); + const stats = await getLatestStats(await monitoringStats); const now = Date.now(); const timestamp = new Date(now).toISOString(); @@ -49,23 +50,38 @@ export function healthRoute( * the stats in the body of the api call. This makes it easier for monitoring * services to mark the service as broken */ - // if (now - Date.parse(lastUpdate) > requiredFreshness) { - // return res.internalError({ - // body: { - // message: new Error('Task Manager monitored stats are out of date'), - // attributes: { lastUpdate, timestamp, stats }, - // }, - // }); - // } + if ( + now - + getOldestTimestamp( + stats.lastUpdate, + stats.stats.runtime?.value.polling.lastSuccessfulPoll + ) > + requiredFreshness + ) { + return res.internalError({ + body: { + message: new Error('Task Manager monitored stats are out of date'), + attributes: { timestamp, ...summarizeMonitoringStats(stats) }, + }, + }); + } return res.ok({ - body: { lastUpdate, timestamp, stats }, + body: { timestamp, ...summarizeMonitoringStats(stats) }, }); } ); } +function getOldestTimestamp(...timestamps: unknown[]): number { + return Math.min( + ...timestamps + .map((timestamp) => (isString(timestamp) ? 
Date.parse(timestamp) : NaN)) + .filter((timestamp) => !isNaN(timestamp)) + ); +} + async function getLatestStats(monitoringStats$: Observable) { - return new Promise((resolve) => - monitoringStats$.pipe(take(1)).subscribe((stats) => resolve(summarizeMonitoringStats(stats))) + return new Promise((resolve) => + monitoringStats$.pipe(take(1)).subscribe((stats) => resolve(stats)) ); } From e3ba8ad782d82ca2505c2c0fe399805f748bb703 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 29 Sep 2020 10:10:07 +0100 Subject: [PATCH 11/67] removed unused import --- x-pack/plugins/task_manager/server/routes/health.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 46797b02740c..d48775803c78 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -16,7 +16,7 @@ import { Observable } from 'rxjs'; import { take } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; -import { MonitoringStats, RawMonitoringStats, summarizeMonitoringStats } from '../monitoring'; +import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; export function healthRoute( router: IRouter, From e7c5da2ff6164bd443a00bed52c2682387aea989 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 29 Sep 2020 19:51:11 +0100 Subject: [PATCH 12/67] added task run duration to health endpoint --- .../monitoring/task_run_calcultors.test.ts | 76 +++++++++++++ .../server/monitoring/task_run_calcultors.ts | 62 +++++++++++ .../monitoring/task_run_statistics.test.ts | 15 ++- .../server/monitoring/task_run_statistics.ts | 104 ++++++------------ .../task_manager/server/task_events.ts | 34 +++++- .../task_manager/server/task_runner.ts | 24 +++- .../test_suites/task_manager/health_route.ts | 11 +- 7 files changed, 237 insertions(+), 89 deletions(-) create 
mode 100644 x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts create mode 100644 x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts new file mode 100644 index 000000000000..2ee18da9607a --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import uuid from 'uuid'; + +import { + calculateRunningAverage, + calculateFrequency, + createRunningAveragedStat, + createMapOfRunningAveragedStats, +} from './task_run_calcultors'; + +describe('calculateRunningAverage', () => { + test('calculates the running average, median and mode of a window of values', async () => { + expect(calculateRunningAverage([2, 2, 4, 6, 6])).toEqual({ + mean: 4, + median: 4, + mode: new Set([2, 6]), + }); + }); +}); + +describe('calculateFrequency', () => { + test('calculates the frequency of each terms in the list as a percentage', async () => { + const [term1, term2, term3] = [uuid.v4(), uuid.v4(), uuid.v4()]; + expect( + calculateFrequency([term1, term2, term2, term3, term1, term1, term2, term1, term3]) + ).toEqual({ + [term3]: 22, + [term1]: 44, + [term2]: 33, + }); + }); +}); + +describe('createRunningAveragedStat', () => { + test('create a function which tracks a window of values', async () => { + const queue = createRunningAveragedStat(3); + expect(queue(1)).toEqual([1]); + expect(queue(2)).toEqual([1, 2]); + expect(queue(3)).toEqual([1, 2, 3]); + expect(queue(4)).toEqual([2, 3, 4]); + expect(queue(5)).toEqual([3, 4, 5]); + }); +}); + +describe('createMapOfRunningAveragedStats', () => { + 
test('create a function which tracks multiple window of values by key', async () => { + const [term1, term2, term3] = [uuid.v4(), uuid.v4(), uuid.v4()]; + const mappedQueues = createMapOfRunningAveragedStats(3); + expect(mappedQueues(term1, 1)).toEqual({ [term1]: [1] }); + expect(mappedQueues(term1, 2)).toEqual({ [term1]: [1, 2] }); + expect(mappedQueues(term2, 3)).toEqual({ [term1]: [1, 2], [term2]: [3] }); + expect(mappedQueues(term3, 4)).toEqual({ [term1]: [1, 2], [term2]: [3], [term3]: [4] }); + expect(mappedQueues(term2, 5)).toEqual({ [term1]: [1, 2], [term2]: [3, 5], [term3]: [4] }); + expect(mappedQueues(term2, 6)).toEqual({ [term1]: [1, 2], [term2]: [3, 5, 6], [term3]: [4] }); + expect(mappedQueues(term1, 7)).toEqual({ + [term1]: [1, 2, 7], + [term2]: [3, 5, 6], + [term3]: [4], + }); + expect(mappedQueues(term1, 8)).toEqual({ + [term1]: [2, 7, 8], + [term2]: [3, 5, 6], + [term3]: [4], + }); + expect(mappedQueues(term1, 9)).toEqual({ + [term1]: [7, 8, 9], + [term2]: [3, 5, 6], + [term3]: [4], + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts new file mode 100644 index 000000000000..ef3f98b75282 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import stats from 'stats-lite'; +import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { isUndefined, countBy, mapValues } from 'lodash'; + +export interface AveragedStat extends JsonObject { + mean: number; + median: number; + mode: number; +} + +export function calculateRunningAverage(values: number[]): AveragedStat { + return { + mean: stats.mean(values), + median: stats.median(values), + mode: stats.mode(values), + }; +} + +/** + * Calculate the frequency of each term in a list of terms. + * @param values + */ +export function calculateFrequency(values: T[]): JsonObject { + return mapValues(countBy(values), (count) => Math.round((count * 100) / values.length)); +} + +/** + * Utility to keep track of a limited queue of values which changes over time + * dropping older values as they slide out of the window we wish to track + */ +export function createRunningAveragedStat(runningAverageWindowSize: number) { + const queue = new Array(); + return (value?: T) => { + if (isUndefined(value)) { + return queue; + } else { + if (queue.length === runningAverageWindowSize) { + queue.shift(); + } + queue.push(value); + return [...queue]; + } + }; +} + +export function createMapOfRunningAveragedStats(runningAverageWindowSize: number) { + const mappedQueue: Record T[]> = {}; + const asRecordOfValues = () => mapValues(mappedQueue, (queue) => queue()); + return (key?: string, value?: T) => { + if (!isUndefined(key)) { + mappedQueue[key] = mappedQueue[key] ?? 
createRunningAveragedStat(runningAverageWindowSize); + mappedQueue[key](value); + } + return asRecordOfValues(); + }; +} diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index ee5940d84bb5..8bd94685c4c9 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -11,7 +11,7 @@ import sinon from 'sinon'; import { take, tap, bufferCount, skip, map } from 'rxjs/operators'; import { ConcreteTaskInstance, TaskStatus } from '../task'; -import { asTaskRunEvent, asTaskPollingCycleEvent } from '../task_events'; +import { asTaskRunEvent, asTaskPollingCycleEvent, TaskTiming } from '../task_events'; import { asOk } from '../lib/result_type'; import { TaskLifecycleEvent } from '../task_manager'; import { @@ -81,8 +81,15 @@ describe('Task Run Statistics', () => { resolve(); }); + const now = Date.now(); for (const drift of runAtDrift) { - events.next(mockTaskRunEvent({ runAt: runAtMillisecondsAgo(drift) })); + const start = Math.floor(Math.random() * 1000); + events.next( + mockTaskRunEvent( + { runAt: runAtMillisecondsAgo(drift + start) }, + { start: runAtMillisecondsAgo(start).getTime(), stop: now } + ) + ); } }); }); @@ -168,9 +175,9 @@ function runAtMillisecondsAgo(ms: number): Date { return new Date(Date.now() - ms); } -const mockTaskRunEvent = (overrides: Partial = {}) => { +const mockTaskRunEvent = (overrides: Partial = {}, timing: TaskTiming) => { const task = mockTaskInstance(overrides); - return asTaskRunEvent(task.id, asOk(task)); + return asTaskRunEvent(task.id, asOk(task), timing); }; const mockTaskInstance = (overrides: Partial = {}): ConcreteTaskInstance => ({ diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 1c07dfa8e39d..cea38ad0b55b 
100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -6,21 +6,21 @@ import { combineLatest, Observable } from 'rxjs'; import { filter, startWith, map } from 'rxjs/operators'; -import { isUndefined, countBy, mapValues } from 'lodash'; -import stats from 'stats-lite'; import { JsonObject } from 'src/plugins/kibana_utils/common'; +import { mapValues } from 'lodash'; import { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; import { TaskManager, TaskLifecycleEvent } from '../task_manager'; import { isTaskRunEvent, isTaskPollingCycleEvent } from '../task_events'; import { isOk, Ok } from '../lib/result_type'; import { ConcreteTaskInstance } from '../task'; import { FillPoolResult } from '../lib/fill_pool'; - -interface AveragedStat extends JsonObject { - mean: number; - median: number; - mode: number; -} +import { + AveragedStat, + calculateRunningAverage, + calculateFrequency, + createRunningAveragedStat, + createMapOfRunningAveragedStats, +} from './task_run_calcultors'; interface FillPoolStat extends JsonObject { lastSuccessfulPoll: string; @@ -29,6 +29,7 @@ interface FillPoolStat extends JsonObject { export interface TaskRunStat extends JsonObject { drift: number[]; + duration: Record; polling: FillPoolStat | Omit; } @@ -43,6 +44,7 @@ interface FillPoolRawStat extends JsonObject { export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; + duration: Record; polling: FillPoolRawStat | Omit; } @@ -51,22 +53,28 @@ export function createTaskRunAggregator( runningAverageWindowSize: number ): AggregatedStatProvider { const driftQueue = createRunningAveragedStat(runningAverageWindowSize); - const taskRunEvents$: Observable = taskManager.events.pipe( + const taskRunDurationQueue = createMapOfRunningAveragedStats(runningAverageWindowSize); + const taskRunEvents$: Observable> = taskManager.events.pipe( filter( 
(taskEvent: TaskLifecycleEvent) => - isTaskRunEvent(taskEvent) && isOk(taskEvent.event) + isTaskRunEvent(taskEvent) && + isOk(taskEvent.event) && + !!taskEvent?.timing?.start ), map((taskEvent: TaskLifecycleEvent) => { const task = (taskEvent.event as Ok).value; - const now = Date.now(); - return driftQueue(now - task.runAt.getTime()); + const { timing } = taskEvent; + return { + duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), + drift: driftQueue(timing!.start - task.runAt.getTime()), + }; }) ); - const pollingQueue = { - lastSuccessfulPoll: createLastValueStat(), - resultFrequency: createRunningAveragedStat(runningAverageWindowSize), - }; + const resultFrequencyQueue = createRunningAveragedStat(runningAverageWindowSize); const taskPollingEvents$: Observable = taskManager.events.pipe( filter( (taskEvent: TaskLifecycleEvent) => @@ -74,16 +82,14 @@ export function createTaskRunAggregator( ), map((taskEvent: TaskLifecycleEvent) => { return { - lastSuccessfulPoll: pollingQueue.lastSuccessfulPoll(new Date().toISOString()), - resultFrequency: pollingQueue.resultFrequency( - (taskEvent.event as Ok).value - ), + lastSuccessfulPoll: new Date().toISOString(), + resultFrequency: resultFrequencyQueue((taskEvent.event as Ok).value), }; }) ); return combineLatest( - taskRunEvents$.pipe(startWith([])), + taskRunEvents$.pipe(startWith({ duration: {}, drift: [] })), taskPollingEvents$.pipe( startWith({ resultFrequency: { @@ -94,11 +100,11 @@ export function createTaskRunAggregator( }) ) ).pipe( - map(([drift, polling]) => { + map(([taskRun, polling]) => { return { key: 'runtime', value: { - drift, + ...taskRun, polling, }, } as AggregatedStat; @@ -109,6 +115,7 @@ export function createTaskRunAggregator( export function summarizeTaskRunStat({ polling: { lastSuccessfulPoll, resultFrequency }, drift, + duration, }: TaskRunStat): SummarizedTaskRunStat { return { polling: { @@ -121,55 +128,6 @@ export function summarizeTaskRunStat({ }, }, drift: 
calculateRunningAverage(drift), - }; -} - -function calculateRunningAverage(values: number[]): AveragedStat { - return { - mean: stats.mean(values), - median: stats.median(values), - mode: stats.mode(values), - }; -} - -/** - * Calculate the frequency of each term in a list of terms. - * @param values - */ -function calculateFrequency(values: T[]): JsonObject { - return mapValues(countBy(values), (count) => Math.round((count * 100) / values.length)); -} - -/** - * Utility to keep track of one value which might change over time - */ -function createLastValueStat() { - let lastValue: T; - return (value?: T) => { - if (isUndefined(value)) { - return lastValue; - } else { - lastValue = value; - return lastValue; - } - }; -} - -/** - * Utility to keep track of a limited queue of values which changes over time - * dropping older values as they slide out of the window we wish to track - */ -function createRunningAveragedStat(runningAverageWindowSize: number) { - const queue = new Array(); - return (value?: T) => { - if (isUndefined(value)) { - return queue; - } else { - if (queue.length === runningAverageWindowSize) { - queue.shift(); - } - queue.push(value); - return [...queue]; - } + duration: mapValues(duration, (typedDuration) => calculateRunningAverage(typedDuration)), }; } diff --git a/x-pack/plugins/task_manager/server/task_events.ts b/x-pack/plugins/task_manager/server/task_events.ts index 6dd0c1546733..4982ca7235a7 100644 --- a/x-pack/plugins/task_manager/server/task_events.ts +++ b/x-pack/plugins/task_manager/server/task_events.ts @@ -20,8 +20,19 @@ export enum TaskEventType { TASK_POLLING_CYCLE = 'TASK_POLLING_CYCLE', } +export interface TaskTiming { + start: number; + stop: number; +} + +export function startTaskTimer(): () => TaskTiming { + const start = Date.now(); + return () => ({ start, stop: Date.now() }); +} + export interface TaskEvent { id?: string; + timing?: TaskTiming; type: TaskEventType; event: Result; } @@ -33,52 +44,65 @@ export type 
TaskPollingCycle = TaskEvent + event: Result, + timing?: TaskTiming ): TaskMarkRunning { return { id, type: TaskEventType.TASK_MARK_RUNNING, event, + timing, }; } -export function asTaskRunEvent(id: string, event: Result): TaskRun { +export function asTaskRunEvent( + id: string, + event: Result, + timing?: TaskTiming +): TaskRun { return { id, type: TaskEventType.TASK_RUN, event, + timing, }; } export function asTaskClaimEvent( id: string, - event: Result> + event: Result>, + timing?: TaskTiming ): TaskClaim { return { id, type: TaskEventType.TASK_CLAIM, event, + timing, }; } export function asTaskRunRequestEvent( id: string, // we only emit a TaskRunRequest event when it fails - event: Err + event: Err, + timing?: TaskTiming ): TaskRunRequest { return { id, type: TaskEventType.TASK_RUN_REQUEST, event, + timing, }; } export function asTaskPollingCycleEvent( - event: Result> + event: Result>, + timing?: TaskTiming ): TaskPollingCycle { return { type: TaskEventType.TASK_POLLING_CYCLE, event, + timing, }; } diff --git a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index 87d1938393f6..3716e57e928f 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -17,7 +17,14 @@ import Joi from 'joi'; import { identity, defaults, flow } from 'lodash'; import { asOk, asErr, mapErr, eitherAsync, unwrap, mapOk, Result } from './lib/result_type'; -import { TaskRun, TaskMarkRunning, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; +import { + TaskRun, + TaskMarkRunning, + asTaskRunEvent, + asTaskMarkRunningEvent, + startTaskTimer, + TaskTiming, +} from './task_events'; import { intervalFromDate, intervalFromNow } from './lib/intervals'; import { BeforeRunFunction, BeforeMarkRunningFunction } from './lib/middleware'; import { @@ -174,6 +181,7 @@ export class TaskManagerRunner implements TaskRunner { taskInstance: this.instance, }); + const 
stopTaskTimer = startTaskTimer(); const apmTrans = apm.startTransaction( `taskManager run ${this.instance.taskType}`, 'taskManager' @@ -183,13 +191,16 @@ export class TaskManagerRunner implements TaskRunner { const result = await this.task.run(); const validatedResult = this.validateResult(result); if (apmTrans) apmTrans.end('success'); - return this.processResult(validatedResult); + return this.processResult(validatedResult, stopTaskTimer()); } catch (err) { this.logger.error(`Task ${this} failed: ${err}`); // in error scenario, we can not get the RunResult // re-use modifiedContext's state, which is correct as of beforeRun if (apmTrans) apmTrans.end('error'); - return this.processResult(asErr({ error: err, state: modifiedContext.taskInstance.state })); + return this.processResult( + asErr({ error: err, state: modifiedContext.taskInstance.state }), + stopTaskTimer() + ); } } @@ -384,7 +395,8 @@ export class TaskManagerRunner implements TaskRunner { } private async processResult( - result: Result + result: Result, + taskTiming: TaskTiming ): Promise> { await eitherAsync( result, @@ -394,11 +406,11 @@ export class TaskManagerRunner implements TaskRunner { } else { await this.processResultWhenDone(); } - this.onTaskEvent(asTaskRunEvent(this.id, asOk(this.instance))); + this.onTaskEvent(asTaskRunEvent(this.id, asOk(this.instance), taskTiming)); }, async ({ error }: FailedRunResult) => { await this.processResultForRecurringTask(result); - this.onTaskEvent(asTaskRunEvent(this.id, asErr(error))); + this.onTaskEvent(asTaskRunEvent(this.id, asErr(error), taskTiming)); } ); return result; diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 3c792966e868..d5fef2852eed 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -26,6 +26,7 @@ 
interface MonitoringStats { timestamp: string; value: { drift: Record; + duration: Record>; polling: { lastSuccessfulPoll: string; resultFrequency: Record; @@ -122,9 +123,14 @@ export default function ({ getService }: FtrProviderContext) { }); it('should return the task manager runtime stats', async () => { + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '5s' }, + }); + const { runtime: { - value: { drift, polling }, + value: { drift, polling, duration }, }, } = (await getHealth()).stats; @@ -135,6 +141,9 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof drift.mean).to.eql('number'); expect(typeof drift.median).to.eql('number'); + + expect(typeof duration.sampleTask.mean).to.eql('number'); + expect(typeof duration.sampleTask.median).to.eql('number'); }); }); } From 192782a6580747e8228bcf87d539149dd1d731ca Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 30 Sep 2020 10:08:13 +0100 Subject: [PATCH 13/67] removed mode from TM stats --- x-pack/plugins/task_manager/server/config.ts | 4 +++- .../server/monitoring/task_run_calcultors.test.ts | 3 +-- .../task_manager/server/monitoring/task_run_calcultors.ts | 8 ++++---- .../server/monitoring/task_run_statistics.test.ts | 3 +-- .../task_manager/server/monitoring/task_run_statistics.ts | 6 +----- 5 files changed, 10 insertions(+), 14 deletions(-) diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index a530cb2d44f4..f2de10927371 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -9,10 +9,12 @@ import { schema, TypeOf } from '@kbn/config-schema'; export const DEFAULT_MAX_WORKERS = 10; export const DEFAULT_POLL_INTERVAL = 3000; export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; -export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50; +// Monitoring Constants +// =================== // Refresh "pull based" monitored stats at a default 
rate of once a minute export const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000; +export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50; export const configSchema = schema.object({ enabled: schema.boolean({ defaultValue: true }), diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts index 2ee18da9607a..e2994dd1098f 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts @@ -14,11 +14,10 @@ import { } from './task_run_calcultors'; describe('calculateRunningAverage', () => { - test('calculates the running average, median and mode of a window of values', async () => { + test('calculates the running average and median of a window of values', async () => { expect(calculateRunningAverage([2, 2, 4, 6, 6])).toEqual({ mean: 4, median: 4, - mode: new Set([2, 6]), }); }); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts index ef3f98b75282..bda99e8735eb 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts @@ -11,14 +11,12 @@ import { isUndefined, countBy, mapValues } from 'lodash'; export interface AveragedStat extends JsonObject { mean: number; median: number; - mode: number; } export function calculateRunningAverage(values: number[]): AveragedStat { return { - mean: stats.mean(values), + mean: Math.round(stats.mean(values)), median: stats.median(values), - mode: stats.mode(values), }; } @@ -27,7 +25,9 @@ export function calculateRunningAverage(values: number[]): AveragedStat { * @param values */ export function calculateFrequency(values: T[]): JsonObject { - return mapValues(countBy(values), (count) => Math.round((count * 100) / 
values.length)); + return values.length + ? mapValues(countBy(values), (count) => Math.round((count * 100) / values.length)) + : {}; } /** diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index 8bd94685c4c9..dc536897490c 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -48,9 +48,8 @@ describe('Task Run Statistics', () => { window: number[] ) { expect(taskStat.value.drift).toMatchObject({ - mean: stats.mean(window), + mean: Math.round(stats.mean(window)), median: stats.median(window), - mode: stats.mode(window), }); } diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index cea38ad0b55b..b6db4f40d5ff 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -92,11 +92,7 @@ export function createTaskRunAggregator( taskRunEvents$.pipe(startWith({ duration: {}, drift: [] })), taskPollingEvents$.pipe( startWith({ - resultFrequency: { - [FillPoolResult.NoTasksClaimed]: 0, - [FillPoolResult.RanOutOfCapacity]: 0, - [FillPoolResult.PoolFilled]: 0, - }, + resultFrequency: [], }) ) ).pipe( From 09bf68e965d27759392a475174c25365a674980f Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Wed, 30 Sep 2020 12:29:53 +0100 Subject: [PATCH 14/67] report run result frequency in tm stats --- .../monitoring/task_run_statistics.test.ts | 254 +++++++++++++++--- .../server/monitoring/task_run_statistics.ts | 72 +++-- .../task_manager/server/routes/health.test.ts | 2 + .../task_manager/server/task_events.ts | 13 +- .../task_manager/server/task_manager.test.ts | 31 ++- .../task_manager/server/task_manager.ts | 32 ++- 
.../task_manager/server/task_runner.test.ts | 50 +++- .../task_manager/server/task_runner.ts | 61 ++++- 8 files changed, 417 insertions(+), 98 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index dc536897490c..b0a67216927f 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -14,6 +14,7 @@ import { ConcreteTaskInstance, TaskStatus } from '../task'; import { asTaskRunEvent, asTaskPollingCycleEvent, TaskTiming } from '../task_events'; import { asOk } from '../lib/result_type'; import { TaskLifecycleEvent } from '../task_manager'; +import { TaskRunResult } from '../task_runner'; import { createTaskRunAggregator, summarizeTaskRunStat, @@ -93,6 +94,183 @@ describe('Task Run Statistics', () => { }); }); + test('returns a running average of task run duration', async () => { + const runDurations = [1000, 2000, 500, 300, 400, 15000, 20000, 200]; + const runDurationsInReverse = runDurations.reverse(); + const events = new Subject(); + const taskManager = taskManagerMock.create({ + events, + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator(taskManager, runningAverageWindowSize); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + windows: Record + ) { + for (const [type, window] of Object.entries(windows)) { + expect(taskStat.value.duration[type]).toMatchObject({ + mean: Math.round(stats.mean(window)), + median: stats.median(window), + }); + } + } + + return new Promise((resolve, reject) => { + taskRunAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: 
summarizeTaskRunStat(value), + })), + take(runDurations.length * 2), + bufferCount(runDurations.length * 2) + ) + .subscribe((taskStats: Array>) => { + try { + expectWindowEqualsUpdate(taskStats[0], { 'alerting:test': runDurations.slice(0, 1) }); + expectWindowEqualsUpdate(taskStats[1], { 'alerting:test': runDurations.slice(0, 2) }); + expectWindowEqualsUpdate(taskStats[2], { 'alerting:test': runDurations.slice(0, 3) }); + expectWindowEqualsUpdate(taskStats[3], { 'alerting:test': runDurations.slice(0, 4) }); + expectWindowEqualsUpdate(taskStats[4], { 'alerting:test': runDurations.slice(0, 5) }); + // from the 6th value, begin to drop old values as out window is 5 + expectWindowEqualsUpdate(taskStats[5], { 'alerting:test': runDurations.slice(1, 6) }); + expectWindowEqualsUpdate(taskStats[6], { 'alerting:test': runDurations.slice(2, 7) }); + expectWindowEqualsUpdate(taskStats[7], { 'alerting:test': runDurations.slice(3, 8) }); + expectWindowEqualsUpdate(taskStats[8], { + 'actions:test': runDurations.slice(0, 1), + 'alerting:test': runDurations.slice(3, 8), + }); + expectWindowEqualsUpdate(taskStats[9], { + 'actions:test': runDurations.slice(0, 2), + 'alerting:test': runDurations.slice(3, 8), + }); + expectWindowEqualsUpdate(taskStats[10], { + 'actions:test': runDurations.slice(0, 3), + 'alerting:test': runDurations.slice(3, 8), + }); + expectWindowEqualsUpdate(taskStats[11], { + 'actions:test': runDurations.slice(0, 4), + 'alerting:test': runDurations.slice(3, 8), + }); + expectWindowEqualsUpdate(taskStats[12], { + 'actions:test': runDurations.slice(0, 5), + 'alerting:test': runDurations.slice(3, 8), + }); + // from the 6th value, begin to drop old values as out window is 5 + expectWindowEqualsUpdate(taskStats[13], { + 'actions:test': runDurations.slice(1, 6), + 'alerting:test': runDurations.slice(3, 8), + }); + expectWindowEqualsUpdate(taskStats[14], { + 'actions:test': runDurations.slice(2, 7), + 'alerting:test': runDurations.slice(3, 8), + }); + 
expectWindowEqualsUpdate(taskStats[15], { + 'actions:test': runDurations.slice(3, 8), + 'alerting:test': runDurations.slice(3, 8), + }); + resolve(); + } catch (e) { + reject(e); + } + }); + + const now = Date.now(); + for (const runDuration of runDurations) { + events.next( + mockTaskRunEvent( + { taskType: 'alerting:test' }, + { start: runAtMillisecondsAgo(runDuration).getTime(), stop: now } + ) + ); + } + for (const runDuration of runDurationsInReverse) { + events.next( + mockTaskRunEvent( + { taskType: 'actions:test' }, + { start: runAtMillisecondsAgo(runDuration).getTime(), stop: now } + ) + ); + } + }); + }); + + test('returns the frequency of task run results', async () => { + const events = new Subject(); + const taskManager = taskManagerMock.create({ + events, + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator(taskManager, runningAverageWindowSize); + + return new Promise((resolve, reject) => { + taskRunAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeTaskRunStat(value), + })), + take(10), + bufferCount(10) + ) + .subscribe((taskStats: Array>) => { + try { + /** + * At any given time we only keep track of the last X Polling Results + * In the tests this is ocnfiugured to a window size of 5 + */ + expect(taskStats.map((taskStat) => taskStat.value.taskRunResultFrequency)).toEqual([ + // Success + { Success: 100, RetryScheduled: 0, Failed: 0 }, + // Success, Success, + { Success: 100, RetryScheduled: 0, Failed: 0 }, + // Success, Success, Success + { Success: 100, RetryScheduled: 0, Failed: 0 }, + // Success, Success, Success, Failed + { Success: 75, RetryScheduled: 0, Failed: 25 }, + // Success, Success, Success, Failed, Failed + { Success: 60, RetryScheduled: 0, Failed: 40 }, + // Success, Success, 
Failed, Failed, Failed + { Success: 40, RetryScheduled: 0, Failed: 60 }, + // Success, Failed, Failed, Failed, RetryScheduled + { Success: 20, RetryScheduled: 20, Failed: 60 }, + // Failed, Failed, Failed, RetryScheduled, RetryScheduled + { Success: 0, RetryScheduled: 40, Failed: 60 }, + // Failed, Failed, RetryScheduled, RetryScheduled, Success + { Success: 20, RetryScheduled: 40, Failed: 40 }, + // Failed, RetryScheduled, RetryScheduled, Success, Success + { Success: 40, RetryScheduled: 40, Failed: 20 }, + ]); + resolve(); + } catch (e) { + reject(e); + } + }); + + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + }); + }); + test('returns polling stats', async () => { const expectedTimestamp: string[] = []; const events = new Subject(); @@ -103,7 +281,7 @@ describe('Task Run Statistics', () => { const runningAverageWindowSize = 5; const taskRunAggregator = createTaskRunAggregator(taskManager, runningAverageWindowSize); - return new Promise((resolve) => { + return new Promise((resolve, reject) => { taskRunAggregator .pipe( // skip initial stat which is just initialized data which @@ -123,37 +301,41 @@ describe('Task Run Statistics', () => { bufferCount(10) ) .subscribe((taskStats: 
Array>) => { - expect(taskStats.map((taskStat) => taskStat.value.polling.lastSuccessfulPoll)).toEqual( - expectedTimestamp - ); - - /** - * At any given time we only keep track of the last X Polling Results - * In the tests this is ocnfiugured to a window size of 5 - */ - expect(taskStats.map((taskStat) => taskStat.value.polling.resultFrequency)).toEqual([ - // NoTasksClaimed - { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, - // NoTasksClaimed, NoTasksClaimed, - { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, - // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed - { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, - // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled - { NoTasksClaimed: 75, RanOutOfCapacity: 0, PoolFilled: 25 }, - // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled - { NoTasksClaimed: 60, RanOutOfCapacity: 0, PoolFilled: 40 }, - // NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled - { NoTasksClaimed: 40, RanOutOfCapacity: 0, PoolFilled: 60 }, - // NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity - { NoTasksClaimed: 20, RanOutOfCapacity: 20, PoolFilled: 60 }, - // PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity - { NoTasksClaimed: 0, RanOutOfCapacity: 40, PoolFilled: 60 }, - // PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed - { NoTasksClaimed: 20, RanOutOfCapacity: 40, PoolFilled: 40 }, - // PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed, NoTasksClaimed - { NoTasksClaimed: 40, RanOutOfCapacity: 40, PoolFilled: 20 }, - ]); - resolve(); + try { + expect(taskStats.map((taskStat) => taskStat.value.polling.lastSuccessfulPoll)).toEqual( + expectedTimestamp + ); + + /** + * At any given time we only keep track of the last X Polling Results + * In the tests this is ocnfiugured to a window size of 5 + */ + expect(taskStats.map((taskStat) => taskStat.value.polling.resultFrequency)).toEqual([ 
+ // NoTasksClaimed + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed + { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled + { NoTasksClaimed: 75, RanOutOfCapacity: 0, PoolFilled: 25 }, + // NoTasksClaimed, NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled + { NoTasksClaimed: 60, RanOutOfCapacity: 0, PoolFilled: 40 }, + // NoTasksClaimed, NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled + { NoTasksClaimed: 40, RanOutOfCapacity: 0, PoolFilled: 60 }, + // NoTasksClaimed, PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity + { NoTasksClaimed: 20, RanOutOfCapacity: 20, PoolFilled: 60 }, + // PoolFilled, PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity + { NoTasksClaimed: 0, RanOutOfCapacity: 40, PoolFilled: 60 }, + // PoolFilled, PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed + { NoTasksClaimed: 20, RanOutOfCapacity: 40, PoolFilled: 40 }, + // PoolFilled, RanOutOfCapacity, RanOutOfCapacity, NoTasksClaimed, NoTasksClaimed + { NoTasksClaimed: 40, RanOutOfCapacity: 40, PoolFilled: 20 }, + ]); + resolve(); + } catch (e) { + reject(e); + } }); events.next(asTaskPollingCycleEvent(asOk(FillPoolResult.NoTasksClaimed))); @@ -174,9 +356,13 @@ function runAtMillisecondsAgo(ms: number): Date { return new Date(Date.now() - ms); } -const mockTaskRunEvent = (overrides: Partial = {}, timing: TaskTiming) => { +const mockTaskRunEvent = ( + overrides: Partial = {}, + timing: TaskTiming, + result: TaskRunResult = TaskRunResult.Success +) => { const task = mockTaskInstance(overrides); - return asTaskRunEvent(task.id, asOk(task), timing); + return asTaskRunEvent(task.id, asOk({ task, result }), timing); }; const mockTaskInstance = (overrides: Partial = {}): ConcreteTaskInstance => ({ diff --git 
a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index b6db4f40d5ff..1708542ed858 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -10,9 +10,17 @@ import { JsonObject } from 'src/plugins/kibana_utils/common'; import { mapValues } from 'lodash'; import { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; import { TaskManager, TaskLifecycleEvent } from '../task_manager'; -import { isTaskRunEvent, isTaskPollingCycleEvent } from '../task_events'; -import { isOk, Ok } from '../lib/result_type'; +import { + isTaskRunEvent, + isTaskPollingCycleEvent, + TaskRun, + ErroredTask, + RanTask, + TaskTiming, +} from '../task_events'; +import { isOk, Ok, unwrap } from '../lib/result_type'; import { ConcreteTaskInstance } from '../task'; +import { TaskRunResult } from '../task_runner'; import { FillPoolResult } from '../lib/fill_pool'; import { AveragedStat, @@ -30,9 +38,9 @@ interface FillPoolStat extends JsonObject { export interface TaskRunStat extends JsonObject { drift: number[]; duration: Record; + taskRunResultFrequency: TaskRunResult[]; polling: FillPoolStat | Omit; } - interface FillPoolRawStat extends JsonObject { lastSuccessfulPoll: string; resultFrequency: { @@ -45,6 +53,12 @@ interface FillPoolRawStat extends JsonObject { export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; duration: Record; + taskRunResultFrequency: { + [TaskRunResult.Success]: number; + [TaskRunResult.SuccessRescheduled]: number; + [TaskRunResult.RetryScheduled]: number; + [TaskRunResult.Failed]: number; + }; polling: FillPoolRawStat | Omit; } @@ -52,25 +66,12 @@ export function createTaskRunAggregator( taskManager: TaskManager, runningAverageWindowSize: number ): AggregatedStatProvider { - const driftQueue = 
createRunningAveragedStat(runningAverageWindowSize); - const taskRunDurationQueue = createMapOfRunningAveragedStats(runningAverageWindowSize); - const taskRunEvents$: Observable> = taskManager.events.pipe( - filter( - (taskEvent: TaskLifecycleEvent) => - isTaskRunEvent(taskEvent) && - isOk(taskEvent.event) && - !!taskEvent?.timing?.start - ), + const taskRunEventToStat = createTaskRunEventToStat(runningAverageWindowSize); + const taskRunEvents$: Observable> = taskManager.events.pipe( + filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent) && hasTiming(taskEvent)), map((taskEvent: TaskLifecycleEvent) => { - const task = (taskEvent.event as Ok).value; - const { timing } = taskEvent; - return { - duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), - drift: driftQueue(timing!.start - task.runAt.getTime()), - }; + const { task, result }: RanTask | ErroredTask = unwrap((taskEvent as TaskRun).event); + return taskRunEventToStat(task, taskEvent.timing!, result); }) ); @@ -89,7 +90,7 @@ export function createTaskRunAggregator( ); return combineLatest( - taskRunEvents$.pipe(startWith({ duration: {}, drift: [] })), + taskRunEvents$.pipe(startWith({ duration: {}, drift: [], taskRunResultFrequency: [] })), taskPollingEvents$.pipe( startWith({ resultFrequency: [], @@ -108,10 +109,30 @@ export function createTaskRunAggregator( ); } +function hasTiming(taskEvent: TaskLifecycleEvent) { + return !!taskEvent?.timing; +} + +function createTaskRunEventToStat(runningAverageWindowSize: number) { + const driftQueue = createRunningAveragedStat(runningAverageWindowSize); + const taskRunDurationQueue = createMapOfRunningAveragedStats(runningAverageWindowSize); + const resultFrequencyQueue = createRunningAveragedStat(runningAverageWindowSize); + return ( + task: ConcreteTaskInstance, + timing: TaskTiming, + result: TaskRunResult + ): Omit => ({ + duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), + drift: 
driftQueue(timing!.start - task.runAt.getTime()), + taskRunResultFrequency: resultFrequencyQueue(result), + }); +} + export function summarizeTaskRunStat({ polling: { lastSuccessfulPoll, resultFrequency }, drift, duration, + taskRunResultFrequency, }: TaskRunStat): SummarizedTaskRunStat { return { polling: { @@ -125,5 +146,12 @@ export function summarizeTaskRunStat({ }, drift: calculateRunningAverage(drift), duration: mapValues(duration, (typedDuration) => calculateRunningAverage(typedDuration)), + taskRunResultFrequency: { + [TaskRunResult.Success]: 0, + [TaskRunResult.SuccessRescheduled]: 0, + [TaskRunResult.RetryScheduled]: 0, + [TaskRunResult.Failed]: 0, + ...calculateFrequency(taskRunResultFrequency), + }, }; } diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index b0533e6a52ee..82f1717092df 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -182,6 +182,8 @@ function mockHealthStats(overrides = {}) { timestamp: new Date().toISOString(), value: { drift: [1000, 1000], + duration: [], + taskRunResultFrequency: [], polling: { lastSuccessfulPoll: new Date().toISOString(), resultFrequency: ['NoTasksClaimed', 'NoTasksClaimed', 'NoTasksClaimed'], diff --git a/x-pack/plugins/task_manager/server/task_events.ts b/x-pack/plugins/task_manager/server/task_events.ts index 4982ca7235a7..b011d435e28d 100644 --- a/x-pack/plugins/task_manager/server/task_events.ts +++ b/x-pack/plugins/task_manager/server/task_events.ts @@ -11,6 +11,7 @@ import { ConcreteTaskInstance } from './task'; import { Result, Err } from './lib/result_type'; import { FillPoolResult } from './lib/fill_pool'; import { PollingError } from './polling'; +import { TaskRunResult } from './task_runner'; export enum TaskEventType { TASK_CLAIM = 'TASK_CLAIM', @@ -36,8 +37,16 @@ export interface TaskEvent { type: TaskEventType; event: Result; } +export 
interface RanTask { + task: ConcreteTaskInstance; + result: TaskRunResult; +} +export type ErroredTask = RanTask & { + error: Error; +}; + export type TaskMarkRunning = TaskEvent; -export type TaskRun = TaskEvent; +export type TaskRun = TaskEvent; export type TaskClaim = TaskEvent>; export type TaskRunRequest = TaskEvent; export type TaskPollingCycle = TaskEvent>; @@ -57,7 +66,7 @@ export function asTaskMarkRunningEvent( export function asTaskRunEvent( id: string, - event: Result, + event: Result, timing?: TaskTiming ): TaskRun { return { diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index decd7291bc0c..52a3beaf174d 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -26,6 +26,7 @@ import { SavedObjectsSerializer, SavedObjectTypeRegistry } from '../../../../src import { mockLogger } from './test_utils'; import { asErr, asOk } from './lib/result_type'; import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; +import { TaskRunResult } from './task_runner'; import { Middleware } from './lib/middleware'; const savedObjectsClient = savedObjectsRepositoryMock.create(); @@ -284,7 +285,7 @@ describe('TaskManager', () => { const result = awaitTaskRunResult(id, events$, getLifecycle); const task = { id } as ConcreteTaskInstance; - events$.next(asTaskRunEvent(id, asOk(task))); + events$.next(asTaskRunEvent(id, asOk({ task, result: TaskRunResult.Success }))); return expect(result).resolves.toEqual({ id }); }); @@ -299,7 +300,16 @@ describe('TaskManager', () => { const task = { id } as ConcreteTaskInstance; events$.next(asTaskClaimEvent(id, asOk(task))); events$.next(asTaskMarkRunningEvent(id, asOk(task))); - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + events$.next( + asTaskRunEvent( + id, + asErr({ + error: new Error('some thing gone wrong'), + task, + result: 
TaskRunResult.Failed, + }) + ) + ); return expect(result).rejects.toMatchInlineSnapshot( `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` @@ -381,7 +391,7 @@ describe('TaskManager', () => { await expect(result).rejects.toEqual( new Error( - `Failed to run task "${id}" as Task Manager is at capacity, please try again later` + `Failed to run task "${id}": Task Manager is at capacity, please try again later` ) ); expect(getLifecycle).not.toHaveBeenCalled(); @@ -432,9 +442,20 @@ describe('TaskManager', () => { events$.next(asTaskClaimEvent(id, asOk(task))); events$.next(asTaskClaimEvent(differentTask, asOk(otherTask))); - events$.next(asTaskRunEvent(differentTask, asOk(task))); + events$.next( + asTaskRunEvent(differentTask, asOk({ task: otherTask, result: TaskRunResult.Success })) + ); - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + events$.next( + asTaskRunEvent( + id, + asErr({ + task, + error: new Error('some thing gone wrong'), + result: TaskRunResult.Failed, + }) + ) + ); return expect(result).rejects.toMatchInlineSnapshot( `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index de182ea02a67..f9fcb2d56739 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -26,11 +26,13 @@ import { TaskClaim, TaskRunRequest, TaskPollingCycle, + ErroredTask, isTaskRunEvent, isTaskClaimEvent, isTaskRunRequestEvent, asTaskRunRequestEvent, asTaskPollingCycleEvent, + RanTask, } from './task_events'; import { fillPool, FillPoolResult } from './lib/fill_pool'; import { addMiddlewareToChain, BeforeSaveMiddlewareParams, Middleware } from './lib/middleware'; @@ -539,26 +541,32 @@ export async function awaitTaskRunResult( ); }, taskEvent.event); } else { - either>( + either< 
+ RanTask | ConcreteTaskInstance | FillPoolResult, + Error | ErroredTask | Option + >( taskEvent.event, - (taskInstance: ConcreteTaskInstance | FillPoolResult) => { + (taskInstance: RanTask | ConcreteTaskInstance | FillPoolResult) => { // resolve if the task has run sucessfully if (isTaskRunEvent(taskEvent)) { subscription.unsubscribe(); - resolve({ id: (taskInstance as ConcreteTaskInstance).id }); + resolve({ id: (taskInstance as RanTask).task.id }); } }, - async (error: Error | Option) => { + async (errorResult: Error | ErroredTask | Option) => { // reject if any error event takes place for the requested task subscription.unsubscribe(); - if (isTaskRunRequestEvent(taskEvent)) { - return reject( - new Error( - `Failed to run task "${taskId}" as Task Manager is at capacity, please try again later` - ) - ); - } - return reject(new Error(`Failed to run task "${taskId}": ${error}`)); + return reject( + new Error( + `Failed to run task "${taskId}"${ + isTaskRunRequestEvent(taskEvent) + ? `: Task Manager is at capacity, please try again later` + : isTaskRunEvent(taskEvent) + ? 
`: ${(errorResult as ErroredTask).error}` + : `: ${errorResult}` + }` + ) + ); } ); } diff --git a/x-pack/plugins/task_manager/server/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_runner.test.ts index 81fe097f4369..4a1a1bf73d1d 100644 --- a/x-pack/plugins/task_manager/server/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_runner.test.ts @@ -8,9 +8,9 @@ import _ from 'lodash'; import sinon from 'sinon'; import { secondsFromNow } from './lib/intervals'; import { asOk, asErr } from './lib/result_type'; -import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; +import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent, TaskRun } from './task_events'; import { ConcreteTaskInstance, TaskStatus, TaskDictionary, TaskDefinition } from './task'; -import { TaskManagerRunner } from './task_runner'; +import { TaskManagerRunner, TaskRunResult } from './task_runner'; import { mockLogger } from './test_utils'; import { SavedObjectsErrorHelpers } from '../../../../src/core/server'; import moment from 'moment'; @@ -790,7 +790,9 @@ describe('TaskManagerRunner', () => { await runner.run(); - expect(onTaskEvent).toHaveBeenCalledWith(asTaskRunEvent(id, asOk(instance))); + expect(onTaskEvent).toHaveBeenCalledWith( + withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success }))) + ); }); test('emits TaskEvent when a recurring task is run successfully', async () => { @@ -816,14 +818,16 @@ describe('TaskManagerRunner', () => { await runner.run(); - expect(onTaskEvent).toHaveBeenCalledWith(asTaskRunEvent(id, asOk(instance))); + expect(onTaskEvent).toHaveBeenCalledWith( + withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success }))) + ); }); test('emits TaskEvent when a task run throws an error', async () => { const id = _.random(1, 20).toString(); const error = new Error('Dangit!'); const onTaskEvent = jest.fn(); - const { runner } = testOpts({ + const { runner, instance } = 
testOpts({ onTaskEvent, instance: { id, @@ -840,7 +844,11 @@ describe('TaskManagerRunner', () => { }); await runner.run(); - expect(onTaskEvent).toHaveBeenCalledWith(asTaskRunEvent(id, asErr(error))); + expect(onTaskEvent).toHaveBeenCalledWith( + withAnyTiming( + asTaskRunEvent(id, asErr({ error, task: instance, result: TaskRunResult.RetryScheduled })) + ) + ); expect(onTaskEvent).toHaveBeenCalledTimes(1); }); @@ -848,7 +856,7 @@ describe('TaskManagerRunner', () => { const id = _.random(1, 20).toString(); const error = new Error('Dangit!'); const onTaskEvent = jest.fn(); - const { runner } = testOpts({ + const { runner, instance } = testOpts({ onTaskEvent, instance: { id, @@ -868,7 +876,11 @@ describe('TaskManagerRunner', () => { await runner.run(); - expect(onTaskEvent).toHaveBeenCalledWith(asTaskRunEvent(id, asErr(error))); + expect(onTaskEvent).toHaveBeenCalledWith( + withAnyTiming( + asTaskRunEvent(id, asErr({ error, task: instance, result: TaskRunResult.RetryScheduled })) + ) + ); expect(onTaskEvent).toHaveBeenCalledTimes(1); }); @@ -876,7 +888,7 @@ describe('TaskManagerRunner', () => { const id = _.random(1, 20).toString(); const error = new Error('Dangit!'); const onTaskEvent = jest.fn(); - const { runner, store } = testOpts({ + const { runner, store, instance: originalInstance } = testOpts({ onTaskEvent, instance: { id, @@ -899,7 +911,18 @@ describe('TaskManagerRunner', () => { const instance = store.update.args[0][0]; expect(instance.status).toBe('failed'); - expect(onTaskEvent).toHaveBeenCalledWith(asTaskRunEvent(id, asErr(error))); + expect(onTaskEvent).toHaveBeenCalledWith( + withAnyTiming( + asTaskRunEvent( + id, + asErr({ + error, + task: originalInstance, + result: TaskRunResult.Failed, + }) + ) + ) + ); expect(onTaskEvent).toHaveBeenCalledTimes(1); }); }); @@ -910,6 +933,13 @@ describe('TaskManagerRunner', () => { onTaskEvent?: (event: TaskEvent) => void; } + function withAnyTiming(taskRun: TaskRun) { + return { + ...taskRun, + timing: { start: 
expect.any(Number), stop: expect.any(Number) }, + }; + } + function testOpts(opts: TestOpts) { const callCluster = sinon.stub(); const createTaskRunner = sinon.stub(); diff --git a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index 3716e57e928f..c14c1266c4ad 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -16,7 +16,7 @@ import { performance } from 'perf_hooks'; import Joi from 'joi'; import { identity, defaults, flow } from 'lodash'; -import { asOk, asErr, mapErr, eitherAsync, unwrap, mapOk, Result } from './lib/result_type'; +import { asOk, asErr, mapErr, eitherAsync, unwrap, isOk, mapOk, Result } from './lib/result_type'; import { TaskRun, TaskMarkRunning, @@ -72,6 +72,21 @@ interface Opts { onTaskEvent?: (event: TaskRun | TaskMarkRunning) => void; } +export enum TaskRunResult { + // Task completed successfully + Success = 'Success', + // Recurring Task completed successfully + SuccessRescheduled = 'Success', + // // Task completed successfully after a retry + // SuccessfulRetry = 'SuccessfulRetry', + // // Recurring Task completed successfully after a retry + // SuccessfulRetryRescheduled = 'SuccessfulRetry', + // Task has failed and a retry has been scheduled + RetryScheduled = 'RetryScheduled', + // Task has failed + Failed = 'Failed', +} + /** * Runs a background task, ensures that errors are properly handled, * allows for cancellation. 
@@ -350,8 +365,9 @@ export class TaskManagerRunner implements TaskRunner { private async processResultForRecurringTask( result: Result - ): Promise { - const fieldUpdates = flow( + ): Promise { + const hasTaskRunFailed = isOk(result); + const fieldUpdates: Partial & Pick = flow( // if running the task has failed ,try to correct by scheduling a retry in the near future mapErr(this.rescheduleFailedRun), // if retrying is possible (new runAt) or this is an recurring task - reschedule @@ -370,7 +386,7 @@ export class TaskManagerRunner implements TaskRunner { await this.bufferedTaskStore.update( defaults( { - ...(fieldUpdates as Partial), + ...fieldUpdates, // reset fields that track the lifecycle of the concluded `task run` startedAt: null, retryAt: null, @@ -379,9 +395,15 @@ export class TaskManagerRunner implements TaskRunner { this.instance ) ); + + return fieldUpdates.status === TaskStatus.Failed + ? TaskRunResult.Failed + : hasTaskRunFailed + ? TaskRunResult.SuccessRescheduled + : TaskRunResult.RetryScheduled; } - private async processResultWhenDone(): Promise { + private async processResultWhenDone(): Promise { // not a recurring task: clean up by removing the task instance from store try { await this.bufferedTaskStore.remove(this.instance.id); @@ -392,25 +414,38 @@ export class TaskManagerRunner implements TaskRunner { throw err; } } + return TaskRunResult.Success; } private async processResult( result: Result, taskTiming: TaskTiming ): Promise> { + const task = this.instance; await eitherAsync( result, async ({ runAt }: SuccessfulRunResult) => { - if (runAt || this.instance.schedule) { - await this.processResultForRecurringTask(result); - } else { - await this.processResultWhenDone(); - } - this.onTaskEvent(asTaskRunEvent(this.id, asOk(this.instance), taskTiming)); + this.onTaskEvent( + asTaskRunEvent( + this.id, + asOk({ + task, + result: await (runAt || task.schedule + ? 
this.processResultForRecurringTask(result) + : this.processResultWhenDone()), + }), + taskTiming + ) + ); }, async ({ error }: FailedRunResult) => { - await this.processResultForRecurringTask(result); - this.onTaskEvent(asTaskRunEvent(this.id, asErr(error), taskTiming)); + this.onTaskEvent( + asTaskRunEvent( + this.id, + asErr({ task, result: await this.processResultForRecurringTask(result), error }), + taskTiming + ) + ); } ); return result; From 4a5d652a2c2b423cea5adb811db94808723268e9 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 1 Oct 2020 14:30:56 +0100 Subject: [PATCH 15/67] added success and failure ratio --- .../monitoring/task_run_statistics.test.ts | 6 +- .../server/monitoring/task_run_statistics.ts | 88 ++++++++++++------- .../test_suites/task_manager/health_route.ts | 15 +++- 3 files changed, 71 insertions(+), 38 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index b0a67216927f..fefe0fd62b87 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -110,7 +110,7 @@ describe('Task Run Statistics', () => { windows: Record ) { for (const [type, window] of Object.entries(windows)) { - expect(taskStat.value.duration[type]).toMatchObject({ + expect(taskStat.value.execution.duration[type]).toMatchObject({ mean: Math.round(stats.mean(window)), median: stats.median(window), }); @@ -230,7 +230,9 @@ describe('Task Run Statistics', () => { * At any given time we only keep track of the last X Polling Results * In the tests this is ocnfiugured to a window size of 5 */ - expect(taskStats.map((taskStat) => taskStat.value.taskRunResultFrequency)).toEqual([ + expect( + taskStats.map((taskStat) => taskStat.value.execution.resultFrequency['alerting:test']) + ).toEqual([ // Success { Success: 100, RetryScheduled: 0, Failed: 
0 }, // Success, Success, diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 1708542ed858..5c3c3d12972a 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -35,12 +35,17 @@ interface FillPoolStat extends JsonObject { resultFrequency: FillPoolResult[]; } +interface ExecutionStat extends JsonObject { + duration: Record; + resultFrequency: Record; +} + export interface TaskRunStat extends JsonObject { drift: number[]; - duration: Record; - taskRunResultFrequency: TaskRunResult[]; + execution: ExecutionStat; polling: FillPoolStat | Omit; } + interface FillPoolRawStat extends JsonObject { lastSuccessfulPoll: string; resultFrequency: { @@ -52,12 +57,17 @@ interface FillPoolRawStat extends JsonObject { export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; - duration: Record; - taskRunResultFrequency: { - [TaskRunResult.Success]: number; - [TaskRunResult.SuccessRescheduled]: number; - [TaskRunResult.RetryScheduled]: number; - [TaskRunResult.Failed]: number; + execution: { + duration: Record; + resultFrequency: Record< + string, + { + [TaskRunResult.Success]: number; + [TaskRunResult.SuccessRescheduled]: number; + [TaskRunResult.RetryScheduled]: number; + [TaskRunResult.Failed]: number; + } + >; }; polling: FillPoolRawStat | Omit; } @@ -76,33 +86,35 @@ export function createTaskRunAggregator( ); const resultFrequencyQueue = createRunningAveragedStat(runningAverageWindowSize); - const taskPollingEvents$: Observable = taskManager.events.pipe( + const taskPollingEvents$: Observable> = taskManager.events.pipe( filter( (taskEvent: TaskLifecycleEvent) => isTaskPollingCycleEvent(taskEvent) && isOk(taskEvent.event) ), map((taskEvent: TaskLifecycleEvent) => { return { - lastSuccessfulPoll: new Date().toISOString(), - resultFrequency: 
resultFrequencyQueue((taskEvent.event as Ok).value), + polling: { + lastSuccessfulPoll: new Date().toISOString(), + resultFrequency: resultFrequencyQueue((taskEvent.event as Ok).value), + }, }; }) ); return combineLatest( - taskRunEvents$.pipe(startWith({ duration: {}, drift: [], taskRunResultFrequency: [] })), + taskRunEvents$.pipe(startWith({ drift: [], execution: { duration: {}, resultFrequency: {} } })), taskPollingEvents$.pipe( startWith({ - resultFrequency: [], + polling: { resultFrequency: [] }, }) ) ).pipe( - map(([taskRun, polling]) => { + map(([taskRun, polling]: [Omit, Pick]) => { return { key: 'runtime', value: { ...taskRun, - polling, + ...polling, }, } as AggregatedStat; }) @@ -116,42 +128,54 @@ function hasTiming(taskEvent: TaskLifecycleEvent) { function createTaskRunEventToStat(runningAverageWindowSize: number) { const driftQueue = createRunningAveragedStat(runningAverageWindowSize); const taskRunDurationQueue = createMapOfRunningAveragedStats(runningAverageWindowSize); - const resultFrequencyQueue = createRunningAveragedStat(runningAverageWindowSize); + const resultFrequencyQueue = createMapOfRunningAveragedStats( + runningAverageWindowSize + ); return ( task: ConcreteTaskInstance, timing: TaskTiming, result: TaskRunResult ): Omit => ({ - duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), drift: driftQueue(timing!.start - task.runAt.getTime()), - taskRunResultFrequency: resultFrequencyQueue(result), + execution: { + duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), + resultFrequency: resultFrequencyQueue(task.taskType, result), + }, }); } +const DEFAULT_TASK_RUN_FREQUENCIES = { + [TaskRunResult.Success]: 0, + [TaskRunResult.SuccessRescheduled]: 0, + [TaskRunResult.RetryScheduled]: 0, + [TaskRunResult.Failed]: 0, +}; +const DEFAULT_POLLING_FREQUENCIES = { + [FillPoolResult.NoTasksClaimed]: 0, + [FillPoolResult.RanOutOfCapacity]: 0, + [FillPoolResult.PoolFilled]: 0, +}; + export function 
summarizeTaskRunStat({ - polling: { lastSuccessfulPoll, resultFrequency }, + polling: { lastSuccessfulPoll, resultFrequency: pollingResultFrequency }, drift, - duration, - taskRunResultFrequency, + execution: { duration, resultFrequency: executionResultFrequency }, }: TaskRunStat): SummarizedTaskRunStat { return { polling: { ...(lastSuccessfulPoll ? { lastSuccessfulPoll } : {}), resultFrequency: { - [FillPoolResult.NoTasksClaimed]: 0, - [FillPoolResult.RanOutOfCapacity]: 0, - [FillPoolResult.PoolFilled]: 0, - ...calculateFrequency(resultFrequency as FillPoolResult[]), + ...DEFAULT_POLLING_FREQUENCIES, + ...calculateFrequency(pollingResultFrequency as FillPoolResult[]), }, }, drift: calculateRunningAverage(drift), - duration: mapValues(duration, (typedDuration) => calculateRunningAverage(typedDuration)), - taskRunResultFrequency: { - [TaskRunResult.Success]: 0, - [TaskRunResult.SuccessRescheduled]: 0, - [TaskRunResult.RetryScheduled]: 0, - [TaskRunResult.Failed]: 0, - ...calculateFrequency(taskRunResultFrequency), + execution: { + duration: mapValues(duration, (typedDurations) => calculateRunningAverage(typedDurations)), + resultFrequency: mapValues(executionResultFrequency, (typedResultFrequencies) => ({ + ...DEFAULT_TASK_RUN_FREQUENCIES, + ...calculateFrequency(typedResultFrequencies), + })), }, }; } diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index d5fef2852eed..eb00f81bea62 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -26,7 +26,10 @@ interface MonitoringStats { timestamp: string; value: { drift: Record; - duration: Record>; + execution: { + duration: Record>; + resultFrequency: Record>; + }; polling: { lastSuccessfulPoll: string; resultFrequency: Record; @@ -130,7 +133,7 @@ export default function ({ getService }: 
FtrProviderContext) { const { runtime: { - value: { drift, polling, duration }, + value: { drift, polling, execution }, }, } = (await getHealth()).stats; @@ -142,8 +145,12 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof drift.mean).to.eql('number'); expect(typeof drift.median).to.eql('number'); - expect(typeof duration.sampleTask.mean).to.eql('number'); - expect(typeof duration.sampleTask.median).to.eql('number'); + expect(typeof execution.duration.sampleTask.mean).to.eql('number'); + expect(typeof execution.duration.sampleTask.median).to.eql('number'); + + expect(typeof execution.resultFrequency.sampleTask.Success).to.eql('number'); + expect(typeof execution.resultFrequency.sampleTask.RetryScheduled).to.eql('number'); + expect(typeof execution.resultFrequency.sampleTask.Failed).to.eql('number'); }); }); } From de5a7ac4e10af82ec4731d1c39a21720f3462dbd Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 2 Oct 2020 21:56:08 +0100 Subject: [PATCH 16/67] added schedule density to Task Manager health --- .../task_manager/server/lib/intervals.test.ts | 25 ++ .../task_manager/server/lib/intervals.ts | 6 + .../monitoring/monitoring_stats_stream.ts | 7 +- .../monitoring/workload_statistics.test.ts | 358 +++++++++++++++--- .../server/monitoring/workload_statistics.ts | 183 +++++++-- .../server/queries/aggregation_clauses.ts | 153 +++++++- .../task_manager/server/task_manager.ts | 4 +- .../plugins/task_manager/server/task_store.ts | 53 ++- 8 files changed, 683 insertions(+), 106 deletions(-) diff --git a/x-pack/plugins/task_manager/server/lib/intervals.test.ts b/x-pack/plugins/task_manager/server/lib/intervals.test.ts index 3554f8d8294f..5ce6c33c5797 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.test.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.test.ts @@ -12,6 +12,7 @@ import { intervalFromDate, secondsFromNow, secondsFromDate, + asInterval, } from './intervals'; let fakeTimer: sinon.SinonFakeTimers; @@ 
-51,6 +52,30 @@ describe('taskIntervals', () => { }); }); + describe('asInterval', () => { + test('returns a ms interval when ms duration can only divide by ms', () => { + expect(asInterval(500)).toEqual('500ms'); + expect(asInterval(1500)).toEqual('1500ms'); + expect(asInterval(1001)).toEqual('1001ms'); + expect(asInterval(2001)).toEqual('2001ms'); + expect(asInterval(61001)).toEqual('61001ms'); + expect(asInterval(90001)).toEqual('90001ms'); + }); + + test('returns a seconds interval when ms duration divides by seconds', () => { + expect(asInterval(1000)).toEqual('1s'); + expect(asInterval(2000)).toEqual('2s'); + expect(asInterval(61000)).toEqual('61s'); + expect(asInterval(99000)).toEqual('99s'); + expect(asInterval(90000)).toEqual('90s'); + }); + + test('returns a minutes interval when ms duration divides by minutes', () => { + expect(asInterval(60000)).toEqual('1m'); + expect(asInterval(120000)).toEqual('2m'); + }); + }); + describe('intervalFromNow', () => { test('it returns the current date plus n minutes', () => { const mins = _.random(1, 100); diff --git a/x-pack/plugins/task_manager/server/lib/intervals.ts b/x-pack/plugins/task_manager/server/lib/intervals.ts index 967251e6d717..914bc35bb526 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.ts @@ -20,6 +20,12 @@ function isCadence(cadence: IntervalCadence | string): cadence is IntervalCadenc return VALID_CADENCE.has(cadence as IntervalCadence); } +export function asInterval(ms: number): string { + const secondsRemainder = ms % 1000; + const minutesRemainder = ms % 60000; + return secondsRemainder ? `${ms}ms` : minutesRemainder ? `${ms / 1000}s` : `${ms / 60000}m`; +} + /** * Returns a date that is the specified interval from now. Currently, * only minute-intervals and second-intervals are supported. 
diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 02ed298a047e..edb22b6d79ae 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -63,7 +63,12 @@ export function createAggregators( ): AggregatedStatProvider { return merge( createTaskRunAggregator(taskManager, config.monitored_stats_running_average_window), - createWorkloadAggregator(taskManager, config.monitored_aggregated_stats_refresh_rate, logger) + createWorkloadAggregator( + taskManager, + config.monitored_aggregated_stats_refresh_rate, + config.poll_interval, + logger + ) ); } diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 0bcf3abfc760..a95b8d96117d 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -5,27 +5,48 @@ */ import { first, take, bufferCount } from 'rxjs/operators'; -import { createWorkloadAggregator } from './workload_statistics'; +import { createWorkloadAggregator, padBuckets } from './workload_statistics'; import { taskManagerMock } from '../task_manager.mock'; -import { AggregationResult } from '../queries/aggregation_clauses'; +import { AggregationSearchResult, KeyedAggregationBucket } from '../queries/aggregation_clauses'; import { mockLogger } from '../test_utils'; describe('Workload Statistics Aggregator', () => { test('queries the Task Store at a fixed interval for the current workload', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate.mockResolvedValue(({ - task: { - doc_count: 0, + sum: 0, + aggregations: { taskType: { buckets: [], }, schedule: { buckets: [], }, + idleTasks: { + doc_count: 0, 
+ overdue: { + doc_count: 0, + }, + scheduleDensity: { + buckets: [ + { + key: '2020-10-02T15:18:37.274Z-2020-10-02T15:19:36.274Z', + from: 1.601651917274e12, + from_as_string: '2020-10-02T15:18:37.274Z', + to: 1.601651976274e12, + to_as_string: '2020-10-02T15:19:36.274Z', + doc_count: 0, + histogram: { + buckets: [], + }, + }, + ], + }, + }, }, - } as unknown) as AggregationResult); + } as unknown) as AggregationSearchResult); - const workloadAggregator = createWorkloadAggregator(taskManager, 10, mockLogger()); + const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); return new Promise((resolve) => { workloadAggregator.pipe(first()).subscribe(() => { @@ -44,6 +65,34 @@ describe('Workload Statistics Aggregator', () => { field: 'task.schedule.interval', }, }, + idleTasks: { + filter: { + term: { 'task.status': 'idle' }, + }, + aggs: { + scheduleDensity: { + range: { + field: 'task.runAt', + ranges: [{ from: 'now', to: 'now+1m' }], + }, + aggs: { + histogram: { + date_histogram: { + field: 'task.runAt', + fixed_interval: '3s', + }, + }, + }, + }, + overdue: { + filter: { + range: { + 'task.runAt': { lt: 'now' }, + }, + }, + }, + }, + }, }, }); resolve(); @@ -52,8 +101,8 @@ describe('Workload Statistics Aggregator', () => { }); const mockAggregatedResult = ({ - task: { - doc_count: 4, + sum: 4, + aggregations: { schedule: { doc_count_error_upper_bound: 0, sum_other_doc_count: 0, @@ -120,46 +169,36 @@ describe('Workload Statistics Aggregator', () => { }, ], }, - }, - } as unknown) as AggregationResult; - - function setTaskTypeCount( - result: AggregationResult, - taskType: string, - status: Record - ) { - const buckets = [ - ...result.task.taskType.buckets.filter(({ key }) => key !== taskType), - { - key: taskType, - doc_count: Object.values(status).reduce((sum, count) => sum + count, 0), - status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, - buckets: Object.entries(status).map(([key, count]) => ({ - key, - 
doc_count: count, - })), + idleTasks: { + doc_count: 13, + overdue: { + doc_count: 6, }, - }, - ]; - return ({ - task: { - doc_count: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), - taskType: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, - buckets, + scheduleDensity: { + buckets: [ + mockHistogram(Date.now(), Date.now() + 7 * 3000, Date.now() + 60000, 3000, [ + 2, + 2, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + ]), + ], }, }, - } as unknown) as AggregationResult; - } + }, + } as unknown) as AggregationSearchResult; test('returns a summary of the workload by task type', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate.mockResolvedValue(mockAggregatedResult); - const workloadAggregator = createWorkloadAggregator(taskManager, 10, mockLogger()); + const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); return new Promise((resolve) => { workloadAggregator.pipe(first()).subscribe((result) => { @@ -177,6 +216,45 @@ describe('Workload Statistics Aggregator', () => { }); }); + test('returns a count of the overdue workload', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); + + return new Promise((resolve) => { + workloadAggregator.pipe(first()).subscribe((result) => { + expect(result.key).toEqual('workload'); + expect(result.value).toMatchObject({ + overdue: 6, + }); + resolve(); + }); + }); + }); + + test('returns a histogram of the upcoming workload', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); + + return new Promise((resolve) => { + workloadAggregator.pipe(first()).subscribe((result) => { + expect(result.key).toEqual('workload'); + 
expect(result.value).toMatchObject({ + // we have intervals every 3s, so we aggregate buckets 3s apart + // in this mock, Elasticsearch found tasks scheduled in 21 (8th bucket), 24, 27 and 48s seconds from now + // 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57 + // [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0 ] + // Above you see each bucket and the number of scheduled tasks we expect to have in them + scheduleDensity: [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0], + }); + resolve(); + }); + }); + }); + test('recovers from errors fetching the workload', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate @@ -193,9 +271,9 @@ describe('Workload Statistics Aggregator', () => { }) ); const logger = mockLogger(); - const workloadAggregator = createWorkloadAggregator(taskManager, 10, logger); + const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, logger); - return new Promise((resolve) => { + return new Promise((resolve, reject) => { workloadAggregator.pipe(take(2), bufferCount(2)).subscribe((results) => { expect(results[0].key).toEqual('workload'); expect(results[0].value).toMatchObject({ @@ -216,7 +294,201 @@ describe('Workload Statistics Aggregator', () => { }, }); resolve(); - }); + }, reject); }); }); }); + +describe('padBuckets', () => { + test('returns zeroed out bucklets when there are no buckets in the histogram', async () => { + expect( + padBuckets(10, 3000, { + key: '2020-10-02T19:47:28.128Z-2020-10-02T19:48:28.128Z', + from: 1601668048128, + from_as_string: '2020-10-02T19:47:28.128Z', + to: 1601668108128, + to_as_string: '2020-10-02T19:48:28.128Z', + doc_count: 0, + histogram: { + buckets: [], + }, + }) + ).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + }); + + test('pads buckets with zeros to fill out the entire period of time after detected buckets', async () => { + expect( + padBuckets(10, 3000, { + key: 
'2020-10-02T19:47:28.128Z-2020-10-02T19:48:28.128Z', + from: 1601668048128, + from_as_string: '2020-10-02T19:47:28.128Z', + to: 1601668077128, + to_as_string: '2020-10-02T19:47:57.128Z', + doc_count: 3, + histogram: { + buckets: [ + { + key_as_string: '2020-10-02T19:47:27.000Z', + key: 1601668047000, + doc_count: 1, + }, + { + key_as_string: '2020-10-02T19:47:30.000Z', + key: 1601668050000, + doc_count: 1, + }, + { + key_as_string: '2020-10-02T19:47:33.000Z', + key: 1601668053000, + doc_count: 0, + }, + { + key_as_string: '2020-10-02T19:47:36.000Z', + key: 1601668056000, + doc_count: 0, + }, + { + key_as_string: '2020-10-02T19:47:39.000Z', + key: 1601668059000, + doc_count: 0, + }, + { + key_as_string: '2020-10-02T19:47:42.000Z', + key: 1601668062000, + doc_count: 1, + }, + ], + }, + }) + ).toEqual([1, 1, 0, 0, 0, 1, 0, 0, 0, 0]); + }); + + test('pads buckets with zeros to fill out the entire period of time before detected buckets', async () => { + expect( + padBuckets(10, 3000, { + key: '2020-10-02T20:39:45.793Z-2020-10-02T20:40:14.793Z', + from: 1.601671185793e12, + from_as_string: '2020-10-02T20:39:45.793Z', + to: 1.601671214793e12, + to_as_string: '2020-10-02T20:40:14.793Z', + doc_count: 2, + histogram: { + buckets: [ + { + key_as_string: '2020-10-02T20:40:09.000Z', + key: 1601671209000, + doc_count: 1, + }, + { + key_as_string: '2020-10-02T20:40:12.000Z', + key: 1601671212000, + doc_count: 1, + }, + ], + }, + }) + ).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 1, 1]); + }); + + test('pads buckets with zeros to fill out the entire period surounding the detected buckets', async () => { + expect( + padBuckets(20, 3000, { + key: '2020-10-02T20:39:45.793Z-2020-10-02T20:40:14.793Z', + from: 1.601671185793e12, + from_as_string: '2020-10-02T20:39:45.793Z', + to: 1.1601671244793, + to_as_string: '2020-10-02T20:40:44.793Z', + doc_count: 2, + histogram: { + buckets: [ + { + key_as_string: '2020-10-02T20:40:09.000Z', + key: 1601671209000, + doc_count: 1, + }, + { + key_as_string: 
'2020-10-02T20:40:12.000Z', + key: 1601671212000, + doc_count: 1, + }, + ], + }, + }) + ).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + }); +}); + +function setTaskTypeCount( + { aggregations: { taskType: taskTypeAgg, ...otherAggs } }: AggregationSearchResult, + taskType: string, + status: Record +) { + const buckets = [ + ...(taskTypeAgg.buckets as KeyedAggregationBucket[]).filter(({ key }) => key !== taskType), + { + key: taskType, + doc_count: Object.values(status).reduce((sum, count) => sum + count, 0), + status: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets: Object.entries(status).map(([key, count]) => ({ + key, + doc_count: count, + })), + }, + }, + ]; + return ({ + sum: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), + aggregations: { + taskType: { + doc_count_error_upper_bound: 0, + sum_other_doc_count: 0, + buckets, + }, + ...otherAggs, + }, + } as unknown) as AggregationSearchResult; +} + +/** * + * This creates a mock histogram as returned by Elasticsearch + * + * @param from lower bound of query + * @param findFrom the timestamp (key) of the first bucket returned + * @param to upper bound of query + * @param interval the duration that each bucket coresponds to + * @param foundBuckets the buckets identified by ES, any buckets missing before or after which + * are still in the date range are assumed to have 0 results, ES only returns 0 for + * buckets that sit in between buckets which do have results + */ +function mockHistogram( + from: number, + findFrom: number, + to: number, + interval: number, + foundBuckets: Array +) { + const fromDate = new Date(from); + const toDate = new Date(to); + return { + from, + from_as_string: fromDate.toISOString(), + to, + to_as_string: toDate.toISOString(), + doc_count: foundBuckets.reduce((sum: number, count) => sum + (count ?? 
0), 0), + histogram: { + buckets: foundBuckets.reduce((histogramBuckets, count, index) => { + if (typeof count === 'number') { + const key = new Date(findFrom + index * interval); + histogramBuckets.push({ + key_as_string: key.toISOString(), + key: key.getTime(), + doc_count: count, + }); + } + return histogramBuckets; + }, [] as KeyedAggregationBucket[]), + }, + }; +} diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 669e6af16ea0..061f1a9399bc 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -12,11 +12,18 @@ import { keyBy, mapValues } from 'lodash'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { TaskManager } from '../task_manager'; import { - AggregationResult, + AggregationSearchResult, AggregationBucketWithSubAgg, - AggregationBucket, + isBucketedAggregation, + isAggregationBucket, + isKeyedBuckets, + isBucketsWithNumericKey, + aggregationBucketsByKey, + KeyedAggregationBucket, + getStringKeyOfBucket, + RangeAggregationBucket, } from '../queries/aggregation_clauses'; -import { parseIntervalAsSecond } from '../lib/intervals'; +import { parseIntervalAsSecond, asInterval } from '../lib/intervals'; interface StatusStat extends JsonObject { [status: string]: number; @@ -37,8 +44,16 @@ export interface WorkloadStat extends JsonObject { export function createWorkloadAggregator( taskManager: TaskManager, refreshInterval: number, + pollInterval: number, logger: Logger ): AggregatedStatProvider { + // calculate scheduleDensity going two refreshIntervals or 1 minute into into the future + // (the longer of the two) + const scheduleDensityBuckets = Math.max( + Math.round(60000 / pollInterval), + Math.round((refreshInterval * 2) / pollInterval) + ); + return timer(0, refreshInterval).pipe( concatMap(() => 
taskManager.aggregate({ @@ -54,39 +69,104 @@ export function createWorkloadAggregator( schedule: { terms: { field: 'task.schedule.interval' }, }, + idleTasks: { + filter: { + term: { 'task.status': 'idle' }, + }, + aggs: { + scheduleDensity: { + range: { + field: 'task.runAt', + ranges: [ + { from: `now`, to: `now+${asInterval(scheduleDensityBuckets * pollInterval)}` }, + ], + }, + aggs: { + histogram: { + date_histogram: { + field: 'task.runAt', + fixed_interval: asInterval(pollInterval), + }, + }, + }, + }, + overdue: { + filter: { + range: { + 'task.runAt': { lt: 'now' }, + }, + }, + }, + }, + }, }, }) ), - map(({ task }: AggregationResult<'task' | 'taskType' | 'schedule' | 'status'>) => { - const { - doc_count: sum = 0, - taskType: { buckets: taskTypes = [] } = {}, - schedule: { buckets: schedules = [] } = {}, - } = task; - const summary: WorkloadStat = { + map( + ({ + aggregations, sum, - taskTypes: mapValues( - keyBy>( - taskTypes as Array>, - 'key' - ), - ({ doc_count: docCount, status }) => ({ - sum: docCount, - status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), - }) - ), - schedule: (schedules as AggregationBucket[]) - .sort( - ({ key: scheduleLeft }, { key: scheduleRight }) => - parseIntervalAsSecond(scheduleLeft) - parseIntervalAsSecond(scheduleRight) + }: AggregationSearchResult< + | 'taskType' + | 'schedule' + | 'status' + | 'scheduleDensity' + | 'histogram' + | 'overdue' + | 'idleTasks' + >) => { + if ( + !isBucketedAggregation(aggregations.taskType) || + !isBucketedAggregation(aggregations.schedule) || + !( + !isBucketedAggregation(aggregations.idleTasks) && + isAggregationBucket(aggregations.idleTasks.overdue) && + isBucketedAggregation(aggregations.idleTasks.scheduleDensity) && + !isKeyedBuckets(aggregations.idleTasks.scheduleDensity.buckets) ) - .map(({ key: schedule, doc_count: count }) => [schedule, count]), - }; - return { - key: 'workload', - value: summary, - }; - }), + ) { + throw new Error(`Invalid workload: ${JSON.stringify({ 
aggregations, sum })}`); + } + + const { + taskType: { buckets: taskTypes = [] } = {}, + schedule: { buckets: schedules = [] } = {}, + idleTasks: { + overdue: { doc_count: overdue } = { doc_count: 0 }, + scheduleDensity: { buckets: [scheduleDensity] = [] } = {}, + } = {}, + } = aggregations; + + const summary: WorkloadStat = { + sum, + taskTypes: mapValues( + keyBy>( + taskTypes as Array>, + 'key' + ), + ({ doc_count: docCount, status }) => { + return { + sum: docCount, + status: mapValues(aggregationBucketsByKey(status), 'doc_count'), + }; + } + ), + schedule: (schedules as KeyedAggregationBucket[]) + .sort( + (scheduleLeft, scheduleRight) => + parseIntervalAsSecond(getStringKeyOfBucket(scheduleLeft)) - + parseIntervalAsSecond(getStringKeyOfBucket(scheduleRight)) + ) + .map((schedule) => [getStringKeyOfBucket(schedule), schedule.doc_count]), + overdue, + scheduleDensity: padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), + }; + return { + key: 'workload', + value: summary, + }; + } + ), catchError((ex: Error, caught) => { logger.error(`[WorkloadAggregator]: ${ex}`); // continue to pull values from the same observable @@ -94,3 +174,44 @@ export function createWorkloadAggregator( }) ); } + +export function padBuckets( + scheduleDensityBuckets: number, + pollInterval: number, + scheduleDensity: unknown +): number[] { + const { histogram, doc_count: docCount, from } = scheduleDensity as AggregationBucketWithSubAgg< + 'histogram', + RangeAggregationBucket + >; + + if ( + docCount && + histogram && + !isKeyedBuckets(histogram.buckets) && + isBucketsWithNumericKey(histogram.buckets) + ) { + const firstBucket = histogram.buckets[0].key; + const bucketsToPadBeforeFirstBucket = bucketsBetween(from, firstBucket, pollInterval); + const bucketsToPadAfterLast = + scheduleDensityBuckets - (bucketsToPadBeforeFirstBucket + histogram.buckets.length); + return [ + ...(bucketsToPadBeforeFirstBucket > 0 + ? 
new Array(bucketsToPadBeforeFirstBucket).fill(0) + : []), + ...histogram.buckets.map((bucket, index) => bucket.doc_count), + ...(bucketsToPadAfterLast > 0 ? new Array(bucketsToPadAfterLast).fill(0) : []), + ]; + } + return new Array(scheduleDensityBuckets).fill(0); +} + +function bucketsBetween(from: number, to: number, interval: number) { + let fromBound = from; + let count = 0; + while (fromBound <= to) { + fromBound += interval; + count++; + } + return count; +} diff --git a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts index 84cd9d6ae2b5..04e5bd9f89ee 100644 --- a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts +++ b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts @@ -4,7 +4,8 @@ * you may not use this file except in compliance with the Elastic License. */ -import { TermFilter } from './query_clauses'; +import { keyBy } from 'lodash'; +import { TermFilter, RangeFilter } from './query_clauses'; /** * Terminology @@ -30,7 +31,25 @@ import { TermFilter } from './query_clauses'; * "terms": { "field": "task.status" } * } * } - * } + * }, + * "scheduleDensity": { + * "range": { (3) + * "field": "task.runAt", + * "keyed": true, + * "ranges": [ + * { "key": "overdue", "from": "now-1m", "to": "now" }, + * { "key": "upcoming", "from": "now+1s", "to": "now+1m" } + * ] + * }, + * "aggs": { + * "histogram": { (4) + * "date_histogram": { + * "field": "task.runAt", + * "fixed_interval": "3s" + * } + * } + * } + * } * } * } * } @@ -39,21 +58,48 @@ import { TermFilter } from './query_clauses'; * These are referred to as: * (1). AggregationQuery * (2). TermAggregation + * (3). RangeAggregation + * (4). 
HistogramAggregation * */ export interface AggregationQuery { - [aggregationName: string]: (TermAggregation | { aggs: AggregationQuery }) & { - filter?: TermFilter; - }; + [aggregationName: string]: TypedAggregation & { aggs?: AggregationQuery }; } +type TypedAggregation = + | TermAggregation + | FilterAggregation + | RangeAggregation + | RangeAggregation + | HistogramAggregation; + interface TermAggregation { terms: { field: string; }; } +interface FilterAggregation { + filter: TermFilter | RangeFilter; +} + +interface RangeAggregation { + range: { + field: string; + keyed?: boolean; + ranges: Array<{ key?: string; from?: string; to?: string }>; + }; +} + +interface HistogramAggregation { + date_histogram: { + field: string; + fixed_interval: string; + keyed?: boolean; + }; +} + /** * Results of an Aggregation */ @@ -66,19 +112,108 @@ export type Aggregation = { }; export interface AggregationBucket { - key: string; doc_count: number; } -export type AggregationBucketWithSubAgg = AggregationBucket & +export function isAggregationBucket(bucket: unknown): bucket is AggregationBucket { + return typeof (bucket as AggregationBucket)?.doc_count === 'number'; +} + +export function isBucketsWithNumericKey( + buckets: AggregationBuckets['buckets'] +): buckets is Array< + AggregationBucket & { + key_as_string: string; + key: number; + } +> { + return ( + !isKeyedBuckets(buckets) && typeof (buckets[0] as KeyedAggregationBucket)?.key === 'number' + ); +} + +export type KeyedAggregationBucket = AggregationBucket & + ( + | { + key: string; + } + | { + key_as_string: string; + key: number; + } + ); + +export function getStringKeyOfBucket(bucket: KeyedAggregationBucket) { + return typeof bucket.key === 'string' + ? 
bucket.key + : (bucket as { + key_as_string: string; + }).key_as_string; +} + +export interface RangeAggregationBucket { + from: number; + to: number; + doc_count: number; +} + +export type KeyedRangeAggregationBucket = RangeAggregationBucket & { + key: string; +}; + +export function isRangeAggregationBucket(bucket: TypedBucket): bucket is RangeAggregationBucket { + return ( + typeof (bucket as RangeAggregationBucket).to !== 'number' || + typeof (bucket as RangeAggregationBucket).from !== 'number' + ); +} + +type TypedBucket = AggregationBucket | RangeAggregationBucket; +type KeyedTypedBucket = KeyedAggregationBucket | KeyedRangeAggregationBucket; + +export type AggregationBucketWithSubAgg< + Name extends AggregationNames, + AggType extends TypedBucket = TypedBucket +> = AggType & { [innerAggregation in Name]: AggregationBuckets; }; +export type KeyedBuckets = Record< + Name, + TypedBucket | AggregationBucketWithSubAgg +>; + export interface AggregationBuckets { - buckets: AggregationBucket[] | Array>; + buckets: KeyedTypedBucket[] | Array> | KeyedBuckets; +} + +export function isKeyedBuckets( + buckets: AggregationBuckets['buckets'] +): buckets is KeyedBuckets { + return !Array.isArray(buckets); +} + +export function aggregationBucketsByKey({ + buckets, +}: AggregationBuckets): KeyedBuckets { + if (isKeyedBuckets(buckets)) { + return buckets; + } + return keyBy(buckets, 'key') as KeyedBuckets; } export type AggregationResult = { - [aggregationName in Name]: Aggregation; + [aggregationName in Name]: Aggregation | AggregationBuckets; }; + +export function isBucketedAggregation( + aggregation: Aggregation | AggregationBuckets +): aggregation is AggregationBuckets { + return aggregation && Array.isArray((aggregation as AggregationBuckets).buckets); +} + +export interface AggregationSearchResult { + sum: number; + aggregations: AggregationResult; +} diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts 
index f9fcb2d56739..af6c02a60576 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -69,7 +69,7 @@ import { import { identifyEsError } from './lib/identify_es_error'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; -import { AggregationResult } from './queries/aggregation_clauses'; +import { AggregationSearchResult } from './queries/aggregation_clauses'; const VERSION_CONFLICT_STATUS = 409; @@ -401,7 +401,7 @@ export class TaskManager { */ public async aggregate( opts: AggregationOpts - ): Promise> { + ): Promise> { await this.waitUntilStarted(); return this.store.aggregate(opts); } diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 17523ee9efb6..804ed1656969 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -60,7 +60,11 @@ import { SortByRunAtAndRetryAt, tasksClaimedByOwner, } from './queries/mark_available_tasks_as_claimed'; -import { AggregationQuery, AggregationResult } from './queries/aggregation_clauses'; +import { + AggregationQuery, + AggregationSearchResult, + AggregationResult, +} from './queries/aggregation_clauses'; export interface StoreOpts { callCluster: ElasticJs; @@ -82,6 +86,7 @@ export interface SearchOpts { export interface AggregationOpts { aggs: AggregationQuery; + query?: object; size?: number; } @@ -467,17 +472,25 @@ export class TaskStore { public async aggregate({ aggs, size = 0, - }: AggregationOpts): Promise> { - const result = await this.callCluster('search', { + }: AggregationOpts): Promise> { + const { + aggregations, + hits: { + total: { value: sum }, + }, + } = (await this.callCluster('search', { index: this.index, ignoreUnavailable: true, - body: { - aggs: ensureAggregationOnlyReturnsTaskObjects(aggs), + body: 
ensureAggregationOnlyReturnsTaskObjects({ + aggs, size, - }, - }); + }), + })) as { + aggregations: AggregationResult; + hits: { total: { value: number } }; + }; - return (result as { aggregations: AggregationResult }).aggregations; + return { aggregations, sum }; } private async updateByQuery( @@ -559,20 +572,20 @@ function ensureQueryOnlyReturnsTaskObjects(opts: SearchOpts): SearchOpts { }; } -function ensureAggregationOnlyReturnsTaskObjects( - aggs: AggregationOpts['aggs'] -): AggregationOpts['aggs'] { - const filteredAgg: AggregationQuery = { - task: { - filter: { - term: { - type: 'task', - }, - }, - aggs, +function ensureAggregationOnlyReturnsTaskObjects(opts: AggregationOpts): AggregationOpts { + const originalQuery = opts.query; + const filterToOnlyTasks = { + bool: { + filter: [{ term: { type: 'task' } }], }, }; - return filteredAgg; + const query = originalQuery + ? { bool: { must: [filterToOnlyTasks, originalQuery] } } + : filterToOnlyTasks; + return { + ...opts, + query, + }; } function isSavedObjectsUpdateResponse( From 62ceba327ecbb1dcf80008ebef4330189506bfa3 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 09:37:26 +0100 Subject: [PATCH 17/67] added schedule density to test --- .../monitoring/workload_statistics.test.ts | 30 +++++++++---------- .../server/monitoring/workload_statistics.ts | 14 +++++---- .../server/queries/aggregation_clauses.ts | 2 +- .../plugins/task_manager/server/task_store.ts | 4 +-- .../test_suites/task_manager/health_route.ts | 28 ++++++++++++++--- 5 files changed, 50 insertions(+), 28 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index a95b8d96117d..afc9d200d114 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -14,7 +14,7 @@ describe('Workload Statistics 
Aggregator', () => { test('queries the Task Store at a fixed interval for the current workload', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate.mockResolvedValue(({ - sum: 0, + count: 0, aggregations: { taskType: { buckets: [], @@ -101,7 +101,7 @@ describe('Workload Statistics Aggregator', () => { }); const mockAggregatedResult = ({ - sum: 4, + count: 4, aggregations: { schedule: { doc_count_error_upper_bound: 0, @@ -204,11 +204,11 @@ describe('Workload Statistics Aggregator', () => { workloadAggregator.pipe(first()).subscribe((result) => { expect(result.key).toEqual('workload'); expect(result.value).toMatchObject({ - sum: 4, + count: 4, taskTypes: { - actions_telemetry: { sum: 2, status: { idle: 2 } }, - alerting_telemetry: { sum: 1, status: { idle: 1 } }, - session_cleanup: { sum: 1, status: { idle: 1 } }, + actions_telemetry: { count: 2, status: { idle: 2 } }, + alerting_telemetry: { count: 1, status: { idle: 1 } }, + session_cleanup: { count: 1, status: { idle: 1 } }, }, }); resolve(); @@ -277,20 +277,20 @@ describe('Workload Statistics Aggregator', () => { workloadAggregator.pipe(take(2), bufferCount(2)).subscribe((results) => { expect(results[0].key).toEqual('workload'); expect(results[0].value).toMatchObject({ - sum: 5, + count: 5, taskTypes: { - actions_telemetry: { sum: 2, status: { idle: 2 } }, - alerting_telemetry: { sum: 2, status: { idle: 2 } }, - session_cleanup: { sum: 1, status: { idle: 1 } }, + actions_telemetry: { count: 2, status: { idle: 2 } }, + alerting_telemetry: { count: 2, status: { idle: 2 } }, + session_cleanup: { count: 1, status: { idle: 1 } }, }, }); expect(results[1].key).toEqual('workload'); expect(results[1].value).toMatchObject({ - sum: 5, + count: 5, taskTypes: { - actions_telemetry: { sum: 2, status: { idle: 2 } }, - alerting_telemetry: { sum: 2, status: { idle: 1, failed: 1 } }, - session_cleanup: { sum: 1, status: { idle: 1 } }, + actions_telemetry: { count: 2, status: { idle: 2 } }, + 
alerting_telemetry: { count: 2, status: { idle: 1, failed: 1 } }, + session_cleanup: { count: 1, status: { idle: 1 } }, }, }); resolve(); @@ -439,7 +439,7 @@ function setTaskTypeCount( }, ]; return ({ - sum: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), + count: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), aggregations: { taskType: { doc_count_error_upper_bound: 0, diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 061f1a9399bc..f050ab94b8fe 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -30,15 +30,17 @@ interface StatusStat extends JsonObject { } interface TaskTypeStat extends JsonObject { [taskType: string]: { - sum: number; + count: number; status: StatusStat; }; } export interface WorkloadStat extends JsonObject { - sum: number; + count: number; taskTypes: TaskTypeStat; schedule: Array<[string, number]>; + overdue: number; + scheduleDensity: number[]; } export function createWorkloadAggregator( @@ -105,7 +107,7 @@ export function createWorkloadAggregator( map( ({ aggregations, - sum, + count, }: AggregationSearchResult< | 'taskType' | 'schedule' @@ -125,7 +127,7 @@ export function createWorkloadAggregator( !isKeyedBuckets(aggregations.idleTasks.scheduleDensity.buckets) ) ) { - throw new Error(`Invalid workload: ${JSON.stringify({ aggregations, sum })}`); + throw new Error(`Invalid workload: ${JSON.stringify({ aggregations, count })}`); } const { @@ -138,7 +140,7 @@ export function createWorkloadAggregator( } = aggregations; const summary: WorkloadStat = { - sum, + count, taskTypes: mapValues( keyBy>( taskTypes as Array>, @@ -146,7 +148,7 @@ export function createWorkloadAggregator( ), ({ doc_count: docCount, status }) => { return { - sum: docCount, + count: docCount, status: 
mapValues(aggregationBucketsByKey(status), 'doc_count'), }; } diff --git a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts index 04e5bd9f89ee..805be0b148b7 100644 --- a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts +++ b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts @@ -214,6 +214,6 @@ export function isBucketedAggregation( } export interface AggregationSearchResult { - sum: number; + count: number; aggregations: AggregationResult; } diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 804ed1656969..ea3aa7170c86 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -476,7 +476,7 @@ export class TaskStore { const { aggregations, hits: { - total: { value: sum }, + total: { value: count }, }, } = (await this.callCluster('search', { index: this.index, @@ -490,7 +490,7 @@ export class TaskStore { hits: { total: { value: number } }; }; - return { aggregations, sum }; + return { aggregations, count }; } private async updateByQuery( diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index eb00f81bea62..188cce9e0cc6 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -20,7 +20,13 @@ interface MonitoringStats { }; workload: { timestamp: string; - value: Record; + value: { + count: number; + taskTypes: Record; + schedule: Array<[string, number]>; + overdue: number; + scheduleDensity: number[]; + }; }; runtime: { timestamp: string; @@ -83,8 +89,8 @@ export default function ({ getService }: FtrProviderContext) { const { workload } = (await getHealth()).stats; const sumSampleTaskInWorkload = 
(workload.value.taskTypes as { - sampleTask?: { sum: number }; - }).sampleTask?.sum ?? 0; + sampleTask?: { count: number }; + }).sampleTask?.count ?? 0; const scheduledWorkload = (mapValues( keyBy(workload.value.schedule as Array<[string, number]>, ([interval, count]) => interval), ([, count]) => count @@ -107,7 +113,7 @@ export default function ({ getService }: FtrProviderContext) { const workloadAfterScheduling = (await getHealth()).stats.workload.value; expect( - (workloadAfterScheduling.taskTypes as { sampleTask: { sum: number } }).sampleTask.sum + (workloadAfterScheduling.taskTypes as { sampleTask: { count: number } }).sampleTask.count ).to.eql(sumSampleTaskInWorkload + 2); const schedulesWorkloadAfterScheduling = (mapValues( @@ -125,6 +131,20 @@ export default function ({ getService }: FtrProviderContext) { }); }); + it('should return a breakdown of idleTasks in the task manager workload', async () => { + const { + workload: { value: workload }, + } = (await getHealth()).stats; + + expect(typeof workload.overdue).to.eql('number'); + + expect(Array.isArray(workload.scheduleDensity)).to.eql(true); + + // test run with the default poll_interval of 3s and a monitored_aggregated_stats_refresh_rate of 5s, + // so we expect the scheduleDensity to span a minute (which means 20 buckets, as 60s / 3s = 20) + expect(workload.scheduleDensity.length).to.eql(20); + }); + it('should return the task manager runtime stats', async () => { await scheduleTask({ taskType: 'sampleTask', From 1cc826041ba13355404bbb3c4e3338ab9b970ed4 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 09:54:14 +0100 Subject: [PATCH 18/67] added upper bound to schedule density --- .../monitoring/workload_statistics.test.ts | 87 ++++++++++++++++++- .../server/monitoring/workload_statistics.ts | 9 +- 2 files changed, 92 insertions(+), 4 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts 
b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index afc9d200d114..a7d22c3f5c9d 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -233,7 +233,7 @@ describe('Workload Statistics Aggregator', () => { }); }); - test('returns a histogram of the upcoming workload', async () => { + test('returns a histogram of the upcoming workload for the upcoming minute when refresh rate is high', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate.mockResolvedValue(mockAggregatedResult); @@ -255,6 +255,91 @@ describe('Workload Statistics Aggregator', () => { }); }); + test('returns a histogram of the upcoming workload for twice refresh rate when rate is low', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + + const workloadAggregator = createWorkloadAggregator(taskManager, 60 * 1000, 3000, mockLogger()); + + return new Promise((resolve) => { + workloadAggregator.pipe(first()).subscribe((result) => { + expect(result.key).toEqual('workload'); + expect(result.value).toMatchObject({ + // same schedule density as in previous test, but window of 40 buckets ((60s refresh * 2) / 3s = 40) + scheduleDensity: [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 2, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + ...new Array(20).fill(0), + ], + }); + resolve(); + }); + }); + }); + + test('returns a histogram of the upcoming workload maxed out at 50 buckets when rate is too low', async () => { + const taskManager = taskManagerMock.create(); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + + const workloadAggregator = createWorkloadAggregator( + taskManager, + 15 * 60 * 1000, + 3000, + mockLogger() + ); + + return new Promise((resolve) => { + workloadAggregator.pipe(first()).subscribe((result) => { + 
expect(result.key).toEqual('workload'); + expect(result.value).toMatchObject({ + // same schedule density as in previous test, but window of 40 buckets ((60s refresh * 2) / 3s = 40) + scheduleDensity: [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 2, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + ...new Array(30).fill(0), + ], + }); + resolve(); + }); + }); + }); + test('recovers from errors fetching the workload', async () => { const taskManager = taskManagerMock.create(); taskManager.aggregate diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index f050ab94b8fe..4840ca17f146 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -43,6 +43,9 @@ export interface WorkloadStat extends JsonObject { scheduleDensity: number[]; } +// Set an upper bound just in case a customer sets a really high refresh rate +const MAX_SHCEDULE_DENSITY_BUCKETS = 50; + export function createWorkloadAggregator( taskManager: TaskManager, refreshInterval: number, @@ -51,9 +54,9 @@ export function createWorkloadAggregator( ): AggregatedStatProvider { // calculate scheduleDensity going two refreshIntervals or 1 minute into into the future // (the longer of the two) - const scheduleDensityBuckets = Math.max( - Math.round(60000 / pollInterval), - Math.round((refreshInterval * 2) / pollInterval) + const scheduleDensityBuckets = Math.min( + Math.max(Math.round(60000 / pollInterval), Math.round((refreshInterval * 2) / pollInterval)), + MAX_SHCEDULE_DENSITY_BUCKETS ); return timer(0, refreshInterval).pipe( From 2dca67f4ca44bd0c90507ee6e96d14c40307d06d Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 12:47:53 +0100 Subject: [PATCH 19/67] use APM agg types --- .../apm/typings/elasticsearch/aggregations.ts | 21 ++ .../monitoring/workload_statistics.test.ts | 111 ++++++--- 
.../server/monitoring/workload_statistics.ts | 202 +++++++++------- .../server/queries/aggregation_clauses.ts | 219 ------------------ .../task_manager/server/task_manager.ts | 8 +- .../plugins/task_manager/server/task_store.ts | 34 +-- 6 files changed, 222 insertions(+), 373 deletions(-) delete mode 100644 x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts diff --git a/x-pack/plugins/apm/typings/elasticsearch/aggregations.ts b/x-pack/plugins/apm/typings/elasticsearch/aggregations.ts index 534321201938..a25782cedc3c 100644 --- a/x-pack/plugins/apm/typings/elasticsearch/aggregations.ts +++ b/x-pack/plugins/apm/typings/elasticsearch/aggregations.ts @@ -145,6 +145,15 @@ export interface AggregationOptionsByType { >; keyed?: boolean; } & AggregationSourceOptions; + range: { + field: string; + ranges: Array< + | { key?: string; from: string | number } + | { key?: string; to: string | number } + | { key?: string; from: string | number; to: string | number } + >; + keyed?: boolean; + }; auto_date_histogram: { buckets: number; } & AggregationSourceOptions; @@ -319,6 +328,18 @@ interface AggregationResponsePart< ? Record : { buckets: DateRangeBucket[] }; }; + range: { + buckets: TAggregationOptionsMap extends { range: { keyed: true } } + ? 
Record< + string, + DateRangeBucket & + SubAggregationResponseOf + > + : Array< + DateRangeBucket & + SubAggregationResponseOf + >; + }; auto_date_histogram: { buckets: Array< DateHistogramBucket & diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index a7d22c3f5c9d..2bf4acad25d6 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -5,16 +5,37 @@ */ import { first, take, bufferCount } from 'rxjs/operators'; -import { createWorkloadAggregator, padBuckets } from './workload_statistics'; +import { WorkloadAggregation, createWorkloadAggregator, padBuckets } from './workload_statistics'; import { taskManagerMock } from '../task_manager.mock'; -import { AggregationSearchResult, KeyedAggregationBucket } from '../queries/aggregation_clauses'; import { mockLogger } from '../test_utils'; +import { ConcreteTaskInstance } from '../task'; +import { ESSearchResponse } from '../../../apm/typings/elasticsearch'; +import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; + +type MockESResult = ESSearchResponse< + ConcreteTaskInstance, + { + body: WorkloadAggregation; + } +>; describe('Workload Statistics Aggregator', () => { test('queries the Task Store at a fixed interval for the current workload', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(({ - count: 0, + taskManager.aggregate.mockResolvedValue({ + hits: { + hits: [], + max_score: 0, + total: { value: 0, relation: 'eq' }, + }, + took: 1, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 1, + failed: 0, + }, aggregations: { taskType: { buckets: [], @@ -44,7 +65,7 @@ describe('Workload Statistics Aggregator', () => { }, }, }, - } as unknown) as AggregationSearchResult); + } as MockESResult); 
const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -100,12 +121,22 @@ describe('Workload Statistics Aggregator', () => { }); }); - const mockAggregatedResult = ({ - count: 4, + const mockAggregatedResult: MockESResult = { + hits: { + hits: [], + max_score: 0, + total: { value: 4, relation: 'eq' }, + }, + took: 1, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 1, + failed: 0, + }, aggregations: { schedule: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, buckets: [ { key: '3600s', @@ -122,15 +153,11 @@ describe('Workload Statistics Aggregator', () => { ], }, taskType: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, buckets: [ { key: 'actions_telemetry', doc_count: 2, status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, buckets: [ { key: 'idle', @@ -143,8 +170,6 @@ describe('Workload Statistics Aggregator', () => { key: 'alerting_telemetry', doc_count: 1, status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, buckets: [ { key: 'idle', @@ -157,8 +182,6 @@ describe('Workload Statistics Aggregator', () => { key: 'session_cleanup', doc_count: 1, status: { - doc_count_error_upper_bound: 0, - sum_other_doc_count: 0, buckets: [ { key: 'idle', @@ -192,11 +215,11 @@ describe('Workload Statistics Aggregator', () => { }, }, }, - } as unknown) as AggregationSearchResult; + }; test('returns a summary of the workload by task type', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -218,7 +241,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a count of the overdue workload', async () => { const taskManager = taskManagerMock.create(); - 
taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -235,7 +258,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload for the upcoming minute when refresh rate is high', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -257,7 +280,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload for twice refresh rate when rate is low', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); const workloadAggregator = createWorkloadAggregator(taskManager, 60 * 1000, 3000, mockLogger()); @@ -297,7 +320,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload maxed out at 50 buckets when rate is too low', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); const workloadAggregator = createWorkloadAggregator( taskManager, @@ -504,12 +527,16 @@ describe('padBuckets', () => { }); function setTaskTypeCount( - { aggregations: { taskType: taskTypeAgg, ...otherAggs } }: AggregationSearchResult, + { aggregations }: MockESResult, taskType: string, status: Record ) { + const taskTypes = aggregations!.taskType as AggregationResultOf< + WorkloadAggregation['aggs']['taskType'], + {} + >; const buckets = [ - 
...(taskTypeAgg.buckets as KeyedAggregationBucket[]).filter(({ key }) => key !== taskType), + ...taskTypes.buckets.filter(({ key }) => key !== taskType), { key: taskType, doc_count: Object.values(status).reduce((sum, count) => sum + count, 0), @@ -526,14 +553,14 @@ function setTaskTypeCount( return ({ count: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), aggregations: { + ...aggregations, taskType: { doc_count_error_upper_bound: 0, sum_other_doc_count: 0, buckets, }, - ...otherAggs, }, - } as unknown) as AggregationSearchResult; + } as {}) as MockESResult; } /** * @@ -557,23 +584,31 @@ function mockHistogram( const fromDate = new Date(from); const toDate = new Date(to); return { + key: `${fromDate.toISOString()}-${toDate.toISOString()}`, from, from_as_string: fromDate.toISOString(), to, to_as_string: toDate.toISOString(), doc_count: foundBuckets.reduce((sum: number, count) => sum + (count ?? 0), 0), histogram: { - buckets: foundBuckets.reduce((histogramBuckets, count, index) => { - if (typeof count === 'number') { - const key = new Date(findFrom + index * interval); - histogramBuckets.push({ - key_as_string: key.toISOString(), - key: key.getTime(), - doc_count: count, - }); - } - return histogramBuckets; - }, [] as KeyedAggregationBucket[]), + buckets: foundBuckets.reduce( + (histogramBuckets, count, index) => { + if (typeof count === 'number') { + const key = new Date(findFrom + index * interval); + histogramBuckets.push({ + key_as_string: key.toISOString(), + key: key.getTime(), + doc_count: count, + }); + } + return histogramBuckets; + }, + [] as Array<{ + key_as_string: string; + key: number; + doc_count: number; + }> + ), }, }; } diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 4840ca17f146..5fa2ef11c7a9 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ 
b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -9,21 +9,12 @@ import { concatMap, map, catchError } from 'rxjs/operators'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { keyBy, mapValues } from 'lodash'; +import { ESSearchResponse } from '../../../apm/typings/elasticsearch'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { TaskManager } from '../task_manager'; -import { - AggregationSearchResult, - AggregationBucketWithSubAgg, - isBucketedAggregation, - isAggregationBucket, - isKeyedBuckets, - isBucketsWithNumericKey, - aggregationBucketsByKey, - KeyedAggregationBucket, - getStringKeyOfBucket, - RangeAggregationBucket, -} from '../queries/aggregation_clauses'; +import { ConcreteTaskInstance } from '../task'; import { parseIntervalAsSecond, asInterval } from '../lib/intervals'; +import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; interface StatusStat extends JsonObject { [status: string]: number; @@ -43,6 +34,56 @@ export interface WorkloadStat extends JsonObject { scheduleDensity: number[]; } +export interface WorkloadAggregation { + aggs: { + taskType: { + terms: { field: string }; + aggs: { + status: { + terms: { field: string }; + }; + }; + }; + schedule: { + terms: { field: string }; + }; + idleTasks: { + filter: { + term: { 'task.status': string }; + }; + aggs: { + scheduleDensity: { + range: { + field: string; + ranges: [{ from: string; to: string }]; + }; + aggs: { + histogram: { + date_histogram: { + field: string; + fixed_interval: string; + }; + }; + }; + }; + overdue: { + filter: { + range: { + 'task.runAt': { lt: string }; + }; + }; + }; + }; + }; + }; +} + +// The type of a bucket in the scheduleDensity range aggregation +type ScheduleDensityResult = AggregationResultOf< + WorkloadAggregation['aggs']['idleTasks']['aggs']['scheduleDensity'], + {} +>['buckets'][0]; + // Set an upper bound just 
in case a customer sets a really high refresh rate const MAX_SHCEDULE_DENSITY_BUCKETS = 50; @@ -61,7 +102,7 @@ export function createWorkloadAggregator( return timer(0, refreshInterval).pipe( concatMap(() => - taskManager.aggregate({ + taskManager.aggregate({ aggs: { taskType: { terms: { field: 'task.taskType' }, @@ -107,71 +148,65 @@ export function createWorkloadAggregator( }, }) ), - map( - ({ + map((result: ESSearchResponse) => { + const { aggregations, - count, - }: AggregationSearchResult< - | 'taskType' - | 'schedule' - | 'status' - | 'scheduleDensity' - | 'histogram' - | 'overdue' - | 'idleTasks' - >) => { - if ( - !isBucketedAggregation(aggregations.taskType) || - !isBucketedAggregation(aggregations.schedule) || - !( - !isBucketedAggregation(aggregations.idleTasks) && - isAggregationBucket(aggregations.idleTasks.overdue) && - isBucketedAggregation(aggregations.idleTasks.scheduleDensity) && - !isKeyedBuckets(aggregations.idleTasks.scheduleDensity.buckets) - ) - ) { - throw new Error(`Invalid workload: ${JSON.stringify({ aggregations, count })}`); - } - - const { - taskType: { buckets: taskTypes = [] } = {}, - schedule: { buckets: schedules = [] } = {}, - idleTasks: { - overdue: { doc_count: overdue } = { doc_count: 0 }, - scheduleDensity: { buckets: [scheduleDensity] = [] } = {}, - } = {}, - } = aggregations; + hits: { + total: { value: count }, + }, + } = result; - const summary: WorkloadStat = { - count, - taskTypes: mapValues( - keyBy>( - taskTypes as Array>, - 'key' - ), - ({ doc_count: docCount, status }) => { - return { - count: docCount, - status: mapValues(aggregationBucketsByKey(status), 'doc_count'), - }; - } - ), - schedule: (schedules as KeyedAggregationBucket[]) - .sort( - (scheduleLeft, scheduleRight) => - parseIntervalAsSecond(getStringKeyOfBucket(scheduleLeft)) - - parseIntervalAsSecond(getStringKeyOfBucket(scheduleRight)) - ) - .map((schedule) => [getStringKeyOfBucket(schedule), schedule.doc_count]), - overdue, - scheduleDensity: 
padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), - }; - return { - key: 'workload', - value: summary, - }; + if ( + !( + aggregations?.taskType && + aggregations?.schedule && + aggregations?.idleTasks?.overdue && + aggregations?.idleTasks?.scheduleDensity + ) + ) { + throw new Error(`Invalid workload: ${JSON.stringify({ aggregations, count })}`); } - ), + + const taskTypes = (aggregations.taskType as AggregationResultOf< + WorkloadAggregation['aggs']['taskType'], + {} + >).buckets; + const schedules = (aggregations.schedule as AggregationResultOf< + WorkloadAggregation['aggs']['schedule'], + {} + >).buckets; + + const { + overdue: { doc_count: overdue }, + scheduleDensity: { buckets: [scheduleDensity] = [] } = {}, + } = aggregations.idleTasks as AggregationResultOf< + WorkloadAggregation['aggs']['idleTasks'], + {} + >; + + const summary: WorkloadStat = { + count, + taskTypes: mapValues(keyBy(taskTypes, 'key'), ({ doc_count: docCount, status }) => { + return { + count: docCount, + status: mapValues(keyBy(status, 'key'), 'doc_count'), + }; + }), + schedule: schedules + .sort( + (scheduleLeft, scheduleRight) => + parseIntervalAsSecond(scheduleLeft.key as string) - + parseIntervalAsSecond(scheduleRight.key as string) + ) + .map((schedule) => [schedule.key as string, schedule.doc_count]), + overdue, + scheduleDensity: padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), + }; + return { + key: 'workload', + value: summary, + }; + }), catchError((ex: Error, caught) => { logger.error(`[WorkloadAggregator]: ${ex}`); // continue to pull values from the same observable @@ -183,19 +218,10 @@ export function createWorkloadAggregator( export function padBuckets( scheduleDensityBuckets: number, pollInterval: number, - scheduleDensity: unknown + scheduleDensity: ScheduleDensityResult ): number[] { - const { histogram, doc_count: docCount, from } = scheduleDensity as AggregationBucketWithSubAgg< - 'histogram', - RangeAggregationBucket - >; - - if ( - 
docCount && - histogram && - !isKeyedBuckets(histogram.buckets) && - isBucketsWithNumericKey(histogram.buckets) - ) { + if (scheduleDensity.from && scheduleDensity.histogram?.buckets?.length) { + const { histogram, from } = scheduleDensity; const firstBucket = histogram.buckets[0].key; const bucketsToPadBeforeFirstBucket = bucketsBetween(from, firstBucket, pollInterval); const bucketsToPadAfterLast = @@ -204,7 +230,7 @@ export function padBuckets( ...(bucketsToPadBeforeFirstBucket > 0 ? new Array(bucketsToPadBeforeFirstBucket).fill(0) : []), - ...histogram.buckets.map((bucket, index) => bucket.doc_count), + ...histogram.buckets.map((bucket) => bucket.doc_count), ...(bucketsToPadAfterLast > 0 ? new Array(bucketsToPadAfterLast).fill(0) : []), ]; } diff --git a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts b/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts deleted file mode 100644 index 805be0b148b7..000000000000 --- a/x-pack/plugins/task_manager/server/queries/aggregation_clauses.ts +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ - -import { keyBy } from 'lodash'; -import { TermFilter, RangeFilter } from './query_clauses'; - -/** - * Terminology - * =========== - * The terms for the different clauses in an Elasticsearch query aggregation can be confusing, here are some - * clarifications that might help you understand the Typescript types we use here. 
- * - * Given the following Aggregation: - * { - * "size": 0, - * "aggs": { (1) - * "task": { - * "filter": { - * "term": { - * "type": "task" - * } - * }, - * "aggs": { (1) - * "taskType": { (2) - * "terms": { "field": "task.taskType" }, - * "aggs": { - * "status": { (2) - * "terms": { "field": "task.status" } - * } - * } - * }, - * "scheduleDensity": { - * "range": { (3) - * "field": "task.runAt", - * "keyed": true, - * "ranges": [ - * { "key": "overdue", "from": "now-1m", "to": "now" }, - * { "key": "upcoming", "from": "now+1s", "to": "now+1m" } - * ] - * }, - * "aggs": { - * "histogram": { (4) - * "date_histogram": { - * "field": "task.runAt", - * "fixed_interval": "3s" - * } - * } - * } - * } - * } - * } - * } - * } - * - * These are referred to as: - * (1). AggregationQuery - * (2). TermAggregation - * (3). RangeAggregation - * (4). HistogramAggregation - * - */ - -export interface AggregationQuery { - [aggregationName: string]: TypedAggregation & { aggs?: AggregationQuery }; -} - -type TypedAggregation = - | TermAggregation - | FilterAggregation - | RangeAggregation - | RangeAggregation - | HistogramAggregation; - -interface TermAggregation { - terms: { - field: string; - }; -} - -interface FilterAggregation { - filter: TermFilter | RangeFilter; -} - -interface RangeAggregation { - range: { - field: string; - keyed?: boolean; - ranges: Array<{ key?: string; from?: string; to?: string }>; - }; -} - -interface HistogramAggregation { - date_histogram: { - field: string; - fixed_interval: string; - keyed?: boolean; - }; -} - -/** - * Results of an Aggregation - */ -type ReservedNames = 'doc_count'; -type AggregationNames = Exclude; -export type Aggregation = { - doc_count: number; -} & { - [innerAggregation in Name]: AggregationBuckets; -}; - -export interface AggregationBucket { - doc_count: number; -} - -export function isAggregationBucket(bucket: unknown): bucket is AggregationBucket { - return typeof (bucket as AggregationBucket)?.doc_count === 'number'; -} 
- -export function isBucketsWithNumericKey( - buckets: AggregationBuckets['buckets'] -): buckets is Array< - AggregationBucket & { - key_as_string: string; - key: number; - } -> { - return ( - !isKeyedBuckets(buckets) && typeof (buckets[0] as KeyedAggregationBucket)?.key === 'number' - ); -} - -export type KeyedAggregationBucket = AggregationBucket & - ( - | { - key: string; - } - | { - key_as_string: string; - key: number; - } - ); - -export function getStringKeyOfBucket(bucket: KeyedAggregationBucket) { - return typeof bucket.key === 'string' - ? bucket.key - : (bucket as { - key_as_string: string; - }).key_as_string; -} - -export interface RangeAggregationBucket { - from: number; - to: number; - doc_count: number; -} - -export type KeyedRangeAggregationBucket = RangeAggregationBucket & { - key: string; -}; - -export function isRangeAggregationBucket(bucket: TypedBucket): bucket is RangeAggregationBucket { - return ( - typeof (bucket as RangeAggregationBucket).to !== 'number' || - typeof (bucket as RangeAggregationBucket).from !== 'number' - ); -} - -type TypedBucket = AggregationBucket | RangeAggregationBucket; -type KeyedTypedBucket = KeyedAggregationBucket | KeyedRangeAggregationBucket; - -export type AggregationBucketWithSubAgg< - Name extends AggregationNames, - AggType extends TypedBucket = TypedBucket -> = AggType & - { - [innerAggregation in Name]: AggregationBuckets; - }; - -export type KeyedBuckets = Record< - Name, - TypedBucket | AggregationBucketWithSubAgg ->; - -export interface AggregationBuckets { - buckets: KeyedTypedBucket[] | Array> | KeyedBuckets; -} - -export function isKeyedBuckets( - buckets: AggregationBuckets['buckets'] -): buckets is KeyedBuckets { - return !Array.isArray(buckets); -} - -export function aggregationBucketsByKey({ - buckets, -}: AggregationBuckets): KeyedBuckets { - if (isKeyedBuckets(buckets)) { - return buckets; - } - return keyBy(buckets, 'key') as KeyedBuckets; -} - -export type AggregationResult = { - [aggregationName 
in Name]: Aggregation | AggregationBuckets; -}; - -export function isBucketedAggregation( - aggregation: Aggregation | AggregationBuckets -): aggregation is AggregationBuckets { - return aggregation && Array.isArray((aggregation as AggregationBuckets).buckets); -} - -export interface AggregationSearchResult { - count: number; - aggregations: AggregationResult; -} diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index af6c02a60576..af1d7cbe22d6 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -12,6 +12,7 @@ import { performance } from 'perf_hooks'; import { pipe } from 'fp-ts/lib/pipeable'; import { Option, some, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; +import { ESSearchResponse } from '../../apm/typings/elasticsearch'; import { SavedObjectsSerializer, ILegacyScopedClusterClient, @@ -69,7 +70,6 @@ import { import { identifyEsError } from './lib/identify_es_error'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; -import { AggregationSearchResult } from './queries/aggregation_clauses'; const VERSION_CONFLICT_STATUS = 409; @@ -399,11 +399,11 @@ export class TaskManager { * @param opts - The query options used to filter tasks * @returns {Promise} */ - public async aggregate( + public async aggregate( opts: AggregationOpts - ): Promise> { + ): Promise> { await this.waitUntilStarted(); - return this.store.aggregate(opts); + return this.store.aggregate(opts); } /** diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index ea3aa7170c86..c2fe44625ee8 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -60,11 +60,8 @@ import { SortByRunAtAndRetryAt, tasksClaimedByOwner, } from 
'./queries/mark_available_tasks_as_claimed'; -import { - AggregationQuery, - AggregationSearchResult, - AggregationResult, -} from './queries/aggregation_clauses'; + +import { ESSearchResponse, ESSearchBody, ESSearchRequest } from '../../apm/typings/elasticsearch'; export interface StoreOpts { callCluster: ElasticJs; @@ -84,11 +81,8 @@ export interface SearchOpts { search_after?: unknown[]; } -export interface AggregationOpts { - aggs: AggregationQuery; - query?: object; - size?: number; -} +export type AggregationOpts = Pick, 'aggs'> & + Pick; export interface UpdateByQuerySearchOpts extends SearchOpts { script?: object; @@ -469,28 +463,20 @@ export class TaskStore { }; } - public async aggregate({ + public async aggregate({ aggs, + query, size = 0, - }: AggregationOpts): Promise> { - const { - aggregations, - hits: { - total: { value: count }, - }, - } = (await this.callCluster('search', { + }: AggregationOpts) { + return this.callCluster('search', { index: this.index, ignoreUnavailable: true, body: ensureAggregationOnlyReturnsTaskObjects({ + query, aggs, size, }), - })) as { - aggregations: AggregationResult; - hits: { total: { value: number } }; - }; - - return { aggregations, count }; + }) as Promise>; } private async updateByQuery( From ae15dc62969b135f05fdbc552a4e5a7295532049 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 13:05:32 +0100 Subject: [PATCH 20/67] fixed tests --- .../monitoring/workload_statistics.test.ts | 4 +++- .../server/monitoring/workload_statistics.ts | 4 ++-- .../task_manager/server/routes/health.test.ts | 17 +++++++++++------ .../plugins/task_manager/server/task_store.ts | 2 +- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 2bf4acad25d6..0714401bdf4e 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ 
b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -551,7 +551,9 @@ function setTaskTypeCount( }, ]; return ({ - count: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0), + hits: { + total: { value: buckets.reduce((sum, bucket) => sum + bucket.doc_count, 0) }, + }, aggregations: { ...aggregations, taskType: { diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 5fa2ef11c7a9..28f412ce7cf4 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -164,7 +164,7 @@ export function createWorkloadAggregator( aggregations?.idleTasks?.scheduleDensity ) ) { - throw new Error(`Invalid workload: ${JSON.stringify({ aggregations, count })}`); + throw new Error(`Invalid workload: ${JSON.stringify(result)}`); } const taskTypes = (aggregations.taskType as AggregationResultOf< @@ -189,7 +189,7 @@ export function createWorkloadAggregator( taskTypes: mapValues(keyBy(taskTypes, 'key'), ({ doc_count: docCount, status }) => { return { count: docCount, - status: mapValues(keyBy(status, 'key'), 'doc_count'), + status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), }; }), schedule: schedules diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 82f1717092df..52efa97ea400 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -170,20 +170,25 @@ function mockHealthStats(overrides = {}) { workload: { timestamp: new Date().toISOString(), value: { - sum: 4, + count: 4, taskTypes: { - actions_telemetry: { sum: 2, status: { idle: 2 } }, - alerting_telemetry: { sum: 1, status: { idle: 1 } }, - session_cleanup: { sum: 1, status: { idle: 1 } }, + actions_telemetry: { count: 2, status: { idle: 
2 } }, + alerting_telemetry: { count: 1, status: { idle: 1 } }, + session_cleanup: { count: 1, status: { idle: 1 } }, }, + schedule: {}, + overdue: 0, + scheduleDensity: [], }, }, runtime: { timestamp: new Date().toISOString(), value: { drift: [1000, 1000], - duration: [], - taskRunResultFrequency: [], + execution: { + duration: [], + resultFrequency: [], + }, polling: { lastSuccessfulPoll: new Date().toISOString(), resultFrequency: ['NoTasksClaimed', 'NoTasksClaimed', 'NoTasksClaimed'], diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index c2fe44625ee8..af9397093774 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -61,7 +61,7 @@ import { tasksClaimedByOwner, } from './queries/mark_available_tasks_as_claimed'; -import { ESSearchResponse, ESSearchBody, ESSearchRequest } from '../../apm/typings/elasticsearch'; +import { ESSearchResponse, ESSearchBody } from '../../apm/typings/elasticsearch'; export interface StoreOpts { callCluster: ElasticJs; From 734cb12c86cbbfadd036cf5670ef9315f7c4ffab Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 13:58:18 +0100 Subject: [PATCH 21/67] fixed mock import --- .../alerts/server/alerts_client_conflict_retries.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts b/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts index 1c5edb45c80f..b1ac5ac4c678 100644 --- a/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts @@ -8,7 +8,7 @@ import { cloneDeep } from 'lodash'; import { AlertsClient, ConstructorOptions } from './alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../src/core/server/mocks'; -import { taskManagerMock } from 
'../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { alertTypeRegistryMock } from './alert_type_registry.mock'; import { alertsAuthorizationMock } from './authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../encrypted_saved_objects/server/mocks'; @@ -25,7 +25,7 @@ const MockAlertId = 'alert-id'; const ConflictAfterRetries = RetryForConflictsAttempts + 1; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); From db869863363f0f53332d1850eb2764a60025e61a Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 16:11:16 +0100 Subject: [PATCH 22/67] added status on health api --- x-pack/plugins/task_manager/server/plugin.ts | 6 +- .../task_manager/server/routes/health.test.ts | 67 ++++++++++++++--- .../task_manager/server/routes/health.ts | 75 ++++++++++++------- 3 files changed, 109 insertions(+), 39 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index f53418aec05a..fd922bb0da78 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -55,8 +55,10 @@ export class TaskManagerPlugin router, this.taskManager.then((tm) => createMonitoringStats(tm, config, logger)), logger, - // if health is any more stale than the pollInterval (+1s buffer) consider the system unhealthy - config.poll_interval + 1000 + // if "hot" health stats are any more stale than the pollInterval (+1s buffer) consider the system unhealthy + config.poll_interval + 1000, + // if "cold" health stats are any more stale than the configured refresh, consider the system unhealthy + config.monitored_aggregated_stats_refresh_rate ); return { diff --git 
a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 52efa97ea400..921acb31451f 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -39,7 +39,7 @@ describe('healthRoute', () => { const stats = Promise.resolve(new Subject()); - healthRoute(router, stats, logger, 1000); + healthRoute(router, stats, logger, 1000, 60000); const stats$ = await stats; @@ -59,11 +59,11 @@ describe('healthRoute', () => { expect(logger.debug).toHaveBeenCalledTimes(2); }); - it('returns an error response if the stats are no longer fresh', async () => { + it('returns a red status if the stats have not been updated within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); const mockStat = mockHealthStats(); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000, 60000); const [, handler] = router.get.mock.calls[0]; @@ -73,7 +73,8 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - attributes: summarizeMonitoringStats( + status: 'red', + ...summarizeMonitoringStats( mockHealthStats({ lastUpdate: expect.any(String), stats: { @@ -94,12 +95,58 @@ describe('healthRoute', () => { }, }) ), - message: new Error('Task Manager monitored stats are out of date'), }, }); }); - it('returns an error response if the poller hasnt polled within the required freshness', async () => { + it('returns a red status if the workload stats have not been updated within the required cold freshness', async () => { + const router = httpServiceMock.createRouter(); + + const lastUpdateOfWorkload = new Date(Date.now() - 120000).toISOString(); + const mockStat = mockHealthStats({ + stats: { + workload: { + timestamp: lastUpdateOfWorkload, + }, + }, + }); + healthRoute(router, 
Promise.resolve(of(mockStat)), mockLogger(), 5000, 60000); + + const [, handler] = router.get.mock.calls[0]; + + const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); + + await sleep(2000); + + expect(await handler(context, req, res)).toMatchObject({ + body: { + status: 'red', + ...summarizeMonitoringStats( + mockHealthStats({ + lastUpdate: expect.any(String), + stats: { + configuration: { + timestamp: expect.any(String), + }, + workload: { + timestamp: expect.any(String), + }, + runtime: { + timestamp: expect.any(String), + value: { + polling: { + lastSuccessfulPoll: expect.any(String), + }, + }, + }, + }, + }) + ), + }, + }); + }); + + it('returns a red status if the poller hasnt polled within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); const lastSuccessfulPoll = new Date(Date.now() - 2000).toISOString(); @@ -114,7 +161,7 @@ describe('healthRoute', () => { }, }, }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000, 60000); const [, handler] = router.get.mock.calls[0]; @@ -122,7 +169,8 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - attributes: summarizeMonitoringStats( + status: 'red', + ...summarizeMonitoringStats( mockHealthStats({ lastUpdate: expect.any(String), stats: { @@ -143,7 +191,6 @@ describe('healthRoute', () => { }, }) ), - message: new Error('Task Manager monitored stats are out of date'), }, }); }); @@ -184,7 +231,7 @@ function mockHealthStats(overrides = {}) { runtime: { timestamp: new Date().toISOString(), value: { - drift: [1000, 1000], + drift: [1000, 60000], execution: { duration: [], resultFrequency: [], diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index d48775803c78..0eb34d6960aa 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ 
b/x-pack/plugins/task_manager/server/routes/health.ts @@ -15,18 +15,25 @@ import { Logger } from 'src/core/server'; import { Observable } from 'rxjs'; import { take } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; -import { isString } from 'lodash'; +import { isString, isNumber } from 'lodash'; import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; +enum HealthStatus { + Green = 'green', + Yellow = 'yellow', + Red = 'red', +} + export function healthRoute( router: IRouter, monitoringStats: Promise>, logger: Logger, - requiredFreshness: number + requiredHotStatsFreshness: number, + requiredColdStatsFreshness: number ) { /* Log Task Manager stats as a Debug log line at a fixed interval */ monitoringStats.then((monitoringStats$) => { - monitoringStats$.pipe(throttleTime(requiredFreshness)).subscribe((stats) => { + monitoringStats$.pipe(throttleTime(requiredHotStatsFreshness)).subscribe((stats) => { logger.debug(JSON.stringify(summarizeMonitoringStats(stats))); }); }); @@ -46,40 +53,54 @@ export function healthRoute( const timestamp = new Date(now).toISOString(); /** - * If the monitored stats aren't fresh, return an `500 internalError` with - * the stats in the body of the api call. This makes it easier for monitoring - * services to mark the service as broken + * If the monitored stats aren't fresh, return a red status */ - if ( - now - - getOldestTimestamp( - stats.lastUpdate, - stats.stats.runtime?.value.polling.lastSuccessfulPoll - ) > - requiredFreshness - ) { - return res.internalError({ - body: { - message: new Error('Task Manager monitored stats are out of date'), - attributes: { timestamp, ...summarizeMonitoringStats(stats) }, - }, - }); - } + const healthStatus = + hasExpiredHotTimestamps(stats, now, requiredHotStatsFreshness) || + hasExpiredColdTimestamps(stats, now, requiredColdStatsFreshness) + ? 
HealthStatus.Red + : HealthStatus.Green; + return res.ok({ - body: { timestamp, ...summarizeMonitoringStats(stats) }, + body: { timestamp, status: healthStatus, ...summarizeMonitoringStats(stats) }, }); } ); } -function getOldestTimestamp(...timestamps: unknown[]): number { - return Math.min( - ...timestamps - .map((timestamp) => (isString(timestamp) ? Date.parse(timestamp) : NaN)) - .filter((timestamp) => !isNaN(timestamp)) +/** + * If certain "hot" stats are not fresh, then the _health api will should return a Red status + * @param stats The monitored stats + * @param now The time to compare against + * @param requiredFreshness How fresh should these stats be + */ +function hasExpiredHotTimestamps( + stats: MonitoringStats, + now: number, + requiredFreshness: number +): boolean { + return ( + now - + getOldestTimestamp(stats.lastUpdate, stats.stats.runtime?.value.polling.lastSuccessfulPoll) > + requiredFreshness ); } +function hasExpiredColdTimestamps( + stats: MonitoringStats, + now: number, + requiredFreshness: number +): boolean { + return now - getOldestTimestamp(stats.stats.workload?.timestamp) > requiredFreshness; +} + +function getOldestTimestamp(...timestamps: unknown[]): number { + const validTimestamps = timestamps + .map((timestamp) => (isString(timestamp) ? Date.parse(timestamp) : NaN)) + .filter((timestamp) => !isNaN(timestamp)); + return validTimestamps.length ? 
Math.min(...validTimestamps) : 0; +} + async function getLatestStats(monitoringStats$: Observable) { return new Promise((resolve) => monitoringStats$.pipe(take(1)).subscribe((stats) => resolve(stats)) From 4ca65c3ec5448cbf7606c90d7599a89b100f2639 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 16:28:05 +0100 Subject: [PATCH 23/67] test status in aceptancve tests --- .../test_suites/task_manager/health_route.ts | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 188cce9e0cc6..243ea3084dc4 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -75,7 +75,9 @@ export default function ({ getService }: FtrProviderContext) { describe('health', () => { it('should return basic configuration of task manager', async () => { - expect((await getHealth()).stats.configuration.value).to.eql({ + const health = await getHealth(); + expect(health.status).to.eql('green'); + expect(health.stats.configuration.value).to.eql({ poll_interval: 3000, max_poll_inactivity_cycles: 10, monitored_aggregated_stats_refresh_rate: monitoredAggregatedStatsRefreshRate, @@ -86,7 +88,14 @@ export default function ({ getService }: FtrProviderContext) { }); it('should return the task manager workload', async () => { - const { workload } = (await getHealth()).stats; + const health = await getHealth(); + const { + status, + stats: { workload }, + } = health; + + expect(status).to.eql('green'); + const sumSampleTaskInWorkload = (workload.value.taskTypes as { sampleTask?: { count: number }; From 44cb5789285216828e4cf8478bb2619c2f4ca4c2 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 18:54:02 +0100 Subject: [PATCH 24/67] corrected types --- 
x-pack/plugins/task_manager/server/plugin.ts | 2 +- x-pack/plugins/task_manager/server/routes/health.test.ts | 2 +- x-pack/plugins/task_manager/server/routes/health.ts | 2 +- .../test_suites/task_manager/health_route.ts | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index fd922bb0da78..5f627fee85f8 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -58,7 +58,7 @@ export class TaskManagerPlugin // if "hot" health stats are any more stale than the pollInterval (+1s buffer) consider the system unhealthy config.poll_interval + 1000, // if "cold" health stats are any more stale than the configured refresh, consider the system unhealthy - config.monitored_aggregated_stats_refresh_rate + config.monitored_aggregated_stats_refresh_rate + 1000 ); return { diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 921acb31451f..289a3a3b605c 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -20,7 +20,7 @@ describe('healthRoute', () => { it('registers the route', async () => { const router = httpServiceMock.createRouter(); - healthRoute(router, Promise.resolve(of()), mockLogger(), 1000); + healthRoute(router, Promise.resolve(of()), mockLogger(), 1000, 1000); const [config] = router.get.mock.calls[0]; diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 0eb34d6960aa..987f1cdbb006 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -15,7 +15,7 @@ import { Logger } from 'src/core/server'; import { Observable } from 'rxjs'; import { take } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; 
-import { isString, isNumber } from 'lodash'; +import { isString } from 'lodash'; import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; enum HealthStatus { diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 243ea3084dc4..f70c4253f79f 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -13,6 +13,7 @@ import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; interface MonitoringStats { lastUpdate: string; + status: string; stats: { configuration: { timestamp: string; From d29c866d23b9d7a9ff43eb48b98cdc9ebe86c64d Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 5 Oct 2020 19:29:41 +0100 Subject: [PATCH 25/67] change RGY to OK Error and Warn --- .../task_manager/server/routes/health.test.ts | 12 +++--- .../task_manager/server/routes/health.ts | 39 ++++++++++--------- .../test_suites/task_manager/health_route.ts | 4 +- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 289a3a3b605c..d2e2ee707ffa 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -59,7 +59,7 @@ describe('healthRoute', () => { expect(logger.debug).toHaveBeenCalledTimes(2); }); - it('returns a red status if the stats have not been updated within the required hot freshness', async () => { + it('returns a error status if the stats have not been updated within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); const mockStat = mockHealthStats(); @@ -73,7 +73,7 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - 
status: 'red', + status: 'error', ...summarizeMonitoringStats( mockHealthStats({ lastUpdate: expect.any(String), @@ -99,7 +99,7 @@ describe('healthRoute', () => { }); }); - it('returns a red status if the workload stats have not been updated within the required cold freshness', async () => { + it('returns a error status if the workload stats have not been updated within the required cold freshness', async () => { const router = httpServiceMock.createRouter(); const lastUpdateOfWorkload = new Date(Date.now() - 120000).toISOString(); @@ -120,7 +120,7 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - status: 'red', + status: 'error', ...summarizeMonitoringStats( mockHealthStats({ lastUpdate: expect.any(String), @@ -146,7 +146,7 @@ describe('healthRoute', () => { }); }); - it('returns a red status if the poller hasnt polled within the required hot freshness', async () => { + it('returns a error status if the poller hasnt polled within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); const lastSuccessfulPoll = new Date(Date.now() - 2000).toISOString(); @@ -169,7 +169,7 @@ describe('healthRoute', () => { expect(await handler(context, req, res)).toMatchObject({ body: { - status: 'red', + status: 'error', ...summarizeMonitoringStats( mockHealthStats({ lastUpdate: expect.any(String), diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 987f1cdbb006..36e7a20e3bb3 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -19,9 +19,9 @@ import { isString } from 'lodash'; import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; enum HealthStatus { - Green = 'green', - Yellow = 'yellow', - Red = 'red', + OK = 'OK', + Warning = 'warn', + Error = 'error', } export function healthRoute( @@ -31,10 +31,26 @@ export function healthRoute( 
requiredHotStatsFreshness: number, requiredColdStatsFreshness: number ) { + function calculateStatus(stats: MonitoringStats) { + const now = Date.now(); + const timestamp = new Date(now).toISOString(); + + /** + * If the monitored stats aren't fresh, return a red status + */ + const healthStatus = + hasExpiredHotTimestamps(stats, now, requiredHotStatsFreshness) || + hasExpiredColdTimestamps(stats, now, requiredColdStatsFreshness) + ? HealthStatus.Error + : HealthStatus.OK; + + return { timestamp, status: healthStatus, ...summarizeMonitoringStats(stats) }; + } + /* Log Task Manager stats as a Debug log line at a fixed interval */ monitoringStats.then((monitoringStats$) => { monitoringStats$.pipe(throttleTime(requiredHotStatsFreshness)).subscribe((stats) => { - logger.debug(JSON.stringify(summarizeMonitoringStats(stats))); + logger.debug(JSON.stringify(calculateStatus(stats))); }); }); @@ -48,21 +64,8 @@ export function healthRoute( req: KibanaRequest, res: KibanaResponseFactory ): Promise { - const stats = await getLatestStats(await monitoringStats); - const now = Date.now(); - const timestamp = new Date(now).toISOString(); - - /** - * If the monitored stats aren't fresh, return a red status - */ - const healthStatus = - hasExpiredHotTimestamps(stats, now, requiredHotStatsFreshness) || - hasExpiredColdTimestamps(stats, now, requiredColdStatsFreshness) - ? 
HealthStatus.Red - : HealthStatus.Green; - return res.ok({ - body: { timestamp, status: healthStatus, ...summarizeMonitoringStats(stats) }, + body: calculateStatus(await getLatestStats(await monitoringStats)), }); } ); diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index f70c4253f79f..88e591066147 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -77,7 +77,7 @@ export default function ({ getService }: FtrProviderContext) { describe('health', () => { it('should return basic configuration of task manager', async () => { const health = await getHealth(); - expect(health.status).to.eql('green'); + expect(health.status).to.eql('OK'); expect(health.stats.configuration.value).to.eql({ poll_interval: 3000, max_poll_inactivity_cycles: 10, @@ -95,7 +95,7 @@ export default function ({ getService }: FtrProviderContext) { stats: { workload }, } = health; - expect(status).to.eql('green'); + expect(status).to.eql('OK'); const sumSampleTaskInWorkload = (workload.value.taskTypes as { From 273d58d22bdf30de0b05709cbdd174275bb8c40e Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 6 Oct 2020 10:49:00 +0100 Subject: [PATCH 26/67] added readme --- .../plugins/task_manager/server/MONITORING.md | 243 ++++++++++++++++++ x-pack/plugins/task_manager/server/README.md | 8 + 2 files changed, 251 insertions(+) create mode 100644 x-pack/plugins/task_manager/server/MONITORING.md diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md new file mode 100644 index 000000000000..2fa08aa8bc1d --- /dev/null +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -0,0 +1,243 @@ +# Task Manager Monitoring + +Task Manager has an internal monitoring mechanism in which keeps track of a variety of 
metrics which are exposed via a `health` api endpoint and Kibana Server Log debug messaging. + +## Exposed Metrics +There are three different sections to the stats returned by the `health` api. +- `configuration`: Summarizes Task Manager's current configuration. +- `workload`: Summarizes the workload in the current deployment. +- `runtime`: Tracks Task Manager's performance. + +### Configuring the Stats +There are two new configurations: + +- `xpack.task_manager.monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. These metrics require an aggregation against Elasticsearch and add load to the system, hence we want to limit how often we execute these. This covers the entire `workload` section of the stats. By default this is set to `60s`. +- `xpack.task_manager.monitored_stats_running_average_window` - Dictates the size of the window used to calculate the running average of various "Hot" stats, such as the time it takes to run a task, the _drift_ that tasks experience etc. These stats are collected throughout the lifecycle of tasks and this window will dictate how large the queue we keep in memory would be, and how many values we need to calculate the average against. We do not calculate the average on *every* new value, but rather only when the time comes to summarize the stats before logging them or returning them to the API endpoint. + +Other configurations are inferred from existing config values. +For example: +- The _required freshness_ of critical "Hot" stats is always `pollingInterval + 1s`, which means that if key stats (last polling cycle time, for example) haven't been refreshed within the time scale of a single interval + 1s the stat will report an `Error` status.
+- The _required freshness_ of critical "Cold" stats is `monitored_aggregated_stats_refresh_rate + 1s`, which means that if these stats (workload, for example) have not been updated within the required refresh rate then the api will return an `Error` status. + +## Consuming Health Stats +Task Manager exposes a `/api/task_manager/_health` api which returns the _latest_ stats. +Calling this API is designed to be fast and doesn't actually perform any checks- rather it returns the result of the latest stats in the system, and is designed in such a way that you could call it from an external service on a regular basis without worrying that you'll be adding substantial load to the system. + +Additionally, the metrics are logged out into Task Manager's `DEBUG` logger at a regular cadence (dictated by the Polling Interval). +If you wish to enable DEBUG logging in your Kibana instance, you will need to add the following to your `kibana.yml`: +``` +logging: + loggers: + - context: plugins.taskManager + appenders: [console] + level: debug +``` + +Please bear in mind that these stats are logged as often as your `poll_interval` configuration, which means it could add substantial noise to your logs. +We would recommend only enabling this level of logging temporarily. + +### Understanding the Exposed Stats + +As mentioned above, the `health` api exposes three sections: `configuration`, `workload` and `runtime`. +Each section has a `timestamp` and a `status` which indicates when the last update to this section took place and whether the health of this section was evaluated as `OK`, `Warning` or `Error`. + +The root has its own `status` which indicates the state of the system overall as inferred from the `status` of the sections. +An `Error` status in any section will cause the whole system to display as `Error`. +A `Warning` status in any section will cause the whole system to display as `Warning`. +An `OK` status will only be displayed when all sections are marked as `OK`.
+ +The root `timestamp` is the time in which the summary was exposed (either to the DEBUG logger or the http api) and the `lastUpdate` is the last time any one of the sections was updated. + +#### The Configuration Section +The `configuration` section summarizes Task Manager's current configuration, including dynamic configurations which change over time, such as `poll_interval` and `max_workers` which adjust in reaction to changing load on the system. + +These are "Hot" stats which are updated whenever a change happens in the configuration. + +#### The Workload Section +The `workload` section summarizes the workload in the current deployment, listing the tasks in the system, their types and what their current status is. + +It includes three sub sections: + - The number of tasks scheduled in the system, broken down by type and status. + - The number of idle `overdue` tasks, whose `runAt` has expired. + - Execution density in the next minute or so (configurable), which shows how many tasks are scheduled to execute in the scope of each polling interval. This can give us an idea of how much load there is on the current Kibana deployment. + +These are "Cold" stats which are updated at a regular cadence, configured by the `monitored_aggregated_stats_refresh_rate` config. + +#### The Runtime Section +The `runtime` section tracks Task Manager's performance as it runs, making note of task execution time, _drift_ etc. +These include: + - The time it takes a task to run (mean and median, using a configurable running average window, `50` by default) + - The average _drift_ that tasks experience (mean and median, using the same configurable running average window as above). Drift tells us how long after its scheduled time a task typically executes.
+ - The polling rate (the timestamp of the last time a polling cycle completed) and the result [`No tasks | Filled task pool | Unexpectedly ran out of workers`] frequency over the past 50 polling cycles (using the same window size as the one used for running averages) + - The `Success | Retry | Failure ratio` by task type. This is different than the workload stats which tell you what's in the queue, but can't keep track of retries and of non recurring tasks as they're wiped off the index when completed. + +These are "Hot" stats which are updated reactively as Tasks are executed and interacted with. + +### Example Stats + +For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might get these stats: +``` +{ + /* the time these stats were returned by the api */ + "timestamp": "2020-10-05T18:26:11.346Z", + /* the overall status of the system */ + "status": "OK", + /* last time any stat was updated in this output */ + "lastUpdate": "2020-10-05T17:57:55.411Z", + "stats": { + "configuration": { /* current configuration of TM */ + "timestamp": "2020-10-05T17:56:06.507Z", + "value": { + "max_workers": 10, + "poll_interval": 3000, + "request_capacity": 1000, + "max_poll_inactivity_cycles": 10, + "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_running_average_window": 50 + } + }, + "workload": { /* The workload of this deployment */ + "timestamp": "2020-10-05T17:57:06.534Z", + "value": { + "count": 6, /* count of tasks in the system */ + "taskTypes": { /* what tasks are there and what status are they in */ + "actions_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "alerting_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "apm-telemetry-task": { + "count": 1, + "status": { + "idle": 1 + } + }, + "endpoint:user-artifact-packager": { + "count": 1, + "status": { + "idle": 1 + } + }, + "lens_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "session_cleanup": { + "count": 1, + "status": { + "idle":
1 + } + } + }, + + /* Frequency of recurring tasks schedules */ + "schedule": [ + ["60s", 1], /* 1 task, every 60s */ + ["3600s", 3], /* 3 tasks every hour */ + ["720m", 1] + ], + /* There are no overdue tasks in this system at the moment */ + "overdue": 0, + /* This is the schedule density, it shows a histogram of all the polling intervals in the next minute (or, if + pollInterval is configured unusually high it will show a min of 2 refresh intervals into the future, and a max of 50 buckets). + Here we see that on the 3rd polling interval from *now* (which is ~9 seconds from now, as pollInterval is `3s`) there is one task due to run. + We also see that there are 5 due two intervals later, which is fine as we have a max workers of `10` + */ + "scheduleDensity": [0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + } + }, + "runtime": { + "timestamp": "2020-10-05T17:57:55.411Z", + "value": { + "polling": { + /* When was the last polling cycle? */ + "lastSuccessfulPoll": "2020-10-05T17:57:55.411Z", + /* What is the frequency of polling cycle result? 
+ Here we see 94% of "NoTasksClaimed" and 6% "PoolFilled" */ + "resultFrequency": { + "NoTasksClaimed": 94, + "RanOutOfCapacity": 0, /* This is a legacy result, we might want to rename - it tells us when a polling cycle resulted in claiming more tasks than we had workers for, but the name doesn't make much sense outside of the context of the code */ + "PoolFilled": 6 + } + }, + /* on average, the tasks in this deployment run 1.7s after their scheduled time */ + "drift": { + "mean": 1720, + "median": 2276 + }, + "execution": { + "duration": { + /* on average, the `endpoint:user-artifact-packager` tasks take 15ms to run */ + "endpoint:user-artifact-packager": { + "mean": 15, + "median": 14.5 + }, + "session_cleanup": { + "mean": 28, + "median": 28 + }, + "lens_telemetry": { + "mean": 100, + "median": 100 + }, + "actions_telemetry": { + "mean": 135, + "median": 135 + }, + "alerting_telemetry": { + "mean": 197, + "median": 197 + }, + "apm-telemetry-task": { + "mean": 1347, + "median": 1347 + } + }, + "resultFrequency": { + /* and 100% of `endpoint:user-artifact-packager` have completed in success (within the running average window, so the past 50 runs (by default, configurable by `monitored_stats_running_average_window`)) */ + "endpoint:user-artifact-packager": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "session_cleanup": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "lens_telemetry": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "actions_telemetry": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "alerting_telemetry": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "apm-telemetry-task": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + } + } + } + } + } + } +} +``` diff --git a/x-pack/plugins/task_manager/server/README.md b/x-pack/plugins/task_manager/server/README.md index fd2409a7db0a..4eb8a78cb4d9 100644 --- a/x-pack/plugins/task_manager/server/README.md +++
b/x-pack/plugins/task_manager/server/README.md @@ -48,6 +48,8 @@ The task_manager can be configured via `taskManager` config options (e.g. `taskM - `override_num_workers`: An object of `taskType: number` that overrides the `num_workers` for tasks - For example: `task_manager.override_num_workers.reporting: 2` would override the number of workers occupied by tasks of type `reporting` - This allows sysadmins to tweak the operational performance of Kibana, allowing more or fewer tasks of a specific type to run simultaneously +- `monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. Learn More: [./MONITORING](./MONITORING.md) +- `monitored_stats_running_average_window` - Dictates the size of the window used to calculate the running average of various "Hot" stats. Learn More: [./MONITORING](./MONITORING.md) ## Task definitions @@ -460,3 +462,9 @@ The task manager's public API is create / delete / list. Updates aren't directly node scripts/functional_tests_server.js --config x-pack/test/plugin_api_integration/config.ts node scripts/functional_test_runner --config x-pack/test/plugin_api_integration/config.ts ``` + +## Monitoring + +Task Manager exposes runtime statistics which enable basic observability into its inner workings and make it possible to monitor the system from external services.
+ +Learn More: [./MONITORING](./MONITORING.md) \ No newline at end of file From 2efb59904060faba2a2f13683d67bdb0bfdba702 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 6 Oct 2020 10:53:50 +0100 Subject: [PATCH 27/67] updated json in readme --- x-pack/plugins/task_manager/server/MONITORING.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index 2fa08aa8bc1d..36bb85d32612 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -87,6 +87,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g "stats": { "configuration": { /* current configuration of TM */ "timestamp": "2020-10-05T17:56:06.507Z", + "status": "OK", "value": { "max_workers": 10, "poll_interval": 3000, @@ -98,6 +99,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g }, "workload": { /* The workload of this deployment */ "timestamp": "2020-10-05T17:57:06.534Z", + "status": "OK", "value": { "count": 6, /* count of tasks in the system */ "taskTypes": { /* what tasks are there and what status are they in */ @@ -157,6 +159,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g }, "runtime": { "timestamp": "2020-10-05T17:57:55.411Z", + "status": "OK", "value": { "polling": { /* When was the last polling cycle? 
*/ From 911c827f0af8b544afd76b7ffd56995d5a28ac4d Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 6 Oct 2020 10:58:39 +0100 Subject: [PATCH 28/67] spaces -> tabs --- x-pack/plugins/task_manager/server/MONITORING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index 36bb85d32612..10bf60ad7e58 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -87,7 +87,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g "stats": { "configuration": { /* current configuration of TM */ "timestamp": "2020-10-05T17:56:06.507Z", - "status": "OK", + "status": "OK", "value": { "max_workers": 10, "poll_interval": 3000, @@ -99,7 +99,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g }, "workload": { /* The workload of this deployment */ "timestamp": "2020-10-05T17:57:06.534Z", - "status": "OK", + "status": "OK", "value": { "count": 6, /* count of tasks in the system */ "taskTypes": { /* what tasks are there and what status are they in */ @@ -159,7 +159,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g }, "runtime": { "timestamp": "2020-10-05T17:57:55.411Z", - "status": "OK", + "status": "OK", "value": { "polling": { /* When was the last polling cycle? 
*/ From 2ab5e730337a25a1668b92572be52fd7fe0daf95 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 6 Oct 2020 15:59:40 +0100 Subject: [PATCH 29/67] Added health status in each section --- .../task_manager/server/monitoring/index.ts | 1 + .../monitoring/monitoring_stats_stream.ts | 66 +++++++++++++------ .../monitoring/task_run_statistics.test.ts | 8 +-- .../server/monitoring/task_run_statistics.ts | 32 +++++---- .../server/monitoring/workload_statistics.ts | 10 +++ .../task_manager/server/routes/health.test.ts | 34 +++++++--- .../task_manager/server/routes/health.ts | 45 ++++++++----- 7 files changed, 131 insertions(+), 65 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts index ef447d6ef062..ab431978b753 100644 --- a/x-pack/plugins/task_manager/server/monitoring/index.ts +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -16,6 +16,7 @@ import { export { MonitoringStats, + HealthStatus, RawMonitoringStats, summarizeMonitoringStats, createAggregators, diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index edb22b6d79ae..2975a26977c0 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -10,8 +10,17 @@ import { pick } from 'lodash'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { TaskManager } from '../task_manager'; -import { createWorkloadAggregator, WorkloadStat } from './workload_statistics'; -import { createTaskRunAggregator, summarizeTaskRunStat, TaskRunStat } from './task_run_statistics'; +import { + createWorkloadAggregator, + summarizeWorkloadStat, + WorkloadStat, +} from './workload_statistics'; +import { + createTaskRunAggregator, + summarizeTaskRunStat, + 
TaskRunStat, + SummarizedTaskRunStat, +} from './task_run_statistics'; import { TaskManagerConfig } from '../config'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; @@ -31,29 +40,33 @@ type ConfigStat = Pick; + workload?: MonitoredStat; + runtime?: MonitoredStat; }; } -interface MonitoredStat { +export enum HealthStatus { + OK = 'OK', + Warning = 'warn', + Error = 'error', +} + +interface MonitoredStat { timestamp: string; - value: JsonObject; + value: T; } +type RawMonitoredStat = MonitoredStat & { + status: HealthStatus; +}; export interface RawMonitoringStats { lastUpdate: string; - stats: Record; + stats: { + configuration: RawMonitoredStat; + workload?: RawMonitoredStat; + runtime?: RawMonitoredStat; + }; } export function createAggregators( @@ -100,17 +113,28 @@ export function createMonitoringStatsStream( export function summarizeMonitoringStats({ lastUpdate, - stats: { runtime, ...otherStats }, + stats: { runtime, workload, configuration }, }: MonitoringStats): RawMonitoringStats { return { lastUpdate, stats: { - ...((otherStats as unknown) as RawMonitoringStats['stats']), + configuration: { + ...configuration, + status: HealthStatus.OK, + }, ...(runtime ? { runtime: { - ...runtime, - value: summarizeTaskRunStat(runtime.value), + timestamp: runtime.timestamp, + ...summarizeTaskRunStat(runtime.value), + }, + } + : {}), + ...(workload + ? 
{ + workload: { + timestamp: workload.timestamp, + ...summarizeWorkloadStat(workload.value), }, } : {}), diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index fefe0fd62b87..247f78808e62 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -63,7 +63,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value), + value: summarizeTaskRunStat(value).value, })), take(runAtDrift.length), bufferCount(runAtDrift.length) @@ -126,7 +126,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value), + value: summarizeTaskRunStat(value).value, })), take(runDurations.length * 2), bufferCount(runDurations.length * 2) @@ -219,7 +219,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value), + value: summarizeTaskRunStat(value).value, })), take(10), bufferCount(10) @@ -292,7 +292,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value), + value: summarizeTaskRunStat(value).value, })), tap(() => { expectedTimestamp.push(new Date().toISOString()); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 5c3c3d12972a..0e9f50b57adc 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ 
b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -29,6 +29,7 @@ import { createRunningAveragedStat, createMapOfRunningAveragedStats, } from './task_run_calcultors'; +import { HealthStatus } from './monitoring_stats_stream'; interface FillPoolStat extends JsonObject { lastSuccessfulPoll: string; @@ -160,22 +161,25 @@ export function summarizeTaskRunStat({ polling: { lastSuccessfulPoll, resultFrequency: pollingResultFrequency }, drift, execution: { duration, resultFrequency: executionResultFrequency }, -}: TaskRunStat): SummarizedTaskRunStat { +}: TaskRunStat): { value: SummarizedTaskRunStat; status: HealthStatus } { return { - polling: { - ...(lastSuccessfulPoll ? { lastSuccessfulPoll } : {}), - resultFrequency: { - ...DEFAULT_POLLING_FREQUENCIES, - ...calculateFrequency(pollingResultFrequency as FillPoolResult[]), + value: { + polling: { + ...(lastSuccessfulPoll ? { lastSuccessfulPoll } : {}), + resultFrequency: { + ...DEFAULT_POLLING_FREQUENCIES, + ...calculateFrequency(pollingResultFrequency as FillPoolResult[]), + }, + }, + drift: calculateRunningAverage(drift), + execution: { + duration: mapValues(duration, (typedDurations) => calculateRunningAverage(typedDurations)), + resultFrequency: mapValues(executionResultFrequency, (typedResultFrequencies) => ({ + ...DEFAULT_TASK_RUN_FREQUENCIES, + ...calculateFrequency(typedResultFrequencies), + })), }, }, - drift: calculateRunningAverage(drift), - execution: { - duration: mapValues(duration, (typedDurations) => calculateRunningAverage(typedDurations)), - resultFrequency: mapValues(executionResultFrequency, (typedResultFrequencies) => ({ - ...DEFAULT_TASK_RUN_FREQUENCIES, - ...calculateFrequency(typedResultFrequencies), - })), - }, + status: HealthStatus.OK, }; } diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 28f412ce7cf4..a46acb013a6c 100644 --- 
a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -15,6 +15,7 @@ import { TaskManager } from '../task_manager'; import { ConcreteTaskInstance } from '../task'; import { parseIntervalAsSecond, asInterval } from '../lib/intervals'; import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; +import { HealthStatus } from './monitoring_stats_stream'; interface StatusStat extends JsonObject { [status: string]: number; @@ -246,3 +247,12 @@ function bucketsBetween(from: number, to: number, interval: number) { } return count; } + +export function summarizeWorkloadStat( + workloadStats: WorkloadStat +): { value: WorkloadStat; status: HealthStatus } { + return { + value: workloadStats, + status: HealthStatus.OK, + }; +} diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index d2e2ee707ffa..294f87dfd1f9 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -10,7 +10,7 @@ import { httpServiceMock } from 'src/core/server/mocks'; import { healthRoute } from './health'; import { mockHandlerArguments } from './_mock_handler_arguments'; import { sleep, mockLogger } from '../test_utils'; -import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; +import { MonitoringStats, summarizeMonitoringStats, HealthStatus } from '../monitoring'; describe('healthRoute', () => { beforeEach(() => { @@ -49,20 +49,34 @@ describe('healthRoute', () => { await sleep(600); stats$.next(nextMockStat); - expect(logger.debug).toHaveBeenCalledWith(JSON.stringify(summarizeMonitoringStats(mockStat))); - expect(logger.debug).not.toHaveBeenCalledWith( - JSON.stringify(summarizeMonitoringStats(skippedMockStat)) - ); - expect(logger.debug).toHaveBeenCalledWith( - JSON.stringify(summarizeMonitoringStats(nextMockStat)) 
- ); + const firstDebug = JSON.parse(logger.debug.mock.calls[0][0]); + expect(firstDebug).toMatchObject({ + timestamp: expect.any(String), + status: expect.any(String), + ...summarizeMonitoringStats(mockStat), + }); + + const secondDebug = JSON.parse(logger.debug.mock.calls[1][0]); + expect(secondDebug).not.toMatchObject({ + timestamp: expect.any(String), + status: expect.any(String), + ...summarizeMonitoringStats(skippedMockStat), + }); + expect(secondDebug).toMatchObject({ + timestamp: expect.any(String), + status: expect.any(String), + ...summarizeMonitoringStats(nextMockStat), + }); + expect(logger.debug).toHaveBeenCalledTimes(2); }); - it('returns a error status if the stats have not been updated within the required hot freshness', async () => { + it('returns a error status if the overall stats have not been updated within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); - const mockStat = mockHealthStats(); + const mockStat = mockHealthStats({ + lastUpdate: new Date(Date.now() - 1500).toISOString(), + }); healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000, 60000); const [, handler] = router.get.mock.calls[0]; diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 36e7a20e3bb3..f614511faa98 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -16,13 +16,12 @@ import { Observable } from 'rxjs'; import { take } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; -import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; - -enum HealthStatus { - OK = 'OK', - Warning = 'warn', - Error = 'error', -} +import { + MonitoringStats, + summarizeMonitoringStats, + HealthStatus, + RawMonitoringStats, +} from '../monitoring'; export function healthRoute( router: IRouter, @@ -31,20 +30,25 @@ export function 
healthRoute( requiredHotStatsFreshness: number, requiredColdStatsFreshness: number ) { - function calculateStatus(stats: MonitoringStats) { + function calculateStatus(monitoredStats: MonitoringStats) { const now = Date.now(); const timestamp = new Date(now).toISOString(); + const summarizedStats = summarizeMonitoringStats(monitoredStats); + /** * If the monitored stats aren't fresh, return a red status */ const healthStatus = - hasExpiredHotTimestamps(stats, now, requiredHotStatsFreshness) || - hasExpiredColdTimestamps(stats, now, requiredColdStatsFreshness) + hasStatus(summarizedStats.stats, HealthStatus.Error) || + hasExpiredHotTimestamps(summarizedStats, now, requiredHotStatsFreshness) || + hasExpiredColdTimestamps(summarizedStats, now, requiredColdStatsFreshness) ? HealthStatus.Error + : hasStatus(summarizedStats.stats, HealthStatus.Warning) + ? HealthStatus.Warning : HealthStatus.OK; - return { timestamp, status: healthStatus, ...summarizeMonitoringStats(stats) }; + return { timestamp, status: healthStatus, ...summarizedStats }; } /* Log Task Manager stats as a Debug log line at a fixed interval */ @@ -73,28 +77,37 @@ export function healthRoute( /** * If certain "hot" stats are not fresh, then the _health api will should return a Red status - * @param stats The monitored stats + * @param monitoringStats The monitored stats * @param now The time to compare against * @param requiredFreshness How fresh should these stats be */ function hasExpiredHotTimestamps( - stats: MonitoringStats, + monitoringStats: RawMonitoringStats, now: number, requiredFreshness: number ): boolean { return ( now - - getOldestTimestamp(stats.lastUpdate, stats.stats.runtime?.value.polling.lastSuccessfulPoll) > + getOldestTimestamp( + monitoringStats.lastUpdate, + monitoringStats.stats.runtime?.value.polling.lastSuccessfulPoll + ) > requiredFreshness ); } function hasExpiredColdTimestamps( - stats: MonitoringStats, + monitoringStats: RawMonitoringStats, now: number, requiredFreshness: 
number ): boolean { - return now - getOldestTimestamp(stats.stats.workload?.timestamp) > requiredFreshness; + return now - getOldestTimestamp(monitoringStats.stats.workload?.timestamp) > requiredFreshness; +} + +function hasStatus(stats: RawMonitoringStats['stats'], status: HealthStatus): boolean { + return Object.values(stats) + .map((stat) => stat?.status === status) + .includes(true); } function getOldestTimestamp(...timestamps: unknown[]): number { From 37a4041554f2bcda70165cd73a0112870623c51b Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 6 Oct 2020 16:07:02 +0100 Subject: [PATCH 30/67] removed unused import --- x-pack/plugins/task_manager/server/routes/health.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 294f87dfd1f9..2fc1d818f826 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -10,7 +10,7 @@ import { httpServiceMock } from 'src/core/server/mocks'; import { healthRoute } from './health'; import { mockHandlerArguments } from './_mock_handler_arguments'; import { sleep, mockLogger } from '../test_utils'; -import { MonitoringStats, summarizeMonitoringStats, HealthStatus } from '../monitoring'; +import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; describe('healthRoute', () => { beforeEach(() => { From 486339138822d54bd2da00214fbfeb2f00ab7a2a Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 8 Oct 2020 12:51:58 +0100 Subject: [PATCH 31/67] plug health into service status --- x-pack/plugins/task_manager/server/plugin.ts | 16 ++++- .../task_manager/server/routes/health.test.ts | 46 ++++++++++++++- .../task_manager/server/routes/health.ts | 58 +++++++++++++++---- 3 files changed, 105 insertions(+), 15 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.ts 
b/x-pack/plugins/task_manager/server/plugin.ts index 5f627fee85f8..e93b639e2c8d 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -4,8 +4,8 @@ * you may not use this file except in compliance with the Elastic License. */ import { PluginInitializerContext, Plugin, CoreSetup, CoreStart, Logger } from 'src/core/server'; -import { Subject } from 'rxjs'; -import { first } from 'rxjs/operators'; +import { Subject, combineLatest } from 'rxjs'; +import { first, map } from 'rxjs/operators'; import { TaskDictionary, TaskDefinition } from './task'; import { TaskManager } from './task_manager'; import { TaskManagerConfig } from './config'; @@ -51,7 +51,7 @@ export class TaskManagerPlugin // Routes const router = core.http.createRouter(); - healthRoute( + const serviceStatus$ = healthRoute( router, this.taskManager.then((tm) => createMonitoringStats(tm, config, logger)), logger, @@ -61,6 +61,16 @@ export class TaskManagerPlugin config.monitored_aggregated_stats_refresh_rate + 1000 ); + core.getStartServices().then(async () => { + core.status.set( + combineLatest([core.status.derivedStatus$, serviceStatus$]).pipe( + map(([derivedStatus, serviceStatus]) => + serviceStatus.level > derivedStatus.level ? serviceStatus : derivedStatus + ) + ) + ); + }); + return { addMiddleware: (middleware: Middleware) => { this.taskManager.then((tm) => tm.addMiddleware(middleware)); diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 2fc1d818f826..6b783b915009 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -4,13 +4,15 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { of, Subject } from 'rxjs'; +import { Observable, of, Subject } from 'rxjs'; +import { take } from 'rxjs/operators'; import { merge } from 'lodash'; import { httpServiceMock } from 'src/core/server/mocks'; import { healthRoute } from './health'; import { mockHandlerArguments } from './_mock_handler_arguments'; import { sleep, mockLogger } from '../test_utils'; import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; +import { ServiceStatusLevels } from 'src/core/server'; describe('healthRoute', () => { beforeEach(() => { @@ -77,7 +79,14 @@ describe('healthRoute', () => { const mockStat = mockHealthStats({ lastUpdate: new Date(Date.now() - 1500).toISOString(), }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000, 60000); + + const serviceStatus$ = healthRoute( + router, + Promise.resolve(of(mockStat)), + mockLogger(), + 1000, + 60000 + ); const [, handler] = router.get.mock.calls[0]; @@ -111,6 +120,35 @@ describe('healthRoute', () => { ), }, }); + + expect(await getLatest(serviceStatus$)).toMatchObject({ + level: ServiceStatusLevels.unavailable, + summary: 'Task Manager is unavailable', + meta: { + status: 'error', + ...summarizeMonitoringStats( + mockHealthStats({ + lastUpdate: expect.any(String), + stats: { + configuration: { + timestamp: expect.any(String), + }, + workload: { + timestamp: expect.any(String), + }, + runtime: { + timestamp: expect.any(String), + value: { + polling: { + lastSuccessfulPoll: expect.any(String), + }, + }, + }, + }, + }) + ), + }, + }); }); it('returns a error status if the workload stats have not been updated within the required cold freshness', async () => { @@ -261,3 +299,7 @@ function mockHealthStats(overrides = {}) { overrides ) as unknown) as MonitoringStats; } + +async function getLatest(stream$: Observable) { + return new Promise((resolve) => stream$.pipe(take(1)).subscribe((stats) => resolve(stats))); +} diff --git a/x-pack/plugins/task_manager/server/routes/health.ts 
b/x-pack/plugins/task_manager/server/routes/health.ts index f614511faa98..48aa5a346c43 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -11,9 +11,9 @@ import { IKibanaResponse, KibanaResponseFactory, } from 'kibana/server'; -import { Logger } from 'src/core/server'; -import { Observable } from 'rxjs'; -import { take } from 'rxjs/operators'; +import { Logger, ServiceStatus, ServiceStatusLevels } from 'src/core/server'; +import { Observable, from } from 'rxjs'; +import { take, mergeMap, map } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; import { @@ -23,14 +23,22 @@ import { RawMonitoringStats, } from '../monitoring'; +type MonitoredHealth = RawMonitoringStats & { status: HealthStatus; timestamp: string }; + +const LEVEL_SUMMARY = { + [ServiceStatusLevels.available.toString()]: 'Task Manager is healthy', + [ServiceStatusLevels.degraded.toString()]: 'Task Manager is unhealthy', + [ServiceStatusLevels.unavailable.toString()]: 'Task Manager is unavailable', +}; + export function healthRoute( router: IRouter, monitoringStats: Promise>, logger: Logger, requiredHotStatsFreshness: number, requiredColdStatsFreshness: number -) { - function calculateStatus(monitoredStats: MonitoringStats) { +): Observable { + function calculateStatus(monitoredStats: MonitoringStats): MonitoredHealth { const now = Date.now(); const timestamp = new Date(now).toISOString(); @@ -47,15 +55,23 @@ export function healthRoute( : hasStatus(summarizedStats.stats, HealthStatus.Warning) ? 
HealthStatus.Warning : HealthStatus.OK; - return { timestamp, status: healthStatus, ...summarizedStats }; } + // Only calculate the summarized stats (calculates all running averages and evaluates state) + // when needed by throttling down to the requiredHotStatsFreshness + const throttledMonitoredStats$ = from(monitoringStats).pipe( + mergeMap((monitoringStats$) => + monitoringStats$.pipe( + throttleTime(requiredHotStatsFreshness), + map((stats) => calculateStatus(stats)) + ) + ) + ); + /* Log Task Manager stats as a Debug log line at a fixed interval */ - monitoringStats.then((monitoringStats$) => { - monitoringStats$.pipe(throttleTime(requiredHotStatsFreshness)).subscribe((stats) => { - logger.debug(JSON.stringify(calculateStatus(stats))); - }); + throttledMonitoredStats$.subscribe((stats) => { + logger.debug(JSON.stringify(stats)); }); router.get( @@ -73,6 +89,28 @@ export function healthRoute( }); } ); + + return asServiceStatus(throttledMonitoredStats$); +} + +export function asServiceStatus( + monitoredHealth$: Observable +): Observable { + return monitoredHealth$.pipe( + map((monitoredHealth) => { + const level = + monitoredHealth.status === HealthStatus.OK + ? ServiceStatusLevels.available + : monitoredHealth.status === HealthStatus.Warning + ? 
ServiceStatusLevels.degraded + : ServiceStatusLevels.unavailable; + return { + level, + summary: LEVEL_SUMMARY[level.toString()], + meta: monitoredHealth, + }; + }) + ); } /** From 9fc7da6dcdf1a0dfc9fa8b7084b382b967fb6e1f Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 8 Oct 2020 14:35:05 +0100 Subject: [PATCH 32/67] fixed src import --- x-pack/plugins/task_manager/server/routes/health.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 48aa5a346c43..2d255ab03550 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -11,11 +11,11 @@ import { IKibanaResponse, KibanaResponseFactory, } from 'kibana/server'; -import { Logger, ServiceStatus, ServiceStatusLevels } from 'src/core/server'; import { Observable, from } from 'rxjs'; import { take, mergeMap, map } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; +import { Logger, ServiceStatus, ServiceStatusLevels } from '../../../../../src/core/server'; import { MonitoringStats, summarizeMonitoringStats, From 74039037adef487e5ce023b4105da029635c63cd Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 9 Oct 2020 19:57:31 +0100 Subject: [PATCH 33/67] estimate recurring tasks in schedule --- .../task_manager/server/lib/intervals.test.ts | 45 ++ .../task_manager/server/lib/intervals.ts | 12 +- .../monitoring/workload_statistics.test.ts | 452 +++++++++++------- .../server/monitoring/workload_statistics.ts | 136 +++++- 4 files changed, 461 insertions(+), 184 deletions(-) diff --git a/x-pack/plugins/task_manager/server/lib/intervals.test.ts b/x-pack/plugins/task_manager/server/lib/intervals.test.ts index 5ce6c33c5797..e79694915f92 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.test.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.test.ts 
@@ -8,6 +8,7 @@ import _ from 'lodash'; import sinon from 'sinon'; import { parseIntervalAsSecond, + parseIntervalAsMillisecond, intervalFromNow, intervalFromDate, secondsFromNow, @@ -50,6 +51,50 @@ describe('taskIntervals', () => { /Invalid interval "hello"\. Intervals must be of the form {number}m. Example: 5m/ ); }); + + test('returns an interval as s', () => { + expect(parseIntervalAsSecond('5s')).toEqual(5); + expect(parseIntervalAsSecond('15s')).toEqual(15); + expect(parseIntervalAsSecond('20m')).toEqual(20 * 60); + expect(parseIntervalAsSecond('61m')).toEqual(61 * 60); + expect(parseIntervalAsSecond('90m')).toEqual(90 * 60); + }); + }); + + describe('parseIntervalAsMillisecond', () => { + test('it accepts intervals in the form `Nm`', () => { + expect(() => parseIntervalAsMillisecond(`${_.random(1, 1000)}m`)).not.toThrow(); + }); + + test('it accepts intervals in the form `Ns`', () => { + expect(() => parseIntervalAsMillisecond(`${_.random(1, 1000)}s`)).not.toThrow(); + }); + + test('it rejects 0 based intervals', () => { + expect(() => parseIntervalAsMillisecond('0m')).toThrow( + /Invalid interval "0m"\. Intervals must be of the form {number}m. Example: 5m/ + ); + expect(() => parseIntervalAsMillisecond('0s')).toThrow( + /Invalid interval "0s"\. Intervals must be of the form {number}m. Example: 5m/ + ); + }); + + test('it rejects intervals are not of the form `Nm` or `Ns`', () => { + expect(() => parseIntervalAsMillisecond(`5m 2s`)).toThrow( + /Invalid interval "5m 2s"\. Intervals must be of the form {number}m. Example: 5m/ + ); + expect(() => parseIntervalAsMillisecond(`hello`)).toThrow( + /Invalid interval "hello"\. Intervals must be of the form {number}m. 
Example: 5m/ + ); + }); + + test('returns an interval as ms', () => { + expect(parseIntervalAsMillisecond('5s')).toEqual(5 * 1000); + expect(parseIntervalAsMillisecond('15s')).toEqual(15 * 1000); + expect(parseIntervalAsMillisecond('20m')).toEqual(20 * 60 * 1000); + expect(parseIntervalAsMillisecond('61m')).toEqual(61 * 60 * 1000); + expect(parseIntervalAsMillisecond('90m')).toEqual(90 * 60 * 1000); + }); }); describe('asInterval', () => { diff --git a/x-pack/plugins/task_manager/server/lib/intervals.ts b/x-pack/plugins/task_manager/server/lib/intervals.ts index 914bc35bb526..a28dfa62a501 100644 --- a/x-pack/plugins/task_manager/server/lib/intervals.ts +++ b/x-pack/plugins/task_manager/server/lib/intervals.ts @@ -11,9 +11,9 @@ export enum IntervalCadence { Second = 's', } const VALID_CADENCE = new Set(Object.values(IntervalCadence)); -const CADENCE_IN_SECONDS: Record = { - [IntervalCadence.Second]: 1, - [IntervalCadence.Minute]: 60, +const CADENCE_IN_MS: Record = { + [IntervalCadence.Second]: 1000, + [IntervalCadence.Minute]: 60 * 1000, }; function isCadence(cadence: IntervalCadence | string): cadence is IntervalCadence { @@ -81,6 +81,10 @@ export function secondsFromDate(date: Date, secs: number): Date { * @returns {number} The interval as seconds */ export const parseIntervalAsSecond = memoize((interval: string): number => { + return Math.round(parseIntervalAsMillisecond(interval) / 1000); +}); + +export const parseIntervalAsMillisecond = memoize((interval: string): number => { const numericAsStr: string = interval.slice(0, -1); const numeric: number = parseInt(numericAsStr, 10); const cadence: IntervalCadence | string = interval.slice(-1); @@ -89,7 +93,7 @@ export const parseIntervalAsSecond = memoize((interval: string): number => { `Invalid interval "${interval}". Intervals must be of the form {number}m. 
Example: 5m.` ); } - return numeric * CADENCE_IN_SECONDS[cadence]; + return numeric * CADENCE_IN_MS[cadence]; }); const isNumeric = (numAsStr: string) => /^\d+$/.test(numAsStr); diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 0714401bdf4e..f3a0c8e7e1ae 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -5,12 +5,18 @@ */ import { first, take, bufferCount } from 'rxjs/operators'; -import { WorkloadAggregation, createWorkloadAggregator, padBuckets } from './workload_statistics'; +import { + WorkloadAggregation, + createWorkloadAggregator, + padBuckets, + estimateRecurringTaskScheduling, +} from './workload_statistics'; import { taskManagerMock } from '../task_manager.mock'; import { mockLogger } from '../test_utils'; import { ConcreteTaskInstance } from '../task'; import { ESSearchResponse } from '../../../apm/typings/elasticsearch'; import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; +import { times } from 'lodash'; type MockESResult = ESSearchResponse< ConcreteTaskInstance, @@ -102,6 +108,13 @@ describe('Workload Statistics Aggregator', () => { field: 'task.runAt', fixed_interval: '3s', }, + aggs: { + interval: { + terms: { + field: 'task.schedule.interval', + }, + }, + }, }, }, }, @@ -121,105 +134,93 @@ describe('Workload Statistics Aggregator', () => { }); }); - const mockAggregatedResult: MockESResult = { - hits: { - hits: [], - max_score: 0, - total: { value: 4, relation: 'eq' }, - }, - took: 1, - timed_out: false, - _shards: { - total: 1, - successful: 1, - skipped: 1, - failed: 0, - }, - aggregations: { - schedule: { - buckets: [ - { - key: '3600s', - doc_count: 1, - }, - { - key: '60s', - doc_count: 1, - }, - { - key: '720m', - doc_count: 1, - }, - ], + const mockAggregatedResult: () => 
MockESResult = () => + ({ + hits: { + hits: [], + max_score: 0, + total: { value: 4, relation: 'eq' }, }, - taskType: { - buckets: [ - { - key: 'actions_telemetry', - doc_count: 2, - status: { - buckets: [ - { - key: 'idle', - doc_count: 2, - }, - ], + took: 1, + timed_out: false, + _shards: { + total: 1, + successful: 1, + skipped: 1, + failed: 0, + }, + aggregations: { + schedule: { + buckets: [ + { + key: '3600s', + doc_count: 1, }, - }, - { - key: 'alerting_telemetry', - doc_count: 1, - status: { - buckets: [ - { - key: 'idle', - doc_count: 1, - }, - ], + { + key: '60s', + doc_count: 1, }, - }, - { - key: 'session_cleanup', - doc_count: 1, - status: { - buckets: [ - { - key: 'idle', - doc_count: 1, - }, - ], + { + key: '720m', + doc_count: 1, }, - }, - ], - }, - idleTasks: { - doc_count: 13, - overdue: { - doc_count: 6, + ], }, - scheduleDensity: { + taskType: { buckets: [ - mockHistogram(Date.now(), Date.now() + 7 * 3000, Date.now() + 60000, 3000, [ - 2, - 2, - 5, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - ]), + { + key: 'actions_telemetry', + doc_count: 2, + status: { + buckets: [ + { + key: 'idle', + doc_count: 2, + }, + ], + }, + }, + { + key: 'alerting_telemetry', + doc_count: 1, + status: { + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, + { + key: 'session_cleanup', + doc_count: 1, + status: { + buckets: [ + { + key: 'idle', + doc_count: 1, + }, + ], + }, + }, ], }, + idleTasks: { + doc_count: 13, + overdue: { + doc_count: 6, + }, + scheduleDensity: { + buckets: [mockHistogram(0, 7 * 3000, 60 * 1000, 3000, [2, 2, 5, 0, 0, 0, 0, 0, 0, 1])], + }, + }, }, - }, - }; + } as MockESResult); test('returns a summary of the workload by task type', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult()); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -241,7 
+242,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a count of the overdue workload', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult()); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -258,7 +259,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload for the upcoming minute when refresh rate is high', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult()); const workloadAggregator = createWorkloadAggregator(taskManager, 10, 3000, mockLogger()); @@ -269,9 +270,9 @@ describe('Workload Statistics Aggregator', () => { // we have intervals every 3s, so we aggregate buckets 3s apart // in this mock, Elasticsearch found tasks scheduled in 21 (8th bucket), 24, 27 and 48s seconds from now // 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57 - // [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0 ] + // [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 ] // Above you see each bucket and the number of scheduled tasks we expect to have in them - scheduleDensity: [0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0], + scheduleDensity: [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], }); resolve(); }); @@ -280,38 +281,30 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload for twice refresh rate when rate is low', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult()); const workloadAggregator 
= createWorkloadAggregator(taskManager, 60 * 1000, 3000, mockLogger()); return new Promise((resolve) => { - workloadAggregator.pipe(first()).subscribe((result) => { - expect(result.key).toEqual('workload'); - expect(result.value).toMatchObject({ - // same schedule density as in previous test, but window of 40 buckets ((60s refresh * 2) / 3s = 40) - scheduleDensity: [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 2, - 5, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - ...new Array(20).fill(0), - ], + workloadAggregator.pipe(first()).subscribe(() => { + expect(taskManager.aggregate.mock.calls[0][0]).toMatchObject({ + aggs: { + idleTasks: { + aggs: { + scheduleDensity: { + range: { + field: 'task.runAt', + ranges: [ + { + from: 'now', + to: 'now+2m', + }, + ], + }, + }, + }, + }, + }, }); resolve(); }); @@ -320,7 +313,7 @@ describe('Workload Statistics Aggregator', () => { test('returns a histogram of the upcoming workload maxed out at 50 buckets when rate is too low', async () => { const taskManager = taskManagerMock.create(); - taskManager.aggregate.mockResolvedValue(mockAggregatedResult as MockESResult); + taskManager.aggregate.mockResolvedValue(mockAggregatedResult()); const workloadAggregator = createWorkloadAggregator( taskManager, @@ -331,32 +324,25 @@ describe('Workload Statistics Aggregator', () => { return new Promise((resolve) => { workloadAggregator.pipe(first()).subscribe((result) => { - expect(result.key).toEqual('workload'); - expect(result.value).toMatchObject({ - // same schedule density as in previous test, but window of 40 buckets ((60s refresh * 2) / 3s = 40) - scheduleDensity: [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - 2, - 5, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - ...new Array(30).fill(0), - ], + expect(taskManager.aggregate.mock.calls[0][0]).toMatchObject({ + aggs: { + idleTasks: { + aggs: { + scheduleDensity: { + range: { + field: 'task.runAt', + ranges: [ + { + from: 'now', + // 50 buckets of 3s = 50 * 3 = 150s + to: 'now+150s', 
+ }, + ], + }, + }, + }, + }, + }, }); resolve(); }); @@ -367,13 +353,13 @@ describe('Workload Statistics Aggregator', () => { const taskManager = taskManagerMock.create(); taskManager.aggregate .mockResolvedValueOnce( - setTaskTypeCount(mockAggregatedResult, 'alerting_telemetry', { + setTaskTypeCount(mockAggregatedResult(), 'alerting_telemetry', { idle: 2, }) ) .mockRejectedValueOnce(new Error('Elasticsearch has gone poof')) .mockResolvedValueOnce( - setTaskTypeCount(mockAggregatedResult, 'alerting_telemetry', { + setTaskTypeCount(mockAggregatedResult(), 'alerting_telemetry', { idle: 1, failed: 1, }) @@ -407,6 +393,116 @@ describe('Workload Statistics Aggregator', () => { }); }); +describe('estimateRecurringTaskScheduling', () => { + test('flattens out buckets with non recurring tasks', () => { + const now = Date.now(); + const schedule = times(10, (index) => ({ + key: index * 3000 + now, + nonRecurring: index, + })); + expect(estimateRecurringTaskScheduling(schedule, 3000)).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + }); + + test('estimates the buckets that recurring tasks might repeat in when recurring task interval equals the interval', () => { + const now = Date.now(); + const schedule: Array<{ + key: number; + nonRecurring: number; + recurring?: Array<[number, string]>; + }> = times(10, (index) => ({ + key: index * 3000 + now, + nonRecurring: 0, + })); + + schedule[0].nonRecurring = 1; + schedule[1].nonRecurring = 1; + schedule[4].recurring = [[1, '3s']]; + + expect(estimateRecurringTaskScheduling(schedule, 3000)).toEqual([1, 1, 0, 0, 1, 1, 1, 1, 1, 1]); + }); + + test('estimates the buckets that recurring tasks might repeat in when recurring task interval is larger than the interval', () => { + const now = Date.now(); + const schedule: Array<{ + key: number; + nonRecurring: number; + recurring?: Array<[number, string]>; + }> = times(10, (index) => ({ + key: index * 3000 + now, + nonRecurring: 0, + })); + + schedule[0].nonRecurring = 1; + 
schedule[1].nonRecurring = 1; + schedule[4].recurring = [[1, '6s']]; + + expect(estimateRecurringTaskScheduling(schedule, 3000)).toEqual([1, 1, 0, 0, 1, 0, 1, 0, 1, 0]); + }); + + test('estimates the buckets that recurring tasks might repeat in when recurring task interval doesnt divide by interval', () => { + const now = Date.now(); + const schedule: Array<{ + key: number; + nonRecurring: number; + recurring?: Array<[number, string]>; + }> = times(10, (index) => ({ + key: index * 3000 + now, + nonRecurring: 0, + })); + + schedule[0].nonRecurring = 1; + schedule[1].nonRecurring = 1; + schedule[4].recurring = [[1, '5s']]; + + expect(estimateRecurringTaskScheduling(schedule, 3000)).toEqual([1, 1, 0, 0, 1, 0, 1, 0, 1, 0]); + }); + + test('estimates the buckets that recurring tasks might repeat in when recurring tasks overlap', () => { + const now = Date.now(); + const schedule: Array<{ + key: number; + nonRecurring: number; + recurring?: Array<[number, string]>; + }> = times(20, (index) => ({ + key: index * 3000 + now, + nonRecurring: 0, + })); + + schedule[0].nonRecurring = 1; + schedule[1].nonRecurring = 1; + schedule[3].recurring = [[1, '3s']]; + schedule[4].recurring = [ + [2, '6s'], + [1, '8s'], + ]; + schedule[5].recurring = [[1, '5s']]; + schedule[6].nonRecurring = 3; + + expect(estimateRecurringTaskScheduling(schedule, 3000)).toEqual([ + 1, + 1, + 0, + 1, + 4, + 2, + 6, + 3, + 3, + 2, + 4, + 2, + 3, + 3, + 3, + 2, + 4, + 2, + 3, + 3, + ]); + }); +}); + describe('padBuckets', () => { test('returns zeroed out bucklets when there are no buckets in the histogram', async () => { expect( @@ -430,8 +526,8 @@ describe('padBuckets', () => { key: '2020-10-02T19:47:28.128Z-2020-10-02T19:48:28.128Z', from: 1601668048128, from_as_string: '2020-10-02T19:47:28.128Z', - to: 1601668077128, - to_as_string: '2020-10-02T19:47:57.128Z', + to: 1601668075128, + to_as_string: '2020-10-02T19:47:55.128Z', doc_count: 3, histogram: { buckets: [ @@ -439,31 +535,55 @@ 
describe('padBuckets', () => { key_as_string: '2020-10-02T19:47:27.000Z', key: 1601668047000, doc_count: 1, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, { key_as_string: '2020-10-02T19:47:30.000Z', key: 1601668050000, doc_count: 1, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, { key_as_string: '2020-10-02T19:47:33.000Z', key: 1601668053000, doc_count: 0, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, { key_as_string: '2020-10-02T19:47:36.000Z', key: 1601668056000, doc_count: 0, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, { key_as_string: '2020-10-02T19:47:39.000Z', key: 1601668059000, doc_count: 0, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, { key_as_string: '2020-10-02T19:47:42.000Z', key: 1601668062000, doc_count: 1, + interval: { + sum_other_doc_count: 0, + buckets: [], + }, }, ], }, @@ -486,11 +606,13 @@ describe('padBuckets', () => { key_as_string: '2020-10-02T20:40:09.000Z', key: 1601671209000, doc_count: 1, + interval: { buckets: [] }, }, { key_as_string: '2020-10-02T20:40:12.000Z', key: 1601671212000, doc_count: 1, + interval: { buckets: [] }, }, ], }, @@ -502,10 +624,10 @@ describe('padBuckets', () => { expect( padBuckets(20, 3000, { key: '2020-10-02T20:39:45.793Z-2020-10-02T20:40:14.793Z', - from: 1.601671185793e12, + from: 1601671185793, from_as_string: '2020-10-02T20:39:45.793Z', - to: 1.1601671244793, - to_as_string: '2020-10-02T20:40:44.793Z', + to: 1601671242793, + to_as_string: '2020-10-02T20:40:42.793Z', doc_count: 2, histogram: { buckets: [ @@ -513,11 +635,13 @@ describe('padBuckets', () => { key_as_string: '2020-10-02T20:40:09.000Z', key: 1601671209000, doc_count: 1, + interval: { buckets: [] }, }, { key_as_string: '2020-10-02T20:40:12.000Z', key: 1601671212000, doc_count: 1, + interval: { buckets: [] }, }, ], }, @@ -541,7 +665,6 @@ function setTaskTypeCount( key: taskType, doc_count: Object.values(status).reduce((sum, count) => sum + count, 0), status: { - 
doc_count_error_upper_bound: 0, sum_other_doc_count: 0, buckets: Object.entries(status).map(([key, count]) => ({ key, @@ -557,7 +680,6 @@ function setTaskTypeCount( aggregations: { ...aggregations, taskType: { - doc_count_error_upper_bound: 0, sum_other_doc_count: 0, buckets, }, @@ -583,24 +705,26 @@ function mockHistogram( interval: number, foundBuckets: Array ) { - const fromDate = new Date(from); - const toDate = new Date(to); + const now = Date.now(); + const fromDate = new Date(now + from); + const toDate = new Date(now + to); return { key: `${fromDate.toISOString()}-${toDate.toISOString()}`, - from, + from: now + from, from_as_string: fromDate.toISOString(), - to, + to: now + to, to_as_string: toDate.toISOString(), doc_count: foundBuckets.reduce((sum: number, count) => sum + (count ?? 0), 0), histogram: { buckets: foundBuckets.reduce( (histogramBuckets, count, index) => { if (typeof count === 'number') { - const key = new Date(findFrom + index * interval); + const key = new Date(now + findFrom + index * interval); histogramBuckets.push({ key_as_string: key.toISOString(), key: key.getTime(), doc_count: count, + interval: { buckets: [] }, }); } return histogramBuckets; @@ -609,6 +733,12 @@ function mockHistogram( key_as_string: string; key: number; doc_count: number; + interval: { + buckets: Array<{ + key: string; + doc_count: number; + }>; + }; }> ), }, diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index a46acb013a6c..8ab16eaf5fdc 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -13,7 +13,7 @@ import { ESSearchResponse } from '../../../apm/typings/elasticsearch'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { TaskManager } from '../task_manager'; import { ConcreteTaskInstance } from '../task'; -import { 
parseIntervalAsSecond, asInterval } from '../lib/intervals'; +import { parseIntervalAsSecond, asInterval, parseIntervalAsMillisecond } from '../lib/intervals'; import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; import { HealthStatus } from './monitoring_stats_stream'; @@ -64,6 +64,11 @@ export interface WorkloadAggregation { field: string; fixed_interval: string; }; + aggs: { + interval: { + terms: { field: string }; + }; + }; }; }; }; @@ -84,6 +89,7 @@ type ScheduleDensityResult = AggregationResultOf< WorkloadAggregation['aggs']['idleTasks']['aggs']['scheduleDensity'], {} >['buckets'][0]; +type ScheduledIntervals = ScheduleDensityResult['histogram']['buckets'][0]; // Set an upper bound just in case a customer sets a really high refresh rate const MAX_SHCEDULE_DENSITY_BUCKETS = 50; @@ -122,6 +128,7 @@ export function createWorkloadAggregator( }, aggs: { scheduleDensity: { + // create a window of upcoming tasks range: { field: 'task.runAt', ranges: [ @@ -129,11 +136,18 @@ export function createWorkloadAggregator( ], }, aggs: { + // create histogram of scheduling in the window, with each bucket being a polling interval histogram: { date_histogram: { field: 'task.runAt', fixed_interval: asInterval(pollInterval), }, + // break down each bucket in the historgram by schedule + aggs: { + interval: { + terms: { field: 'task.schedule.interval' }, + }, + }, }, }, }, @@ -216,38 +230,122 @@ export function createWorkloadAggregator( ); } +interface IntervalTaskCountTouple { + nonRecurring?: number; + recurring?: Array<[number, string]>; + key: number; +} + export function padBuckets( scheduleDensityBuckets: number, pollInterval: number, scheduleDensity: ScheduleDensityResult ): number[] { - if (scheduleDensity.from && scheduleDensity.histogram?.buckets?.length) { - const { histogram, from } = scheduleDensity; + if (scheduleDensity.from && scheduleDensity.to && scheduleDensity.histogram?.buckets?.length) { + const { histogram, from, to } = 
scheduleDensity; const firstBucket = histogram.buckets[0].key; - const bucketsToPadBeforeFirstBucket = bucketsBetween(from, firstBucket, pollInterval); - const bucketsToPadAfterLast = - scheduleDensityBuckets - (bucketsToPadBeforeFirstBucket + histogram.buckets.length); - return [ - ...(bucketsToPadBeforeFirstBucket > 0 - ? new Array(bucketsToPadBeforeFirstBucket).fill(0) - : []), - ...histogram.buckets.map((bucket) => bucket.doc_count), - ...(bucketsToPadAfterLast > 0 ? new Array(bucketsToPadAfterLast).fill(0) : []), - ]; + const lastBucket = histogram.buckets[histogram.buckets.length - 1].key; + const bucketsToPadBeforeFirstBucket = calculateBucketsBetween(from, firstBucket, pollInterval); + + const bucketsToPadAfterLast = calculateBucketsBetween( + lastBucket + pollInterval, + to, + pollInterval + ); + + return estimateRecurringTaskScheduling( + [ + ...bucketsToPadBeforeFirstBucket, + ...histogram.buckets.map(countByIntervalInBucket), + ...bucketsToPadAfterLast, + ], + pollInterval + ); } return new Array(scheduleDensityBuckets).fill(0); } -function bucketsBetween(from: number, to: number, interval: number) { +function countByIntervalInBucket(bucket: ScheduledIntervals): IntervalTaskCountTouple { + if (bucket.doc_count === 0) { + return { nonRecurring: 0, key: bucket.key }; + } + const recurring: Array<[number, string]> = []; + let nonRecurring = bucket.doc_count; + for (const intervalBucket of bucket.interval.buckets) { + recurring.push([intervalBucket.doc_count, intervalBucket.key as string]); + nonRecurring -= intervalBucket.doc_count; + } + + return { nonRecurring, recurring, key: bucket.key }; +} + +function calculateBucketsBetween( + from: number, + to: number, + interval: number, + bucketInterval: number = interval +): Array<{ key: number }> { + // as task interval might not divide by the pollInterval (aka the bucket interval) + // we have to adjust for the "drift" that occurs when estimating when the next + // bucket the task might actually get scheduled 
in + const actualInterval = Math.ceil(interval / bucketInterval) * bucketInterval; + + const buckets: Array<{ key: number }> = []; let fromBound = from; - let count = 0; - while (fromBound <= to) { - fromBound += interval; - count++; + while (fromBound < to) { + buckets.push({ key: fromBound }); + fromBound += actualInterval; } - return count; + + return buckets; +} + +export function estimateRecurringTaskScheduling( + scheduleDensity: IntervalTaskCountTouple[], + pollInterval: number +) { + const lastKey = scheduleDensity[scheduleDensity.length - 1].key; + + return scheduleDensity.map((bucket, currentBucketIndex) => { + for (const [count, interval] of bucket.recurring ?? []) { + for (const recurrance of calculateBucketsBetween( + bucket.key, + // `calculateBucketsBetween` uses the `to` as a non-inclusive upper bound + // but lastKey is a bucket we wish to include + lastKey + pollInterval, + parseIntervalAsMillisecond(interval), + pollInterval + )) { + const recurranceBucketIndex = + currentBucketIndex + Math.ceil((recurrance.key - bucket.key) / pollInterval); + + if (recurranceBucketIndex < scheduleDensity.length) { + scheduleDensity[recurranceBucketIndex].nonRecurring = + count + (scheduleDensity[recurranceBucketIndex].nonRecurring ?? 0); + } + } + } + return bucket.nonRecurring ?? 0; + }); } +// function estimateDriftInExecutionDueToPollInterval( +// scheduledExecutions: number[], +// pollInterval: number +// ) { +// const recuranceBeginsAt = scheduledExecutions[0]; +// let drift = 0; +// return scheduledExecutions.map((scheduledExecution, cycle) => { +// const estimatedExectionCycleTime = cycle * pollInterval; +// const estimatedExecution = scheduledExecution + drift; + +// drift = estimatedExectionCycleTime > estimatedExecution ? 
() +// // drift = (scheduledExecution - estimatedExecution) % pollInterval; + +// return estimatedExecution; +// }); +// } + export function summarizeWorkloadStat( workloadStats: WorkloadStat ): { value: WorkloadStat; status: HealthStatus } { From e47b6194b705464d99b2889560af3c6123eb38b5 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 12 Oct 2020 11:06:44 +0100 Subject: [PATCH 34/67] make hot stats refresh rate configurable --- .../task_manager/server/config.test.ts | 29 +++++ x-pack/plugins/task_manager/server/config.ts | 119 ++++++++++-------- x-pack/plugins/task_manager/server/plugin.ts | 5 +- 3 files changed, 101 insertions(+), 52 deletions(-) diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index f0c193763899..cb78bc794529 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -16,6 +16,7 @@ describe('config validation', () => { "max_poll_inactivity_cycles": 10, "max_workers": 10, "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_required_freshness": 4000, "monitored_stats_running_average_window": 50, "poll_interval": 3000, "request_capacity": 1000, @@ -33,4 +34,32 @@ describe('config validation', () => { `"[index]: \\".tasks\\" is an invalid Kibana Task Manager index, as it is already in use by the ElasticSearch Tasks Manager"` ); }); + + test('the required freshness of the monitored stats config must always be less-than-equal to the poll interval', () => { + const config: Record = { + monitored_stats_required_freshness: 100, + }; + expect(() => { + configSchema.validate(config); + }).toThrowErrorMatchingInlineSnapshot( + `"The specified monitored_stats_required_freshness (100) is invalid, as it is below the poll_interval (3000)"` + ); + }); + test('the default required freshness of the monitored stats is poll interval with a slight buffer', () => { + const config: Record = {}; + 
expect(configSchema.validate(config)).toMatchInlineSnapshot(` + Object { + "enabled": true, + "index": ".kibana_task_manager", + "max_attempts": 3, + "max_poll_inactivity_cycles": 10, + "max_workers": 10, + "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_required_freshness": 4000, + "monitored_stats_running_average_window": 50, + "poll_interval": 3000, + "request_capacity": 1000, + } + `); + }); }); diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index f2de10927371..de82a6ffa888 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -12,60 +12,79 @@ export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; // Monitoring Constants // =================== -// Refresh "pull based" monitored stats at a default rate of once a minute +// Refresh aggregated monitored stats at a default rate of once a minute export const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000; export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50; -export const configSchema = schema.object({ - enabled: schema.boolean({ defaultValue: true }), - /* The maximum number of times a task will be attempted before being abandoned as failed */ - max_attempts: schema.number({ - defaultValue: 3, - min: 1, - }), - /* How often, in milliseconds, the task manager will look for more work. */ - poll_interval: schema.number({ - defaultValue: DEFAULT_POLL_INTERVAL, - min: 100, - }), - /* How many poll interval cycles can work take before it's timed out. */ - max_poll_inactivity_cycles: schema.number({ - defaultValue: DEFAULT_MAX_POLL_INACTIVITY_CYCLES, - min: 1, - }), - /* How many requests can Task Manager buffer before it rejects new requests. */ - request_capacity: schema.number({ - // a nice round contrived number, feel free to change as we learn how it behaves - defaultValue: 1000, - min: 1, - }), - /* The name of the index used to store task information. 
*/ - index: schema.string({ - defaultValue: '.kibana_task_manager', - validate: (val) => { - if (val.toLowerCase() === '.tasks') { - return `"${val}" is an invalid Kibana Task Manager index, as it is already in use by the ElasticSearch Tasks Manager`; +export const configSchema = schema.object( + { + enabled: schema.boolean({ defaultValue: true }), + /* The maximum number of times a task will be attempted before being abandoned as failed */ + max_attempts: schema.number({ + defaultValue: 3, + min: 1, + }), + /* How often, in milliseconds, the task manager will look for more work. */ + poll_interval: schema.number({ + defaultValue: DEFAULT_POLL_INTERVAL, + min: 100, + }), + /* How many poll interval cycles can work take before it's timed out. */ + max_poll_inactivity_cycles: schema.number({ + defaultValue: DEFAULT_MAX_POLL_INACTIVITY_CYCLES, + min: 1, + }), + /* How many requests can Task Manager buffer before it rejects new requests. */ + request_capacity: schema.number({ + // a nice round contrived number, feel free to change as we learn how it behaves + defaultValue: 1000, + min: 1, + }), + /* The name of the index used to store task information. */ + index: schema.string({ + defaultValue: '.kibana_task_manager', + validate: (val) => { + if (val.toLowerCase() === '.tasks') { + return `"${val}" is an invalid Kibana Task Manager index, as it is already in use by the ElasticSearch Tasks Manager`; + } + }, + }), + /* The maximum number of tasks that this Kibana instance will run simultaneously. */ + max_workers: schema.number({ + defaultValue: DEFAULT_MAX_WORKERS, + // disable the task manager rather than trying to specify it with 0 workers + min: 1, + }), + /* The rate at emit fresh monitored stats. By default we'll use the poll_interval (+ a slight buffer) */ + monitored_stats_required_freshness: schema.number({ + defaultValue: (config?: unknown) => + ((config as { poll_interval: number })?.poll_interval ?? 
DEFAULT_POLL_INTERVAL) + 1000, + min: 100, + }), + /* The rate at which we refresh monitored stats that require aggregation queries against ES. */ + monitored_aggregated_stats_refresh_rate: schema.number({ + defaultValue: DEFAULT_MONITORING_REFRESH_RATE, + /* don't run monitored stat aggregations any faster than once every 5 seconds */ + min: 5000, + }), + /* The size of the running average window for monitored stats. */ + monitored_stats_running_average_window: schema.number({ + defaultValue: DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW, + max: 100, + min: 10, + }), + }, + { + validate: (config) => { + if ( + config.monitored_stats_required_freshness && + config.poll_interval && + config.monitored_stats_required_freshness < config.poll_interval + ) { + return `The specified monitored_stats_required_freshness (${config.monitored_stats_required_freshness}) is invalid, as it is below the poll_interval (${config.poll_interval})`; } }, - }), - /* The maximum number of tasks that this Kibana instance will run simultaneously. */ - max_workers: schema.number({ - defaultValue: DEFAULT_MAX_WORKERS, - // disable the task manager rather than trying to specify it with 0 workers - min: 1, - }), - /* The rate at which we refresh monitored stats that require aggregation queries against ES. */ - monitored_aggregated_stats_refresh_rate: schema.number({ - defaultValue: DEFAULT_MONITORING_REFRESH_RATE, - /* don't run monitored stat aggregations any faster than once every 5 seconds */ - min: 5000, - }), - /* The size of the running average window for monitored stats. 
*/ - monitored_stats_running_average_window: schema.number({ - defaultValue: DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW, - max: 100, - min: 10, - }), -}); + } +); export type TaskManagerConfig = TypeOf; diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index e93b639e2c8d..359e2ed3979b 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -55,8 +55,9 @@ export class TaskManagerPlugin router, this.taskManager.then((tm) => createMonitoringStats(tm, config, logger)), logger, - // if "hot" health stats are any more stale than the pollInterval (+1s buffer) consider the system unhealthy - config.poll_interval + 1000, + // if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default) + // consider the system unhealthy + config.monitored_stats_required_freshness, // if "cold" health stats are any more stale than the configured refresh, consider the system unhealthy config.monitored_aggregated_stats_refresh_rate + 1000 ); From 7798875e52d54cb5fbd8a75740a99293598154bd Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 12 Oct 2020 12:06:00 +0100 Subject: [PATCH 35/67] ensure we dont aggregate workload before tm is readyt --- .../task_manager/server/monitoring/workload_statistics.ts | 6 +++++- x-pack/plugins/task_manager/server/routes/health.ts | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 8ab16eaf5fdc..82b61583e96e 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -5,7 +5,7 @@ */ import { timer } from 'rxjs'; -import { concatMap, map, catchError } from 'rxjs/operators'; +import { concatMap, map, filter, catchError } from 
'rxjs/operators'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { keyBy, mapValues } from 'lodash'; @@ -108,6 +108,10 @@ export function createWorkloadAggregator( ); return timer(0, refreshInterval).pipe( + // Setup might occurr before Kibana is entirely setup + // To avoid erros due to ES not being ready, we'll wait until Start + // to begin polling for the workload + filter(() => taskManager.isStarted), concatMap(() => taskManager.aggregate({ aggs: { diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 2d255ab03550..9f84cc881d67 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -15,6 +15,7 @@ import { Observable, from } from 'rxjs'; import { take, mergeMap, map } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; +import { JsonValue } from 'src/plugins/kibana_utils/common'; import { Logger, ServiceStatus, ServiceStatusLevels } from '../../../../../src/core/server'; import { MonitoringStats, @@ -148,7 +149,7 @@ function hasStatus(stats: RawMonitoringStats['stats'], status: HealthStatus): bo .includes(true); } -function getOldestTimestamp(...timestamps: unknown[]): number { +function getOldestTimestamp(...timestamps: Array): number { const validTimestamps = timestamps .map((timestamp) => (isString(timestamp) ? 
Date.parse(timestamp) : NaN)) .filter((timestamp) => !isNaN(timestamp)); From a47c7aa688b25eccfffb86df5eaaf394dcdba3fa Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 12 Oct 2020 15:45:32 +0100 Subject: [PATCH 36/67] fixed config mocks --- .../server/monitoring/monitoring_stats_stream.test.ts | 1 + x-pack/plugins/task_manager/server/task_manager.test.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts index 063947f2ecad..b8bcf15101d2 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -21,6 +21,7 @@ describe('createMonitoringStatsStream', () => { index: 'foo', max_attempts: 9, poll_interval: 6000000, + monitored_stats_required_freshness: 6000000, max_poll_inactivity_cycles: 10, request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index 52a3beaf174d..f8e25edcc0ae 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -41,6 +41,7 @@ describe('TaskManager', () => { index: 'foo', max_attempts: 9, poll_interval: 6000000, + monitored_stats_required_freshness: 6000000, max_poll_inactivity_cycles: 10, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, From 689d0147c5be8e629e3501acd58c6ef7085484cf Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 12 Oct 2020 15:55:07 +0100 Subject: [PATCH 37/67] updated docs --- x-pack/plugins/task_manager/server/MONITORING.md | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md 
b/x-pack/plugins/task_manager/server/MONITORING.md index 10bf60ad7e58..13120fb41b53 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -9,16 +9,12 @@ There are three different sections to the stats returned by the `health` api. - `runtime`: Tracks Task Manager's performance. ### Configuring the Stats -There are two new configurations: +There are three new configurations: -- `xpack.task_manager.monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. These metrics require an aggregation against Elasticsearch and adds load to the system, hence we want to limit how often we execute these. This covers the entire `workload` section of the stats. By default this is set to `60s` +- `xpack.task_manager.monitored_stats_required_freshness` - The _required freshness_ of critical "Hot" stats, which means that if key stats (last polling cycle time, for example) haven't been refreshed within the specified duration, the `_health` endpoint and service will report an `Error` status. By default this is inferred from the configured `poll_interval` and is set to `poll_interval` plus a `1s` buffer. +- `xpack.task_manager.monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. These metrics require an aggregation against Elasticsearch and add load to the system, hence we want to limit how often we execute these. We also inffer the _required freshness_ of these "Cold" metrics from this configuration, which means that if these stats have not been updated within the required duration then the `_health` endpoint and service will report an `Error` status. This covers the entire `workload` section of the stats. By default this is configured to `60s`, and as a result the _required freshness_ defaults to `61s` (refresh plus a `1s` buffer). 
- `xpack.task_manager.monitored_stats_running_average_window`- Dictates the size of the window used to calculate the running average of various "Hot" stats, such as the time it takes to run a task, the _drift_ that tasks experience etc. These stats are collected throughout the lifecycle of tasks and this window will dictate how large the queue we keep in memory would be, and how many values we need to calculate the average against. We do not calculate the average on *every* new value, but rather only when the time comes to summarize the stats before logging them or returning them to the API endpoint. -Other configurations are inferred from existing config values. -For example: -- The _required freshness_ of critical "Hot" stats in always `pollingInterval + 1s`, which means that if key stats (last polling cycle time, for example) haven't been refreshed within the time scale of a single interval + 1s the stat will report an `Error` status. -- The _required freshness_ of critical "Cold" stats is `monitored_aggregated_stats_refresh_rate + 1s` , which means that if these stats (workload, for example) has not been updated within the required refresh rate then the api will return an `Error` status. - ## Consuming Health Stats Task Manager exposes a `/api/task_manager/_health` api which returns the _latest_ stats. Calling this API is designed to be fast and doesn't actually perform any checks- rather it returns the result of the latest stats in the system, and is design in such a way that you could call it from an external service on a regular basis without worrying that you'll be adding substantial load to the system. 
From e9db32e02fd371bf2bb580cb24d799e841e45ba5 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 12 Oct 2020 19:16:36 +0100 Subject: [PATCH 38/67] fixed bvucketing --- .../monitoring/workload_statistics.test.ts | 26 +++++++------- .../server/monitoring/workload_statistics.ts | 34 +++++++------------ 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index f3a0c8e7e1ae..1dbaf93eefb9 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -212,7 +212,9 @@ describe('Workload Statistics Aggregator', () => { doc_count: 6, }, scheduleDensity: { - buckets: [mockHistogram(0, 7 * 3000, 60 * 1000, 3000, [2, 2, 5, 0, 0, 0, 0, 0, 0, 1])], + buckets: [ + mockHistogram(0, 7 * 3000 + 500, 60 * 1000, 3000, [2, 2, 5, 0, 0, 0, 0, 0, 0, 1]), + ], }, }, }, @@ -524,10 +526,10 @@ describe('padBuckets', () => { expect( padBuckets(10, 3000, { key: '2020-10-02T19:47:28.128Z-2020-10-02T19:48:28.128Z', - from: 1601668048128, - from_as_string: '2020-10-02T19:47:28.128Z', - to: 1601668075128, - to_as_string: '2020-10-02T19:47:55.128Z', + from: 1601668046000, + from_as_string: '2020-10-02T19:47:26.000Z', + to: 1601668076000, + to_as_string: '2020-10-02T19:47:56.000Z', doc_count: 3, histogram: { buckets: [ @@ -595,10 +597,10 @@ describe('padBuckets', () => { expect( padBuckets(10, 3000, { key: '2020-10-02T20:39:45.793Z-2020-10-02T20:40:14.793Z', - from: 1.601671185793e12, - from_as_string: '2020-10-02T20:39:45.793Z', - to: 1.601671214793e12, - to_as_string: '2020-10-02T20:40:14.793Z', + from: 1601671183000, + from_as_string: '2020-10-02T20:39:43.000Z', + to: 1601671213000, + to_as_string: '2020-10-02T20:40:13.000Z', doc_count: 2, histogram: { buckets: [ @@ -626,8 +628,8 @@ describe('padBuckets', () => { key: 
'2020-10-02T20:39:45.793Z-2020-10-02T20:40:14.793Z', from: 1601671185793, from_as_string: '2020-10-02T20:39:45.793Z', - to: 1601671242793, - to_as_string: '2020-10-02T20:40:42.793Z', + to: 1601671245793, + to_as_string: '2020-10-02T20:40:45.793Z', doc_count: 2, histogram: { buckets: [ @@ -646,7 +648,7 @@ describe('padBuckets', () => { ], }, }) - ).toEqual([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); + ).toEqual([0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); }); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 82b61583e96e..fcf7a0cca87d 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -249,8 +249,8 @@ export function padBuckets( const { histogram, from, to } = scheduleDensity; const firstBucket = histogram.buckets[0].key; const lastBucket = histogram.buckets[histogram.buckets.length - 1].key; - const bucketsToPadBeforeFirstBucket = calculateBucketsBetween(from, firstBucket, pollInterval); + const bucketsToPadBeforeFirstBucket = calculateBucketsBetween(firstBucket, from, pollInterval); const bucketsToPadAfterLast = calculateBucketsBetween( lastBucket + pollInterval, to, @@ -289,19 +289,28 @@ function calculateBucketsBetween( interval: number, bucketInterval: number = interval ): Array<{ key: number }> { + const calcForwardInTime = from < to; + // as task interval might not divide by the pollInterval (aka the bucket interval) // we have to adjust for the "drift" that occurs when estimating when the next // bucket the task might actually get scheduled in const actualInterval = Math.ceil(interval / bucketInterval) * bucketInterval; const buckets: Array<{ key: number }> = []; - let fromBound = from; - while (fromBound < to) { + const toBound = calcForwardInTime ? 
to : -(to + actualInterval); + let fromBound = calcForwardInTime ? from : -from; + + while (fromBound < toBound) { buckets.push({ key: fromBound }); fromBound += actualInterval; } - return buckets; + return calcForwardInTime + ? buckets + : buckets.reverse().map((bucket) => { + bucket.key = Math.abs(bucket.key); + return bucket; + }); } export function estimateRecurringTaskScheduling( @@ -333,23 +342,6 @@ export function estimateRecurringTaskScheduling( }); } -// function estimateDriftInExecutionDueToPollInterval( -// scheduledExecutions: number[], -// pollInterval: number -// ) { -// const recuranceBeginsAt = scheduledExecutions[0]; -// let drift = 0; -// return scheduledExecutions.map((scheduledExecution, cycle) => { -// const estimatedExectionCycleTime = cycle * pollInterval; -// const estimatedExecution = scheduledExecution + drift; - -// drift = estimatedExectionCycleTime > estimatedExecution ? () -// // drift = (scheduledExecution - estimatedExecution) % pollInterval; - -// return estimatedExecution; -// }); -// } - export function summarizeWorkloadStat( workloadStats: WorkloadStat ): { value: WorkloadStat; status: HealthStatus } { From 5ff008dad810e137b26b3f23f1e837478ef594f5 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 13 Oct 2020 10:43:01 +0100 Subject: [PATCH 39/67] renamed schedule density --- x-pack/plugins/task_manager/server/MONITORING.md | 2 +- .../monitoring/workload_statistics.test.ts | 2 +- .../server/monitoring/workload_statistics.ts | 4 ++-- x-pack/plugins/task_manager/server/plugin.ts | 1 + .../task_manager/server/routes/health.test.ts | 16 +++++++++++----- .../plugins/task_manager/server/routes/health.ts | 5 +++-- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index 13120fb41b53..eaa9013ccad3 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ 
-150,7 +150,7 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g Here we see that on the 3rd polling interval from *now* (which is ~9 seconds from now, as pollInterval is `3s`) there is one task due to run. We also see that there are 5 due two intervals later, which is fine as we have a max workers of `10` */ - "scheduleDensity": [0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + "estimatedScheduleDensity": [0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } }, "runtime": { diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 1dbaf93eefb9..7037205b63c5 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -274,7 +274,7 @@ describe('Workload Statistics Aggregator', () => { // 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57 // [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 ] // Above you see each bucket and the number of scheduled tasks we expect to have in them - scheduleDensity: [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + estimatedScheduleDensity: [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], }); resolve(); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index fcf7a0cca87d..eff110bbf6de 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -32,7 +32,7 @@ export interface WorkloadStat extends JsonObject { taskTypes: TaskTypeStat; schedule: Array<[string, number]>; overdue: number; 
- scheduleDensity: number[]; + estimatedScheduleDensity: number[]; } export interface WorkloadAggregation { @@ -219,7 +219,7 @@ export function createWorkloadAggregator( ) .map((schedule) => [schedule.key as string, schedule.doc_count]), overdue, - scheduleDensity: padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), + estimatedScheduleDensity: padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), }; return { key: 'workload', diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 359e2ed3979b..dc293291ac6f 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -55,6 +55,7 @@ export class TaskManagerPlugin router, this.taskManager.then((tm) => createMonitoringStats(tm, config, logger)), logger, + this.taskManagerId, // if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default) // consider the system unhealthy config.monitored_stats_required_freshness, diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 6b783b915009..4089f4d37c02 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -7,6 +7,7 @@ import { Observable, of, Subject } from 'rxjs'; import { take } from 'rxjs/operators'; import { merge } from 'lodash'; +import uuid from 'uuid'; import { httpServiceMock } from 'src/core/server/mocks'; import { healthRoute } from './health'; import { mockHandlerArguments } from './_mock_handler_arguments'; @@ -22,7 +23,7 @@ describe('healthRoute', () => { it('registers the route', async () => { const router = httpServiceMock.createRouter(); - healthRoute(router, Promise.resolve(of()), mockLogger(), 1000, 1000); + healthRoute(router, Promise.resolve(of()), mockLogger(), uuid.v4(), 1000, 1000); const [config] = 
router.get.mock.calls[0]; @@ -41,7 +42,8 @@ describe('healthRoute', () => { const stats = Promise.resolve(new Subject()); - healthRoute(router, stats, logger, 1000, 60000); + const id = uuid.v4(); + healthRoute(router, stats, logger, id, 1000, 60000); const stats$ = await stats; @@ -53,6 +55,7 @@ describe('healthRoute', () => { const firstDebug = JSON.parse(logger.debug.mock.calls[0][0]); expect(firstDebug).toMatchObject({ + id, timestamp: expect.any(String), status: expect.any(String), ...summarizeMonitoringStats(mockStat), @@ -60,11 +63,13 @@ describe('healthRoute', () => { const secondDebug = JSON.parse(logger.debug.mock.calls[1][0]); expect(secondDebug).not.toMatchObject({ + id, timestamp: expect.any(String), status: expect.any(String), ...summarizeMonitoringStats(skippedMockStat), }); expect(secondDebug).toMatchObject({ + id, timestamp: expect.any(String), status: expect.any(String), ...summarizeMonitoringStats(nextMockStat), @@ -84,6 +89,7 @@ describe('healthRoute', () => { router, Promise.resolve(of(mockStat)), mockLogger(), + uuid.v4(), 1000, 60000 ); @@ -162,7 +168,7 @@ describe('healthRoute', () => { }, }, }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 5000, 60000); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), uuid.v4(), 5000, 60000); const [, handler] = router.get.mock.calls[0]; @@ -213,7 +219,7 @@ describe('healthRoute', () => { }, }, }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), 1000, 60000); + healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), uuid.v4(), 1000, 60000); const [, handler] = router.get.mock.calls[0]; @@ -277,7 +283,7 @@ function mockHealthStats(overrides = {}) { }, schedule: {}, overdue: 0, - scheduleDensity: [], + estimatedScheduleDensity: [], }, }, runtime: { diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 9f84cc881d67..d0df07af4f1b 100644 --- 
a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -24,7 +24,7 @@ import { RawMonitoringStats, } from '../monitoring'; -type MonitoredHealth = RawMonitoringStats & { status: HealthStatus; timestamp: string }; +type MonitoredHealth = RawMonitoringStats & { id: string; status: HealthStatus; timestamp: string }; const LEVEL_SUMMARY = { [ServiceStatusLevels.available.toString()]: 'Task Manager is healthy', @@ -36,6 +36,7 @@ export function healthRoute( router: IRouter, monitoringStats: Promise>, logger: Logger, + taskManagerId: string, requiredHotStatsFreshness: number, requiredColdStatsFreshness: number ): Observable { @@ -56,7 +57,7 @@ export function healthRoute( : hasStatus(summarizedStats.stats, HealthStatus.Warning) ? HealthStatus.Warning : HealthStatus.OK; - return { timestamp, status: healthStatus, ...summarizedStats }; + return { id: taskManagerId, timestamp, status: healthStatus, ...summarizedStats }; } // Only calculate the summerized stats (calculates all runnign averages and evaluates state) From 9d9205504ddd9b7e9cebde1f7437a563bb9f41d3 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 13 Oct 2020 12:54:22 +0100 Subject: [PATCH 40/67] changed to p metrics --- .../monitoring/task_run_calcultors.test.ts | 12 ++- .../server/monitoring/task_run_calcultors.ts | 12 ++- .../monitoring/task_run_statistics.test.ts | 12 ++- .../server/monitoring/workload_statistics.ts | 9 +- .../task_manager/server/routes/health.test.ts | 63 ++++++++----- .../task_manager/server/routes/health.ts | 88 ++++++++++--------- .../test_suites/task_manager/health_route.ts | 20 +++-- 7 files changed, 128 insertions(+), 88 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts index e2994dd1098f..eb8cabd9f3a8 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts +++ 
b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.test.ts @@ -15,10 +15,14 @@ import { describe('calculateRunningAverage', () => { test('calculates the running average and median of a window of values', async () => { - expect(calculateRunningAverage([2, 2, 4, 6, 6])).toEqual({ - mean: 4, - median: 4, - }); + expect(calculateRunningAverage([2, 2, 4, 6, 6])).toMatchInlineSnapshot(` + Object { + "p50": 4, + "p90": 6, + "p95": 6, + "p99": 6, + } + `); }); }); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts index bda99e8735eb..524615058427 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts @@ -9,14 +9,18 @@ import { JsonObject } from 'src/plugins/kibana_utils/common'; import { isUndefined, countBy, mapValues } from 'lodash'; export interface AveragedStat extends JsonObject { - mean: number; - median: number; + p50: number; + p90: number; + p95: number; + p99: number; } export function calculateRunningAverage(values: number[]): AveragedStat { return { - mean: Math.round(stats.mean(values)), - median: stats.median(values), + p50: stats.percentile(values, 0.5), + p90: stats.percentile(values, 0.9), + p95: stats.percentile(values, 0.95), + p99: stats.percentile(values, 0.99), }; } diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index 247f78808e62..4cc82160f580 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -49,8 +49,10 @@ describe('Task Run Statistics', () => { window: number[] ) { expect(taskStat.value.drift).toMatchObject({ - mean: Math.round(stats.mean(window)), - median: stats.median(window), + p50: 
stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), }); } @@ -111,8 +113,10 @@ describe('Task Run Statistics', () => { ) { for (const [type, window] of Object.entries(windows)) { expect(taskStat.value.execution.duration[type]).toMatchObject({ - mean: Math.round(stats.mean(window)), - median: stats.median(window), + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), }); } } diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index eff110bbf6de..7c69f250e7f5 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -5,7 +5,7 @@ */ import { timer } from 'rxjs'; -import { concatMap, map, filter, catchError } from 'rxjs/operators'; +import { mergeMap, map, filter, catchError } from 'rxjs/operators'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { keyBy, mapValues } from 'lodash'; @@ -112,7 +112,7 @@ export function createWorkloadAggregator( // To avoid erros due to ES not being ready, we'll wait until Start // to begin polling for the workload filter(() => taskManager.isStarted), - concatMap(() => + mergeMap(() => taskManager.aggregate({ aggs: { taskType: { @@ -136,7 +136,10 @@ export function createWorkloadAggregator( range: { field: 'task.runAt', ranges: [ - { from: `now`, to: `now+${asInterval(scheduleDensityBuckets * pollInterval)}` }, + { + from: `now`, + to: `now+${asInterval(scheduleDensityBuckets * pollInterval)}`, + }, ], }, aggs: { diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 4089f4d37c02..b36d3cd2447f 100644 --- 
a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -81,24 +81,30 @@ describe('healthRoute', () => { it('returns a error status if the overall stats have not been updated within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); - const mockStat = mockHealthStats({ - lastUpdate: new Date(Date.now() - 1500).toISOString(), - }); + const stats$ = new Subject(); const serviceStatus$ = healthRoute( router, - Promise.resolve(of(mockStat)), + Promise.resolve(stats$), mockLogger(), uuid.v4(), 1000, 60000 ); + const serviceStatus = getLatest(serviceStatus$); + const [, handler] = router.get.mock.calls[0]; const [context, req, res] = mockHandlerArguments({}, {}, ['ok', 'internalError']); - await sleep(2000); + await sleep(0); + + stats$.next( + mockHealthStats({ + lastUpdate: new Date(Date.now() - 1500).toISOString(), + }) + ); expect(await handler(context, req, res)).toMatchObject({ body: { @@ -127,7 +133,7 @@ describe('healthRoute', () => { }, }); - expect(await getLatest(serviceStatus$)).toMatchObject({ + expect(await serviceStatus).toMatchObject({ level: ServiceStatusLevels.unavailable, summary: 'Task Manager is unavailable', meta: { @@ -160,15 +166,22 @@ describe('healthRoute', () => { it('returns a error status if the workload stats have not been updated within the required cold freshness', async () => { const router = httpServiceMock.createRouter(); + const stats$ = new Subject(); + + healthRoute(router, Promise.resolve(stats$), mockLogger(), uuid.v4(), 5000, 60000); + + await sleep(0); + const lastUpdateOfWorkload = new Date(Date.now() - 120000).toISOString(); - const mockStat = mockHealthStats({ - stats: { - workload: { - timestamp: lastUpdateOfWorkload, + stats$.next( + mockHealthStats({ + stats: { + workload: { + timestamp: lastUpdateOfWorkload, + }, }, - }, - }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), uuid.v4(), 5000, 60000); 
+ }) + ); const [, handler] = router.get.mock.calls[0]; @@ -207,19 +220,25 @@ describe('healthRoute', () => { it('returns a error status if the poller hasnt polled within the required hot freshness', async () => { const router = httpServiceMock.createRouter(); + const stats$ = new Subject(); + healthRoute(router, Promise.resolve(stats$), mockLogger(), uuid.v4(), 1000, 60000); + + await sleep(0); + const lastSuccessfulPoll = new Date(Date.now() - 2000).toISOString(); - const mockStat = mockHealthStats({ - stats: { - runtime: { - value: { - polling: { - lastSuccessfulPoll, + stats$.next( + mockHealthStats({ + stats: { + runtime: { + value: { + polling: { + lastSuccessfulPoll, + }, }, }, }, - }, - }); - healthRoute(router, Promise.resolve(of(mockStat)), mockLogger(), uuid.v4(), 1000, 60000); + }) + ); const [, handler] = router.get.mock.calls[0]; diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index d0df07af4f1b..471b3f060086 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -11,8 +11,8 @@ import { IKibanaResponse, KibanaResponseFactory, } from 'kibana/server'; -import { Observable, from } from 'rxjs'; -import { take, mergeMap, map } from 'rxjs/operators'; +import { Observable, from, Subject } from 'rxjs'; +import { take, mergeMap, tap, map } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; import { JsonValue } from 'src/plugins/kibana_utils/common'; @@ -43,7 +43,6 @@ export function healthRoute( function calculateStatus(monitoredStats: MonitoringStats): MonitoredHealth { const now = Date.now(); const timestamp = new Date(now).toISOString(); - const summarizedStats = summarizeMonitoringStats(monitoredStats); /** @@ -60,21 +59,30 @@ export function healthRoute( return { id: taskManagerId, timestamp, status: healthStatus, ...summarizedStats }; } - // Only calculate the 
summerized stats (calculates all runnign averages and evaluates state) - // when needed by throttling down to the requiredHotStatsFreshness - const throttledMonitoredStats$ = from(monitoringStats).pipe( - mergeMap((monitoringStats$) => - monitoringStats$.pipe( - throttleTime(requiredHotStatsFreshness), - map((stats) => calculateStatus(stats)) - ) - ) - ); + const serviceStatus$: Subject = new Subject(); + + /* keep track of last health summary, as we'll return that to the next call to _health */ + let lastMonitoredStats: MonitoringStats | null = null; /* Log Task Manager stats as a Debug log line at a fixed interval */ - throttledMonitoredStats$.subscribe((stats) => { - logger.debug(JSON.stringify(stats)); - }); + from(monitoringStats) + .pipe( + mergeMap((monitoringStats$) => + monitoringStats$.pipe( + throttleTime(requiredHotStatsFreshness), + tap((stats) => { + lastMonitoredStats = stats; + }), + // Only calculate the summerized stats (calculates all runnign averages and evaluates state) + // when needed by throttling down to the requiredHotStatsFreshness + map((stats) => withServiceStatus(calculateStatus(stats))) + ) + ) + ) + .subscribe(([monitoredHealth, serviceStatus]) => { + serviceStatus$.next(serviceStatus); + logger.debug(JSON.stringify(monitoredHealth)); + }); router.get( { @@ -87,32 +95,32 @@ export function healthRoute( res: KibanaResponseFactory ): Promise { return res.ok({ - body: calculateStatus(await getLatestStats(await monitoringStats)), + body: lastMonitoredStats + ? calculateStatus(lastMonitoredStats) + : { id: taskManagerId, timestamp: new Date().toISOString(), status: HealthStatus.Error }, }); } ); - - return asServiceStatus(throttledMonitoredStats$); + return serviceStatus$; } -export function asServiceStatus( - monitoredHealth$: Observable -): Observable { - return monitoredHealth$.pipe( - map((monitoredHealth) => { - const level = - monitoredHealth.status === HealthStatus.OK - ? 
ServiceStatusLevels.available - : monitoredHealth.status === HealthStatus.Warning - ? ServiceStatusLevels.degraded - : ServiceStatusLevels.unavailable; - return { - level, - summary: LEVEL_SUMMARY[level.toString()], - meta: monitoredHealth, - }; - }) - ); +export function withServiceStatus( + monitoredHealth: MonitoredHealth +): [MonitoredHealth, ServiceStatus] { + const level = + monitoredHealth.status === HealthStatus.OK + ? ServiceStatusLevels.available + : monitoredHealth.status === HealthStatus.Warning + ? ServiceStatusLevels.degraded + : ServiceStatusLevels.unavailable; + return [ + monitoredHealth, + { + level, + summary: LEVEL_SUMMARY[level.toString()], + meta: monitoredHealth, + }, + ]; } /** @@ -156,9 +164,3 @@ function getOldestTimestamp(...timestamps: Array): number .filter((timestamp) => !isNaN(timestamp)); return validTimestamps.length ? Math.min(...validTimestamps) : 0; } - -async function getLatestStats(monitoringStats$: Observable) { - return new Promise((resolve) => - monitoringStats$.pipe(take(1)).subscribe((stats) => resolve(stats)) - ); -} diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 88e591066147..270371acdcbf 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -26,7 +26,7 @@ interface MonitoringStats { taskTypes: Record; schedule: Array<[string, number]>; overdue: number; - scheduleDensity: number[]; + estimatedScheduleDensity: number[]; }; }; runtime: { @@ -148,11 +148,11 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof workload.overdue).to.eql('number'); - expect(Array.isArray(workload.scheduleDensity)).to.eql(true); + expect(Array.isArray(workload.estimatedScheduleDensity)).to.eql(true); // test run with the default poll_interval of 3s and a 
monitored_aggregated_stats_refresh_rate of 5s, - // so we expect the scheduleDensity to span a minute (which means 20 buckets, as 60s / 3s = 20) - expect(workload.scheduleDensity.length).to.eql(20); + // so we expect the estimatedScheduleDensity to span a minute (which means 20 buckets, as 60s / 3s = 20) + expect(workload.estimatedScheduleDensity.length).to.eql(20); }); it('should return the task manager runtime stats', async () => { @@ -172,11 +172,15 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof polling.resultFrequency.RanOutOfCapacity).to.eql('number'); expect(typeof polling.resultFrequency.PoolFilled).to.eql('number'); - expect(typeof drift.mean).to.eql('number'); - expect(typeof drift.median).to.eql('number'); + expect(typeof drift.p50).to.eql('number'); + expect(typeof drift.p90).to.eql('number'); + expect(typeof drift.p95).to.eql('number'); + expect(typeof drift.p99).to.eql('number'); - expect(typeof execution.duration.sampleTask.mean).to.eql('number'); - expect(typeof execution.duration.sampleTask.median).to.eql('number'); + expect(typeof execution.duration.sampleTask.p50).to.eql('number'); + expect(typeof execution.duration.sampleTask.p90).to.eql('number'); + expect(typeof execution.duration.sampleTask.p95).to.eql('number'); + expect(typeof execution.duration.sampleTask.p99).to.eql('number'); expect(typeof execution.resultFrequency.sampleTask.Success).to.eql('number'); expect(typeof execution.resultFrequency.sampleTask.RetryScheduled).to.eql('number'); From ac27d4fcc8251477ee0741407e8de4b8998322fd Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 13 Oct 2020 13:49:18 +0100 Subject: [PATCH 41/67] removed unused import --- x-pack/plugins/task_manager/server/routes/health.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 471b3f060086..c31ce6f484c2 100644 --- 
a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -12,7 +12,7 @@ import { KibanaResponseFactory, } from 'kibana/server'; import { Observable, from, Subject } from 'rxjs'; -import { take, mergeMap, tap, map } from 'rxjs/operators'; +import { mergeMap, tap, map } from 'rxjs/operators'; import { throttleTime } from 'rxjs/operators'; import { isString } from 'lodash'; import { JsonValue } from 'src/plugins/kibana_utils/common'; From 311103e30d33464f5cd79d8abc02574af3c58b14 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 13 Oct 2020 18:00:02 +0100 Subject: [PATCH 42/67] extract task dictionary from TM --- .../managed_configuration.test.ts | 5 +- .../server/lib/sanitize_task_definitions.ts | 25 ----- x-pack/plugins/task_manager/server/plugin.ts | 33 ++++--- x-pack/plugins/task_manager/server/task.ts | 7 -- .../task_manager/server/task_manager.test.ts | 15 ++- .../task_manager/server/task_manager.ts | 28 +----- .../task_manager/server/task_runner.ts | 8 +- .../task_manager/server/task_store.test.ts | 99 ++++++++++--------- .../plugins/task_manager/server/task_store.ts | 22 ++--- ...s.test.ts => task_type_dictionary.test.ts} | 9 +- .../server/task_type_dictionary.ts | 90 +++++++++++++++++ 11 files changed, 195 insertions(+), 146 deletions(-) delete mode 100644 x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.ts rename x-pack/plugins/task_manager/server/{lib/sanitize_task_definitions.test.ts => task_type_dictionary.test.ts} (89%) create mode 100644 x-pack/plugins/task_manager/server/task_type_dictionary.ts diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 443c81146900..9f80233a8b91 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ 
b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -7,6 +7,7 @@ import sinon from 'sinon'; import { mockLogger } from '../test_utils'; import { TaskManager } from '../task_manager'; +import { TaskTypeDictionary } from '../task_type_dictionary'; import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks'; import { SavedObjectsSerializer, @@ -36,6 +37,7 @@ describe('managed configuration', () => { jest.resetAllMocks(); callAsInternalUser.mockResolvedValue({ total: 0, updated: 0, version_conflicts: 0 }); clock = sinon.useFakeTimers(); + const definitions = new TaskTypeDictionary(logger); taskManager = new TaskManager({ config, logger, @@ -43,8 +45,9 @@ describe('managed configuration', () => { callAsInternalUser, taskManagerId: 'some-uuid', savedObjectsRepository: savedObjectsClient, + definitions, }); - taskManager.registerTaskDefinitions({ + definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', diff --git a/x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.ts b/x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.ts deleted file mode 100644 index f5856aa6fac3..000000000000 --- a/x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.ts +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ - -import Joi from 'joi'; -import { TaskDefinition, TaskDictionary, validateTaskDefinition } from '../task'; - -/** - * Sanitizes the system's task definitions. Task definitions have optional properties, and - * this ensures they all are given a reasonable default. 
- * - * @param taskDefinitions - The Kibana task definitions dictionary - */ -export function sanitizeTaskDefinitions( - taskDefinitions: TaskDictionary = {} -): TaskDictionary { - return Object.keys(taskDefinitions).reduce((acc, type) => { - const rawDefinition = taskDefinitions[type]; - rawDefinition.type = type; - acc[type] = Joi.attempt(rawDefinition, validateTaskDefinition) as TaskDefinition; - return acc; - }, {} as TaskDictionary); -} diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index d7dcf779376b..e2dcf462387c 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -3,19 +3,18 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -import { PluginInitializerContext, Plugin, CoreSetup, CoreStart } from 'src/core/server'; +import { PluginInitializerContext, Plugin, CoreSetup, Logger, CoreStart } from 'src/core/server'; import { Subject } from 'rxjs'; import { first } from 'rxjs/operators'; -import { TaskDictionary, TaskDefinition } from './task'; +import { TaskDefinition } from './task'; import { TaskManager } from './task_manager'; import { TaskManagerConfig } from './config'; import { Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; +import { TaskTypeDictionary } from './task_type_dictionary'; -export type TaskManagerSetupContract = Pick< - TaskManager, - 'addMiddleware' | 'registerTaskDefinitions' ->; +export type TaskManagerSetupContract = Pick & + Pick; export type TaskManagerStartContract = Pick< TaskManager, @@ -24,15 +23,17 @@ export type TaskManagerStartContract = Pick< export class TaskManagerPlugin implements Plugin { - legacyTaskManager$: Subject = new Subject(); - taskManager: Promise = this.legacyTaskManager$.pipe(first()).toPromise(); - currentConfig: TaskManagerConfig; - taskManagerId?: 
string; - config?: TaskManagerConfig; + private legacyTaskManager$: Subject = new Subject(); + private taskManager: Promise = this.legacyTaskManager$.pipe(first()).toPromise(); + private taskManagerId?: string; + private config?: TaskManagerConfig; + private logger: Logger; + private definitions: TaskTypeDictionary; constructor(private readonly initContext: PluginInitializerContext) { this.initContext = initContext; - this.currentConfig = {} as TaskManagerConfig; + this.logger = initContext.logger.get('taskManager'); + this.definitions = new TaskTypeDictionary(this.logger); } public async setup(core: CoreSetup): Promise { @@ -48,14 +49,13 @@ export class TaskManagerPlugin addMiddleware: (middleware: Middleware) => { this.taskManager.then((tm) => tm.addMiddleware(middleware)); }, - registerTaskDefinitions: (taskDefinition: TaskDictionary) => { - this.taskManager.then((tm) => tm.registerTaskDefinitions(taskDefinition)); + registerTaskDefinitions: (taskDefinition: Record) => { + this.definitions.registerTaskDefinitions(taskDefinition); }, }; } public start({ savedObjects, elasticsearch }: CoreStart): TaskManagerStartContract { - const logger = this.initContext.logger.get('taskManager'); const savedObjectsRepository = savedObjects.createInternalRepository(['task']); this.legacyTaskManager$.next( @@ -63,9 +63,10 @@ export class TaskManagerPlugin taskManagerId: this.taskManagerId!, config: this.config!, savedObjectsRepository, + definitions: this.definitions, serializer: savedObjects.createSerializer(), callAsInternalUser: elasticsearch.legacy.client.callAsInternalUser, - logger, + logger: this.logger, }) ); this.legacyTaskManager$.complete(); diff --git a/x-pack/plugins/task_manager/server/task.ts b/x-pack/plugins/task_manager/server/task.ts index 4cb080288741..83f73ca2f4ad 100644 --- a/x-pack/plugins/task_manager/server/task.ts +++ b/x-pack/plugins/task_manager/server/task.ts @@ -154,13 +154,6 @@ export const validateTaskDefinition = Joi.object({ getRetry: 
Joi.func().optional(), }).default(); -/** - * A dictionary mapping task types to their definitions. - */ -export interface TaskDictionary { - [taskType: string]: T; -} - export enum TaskStatus { Idle = 'idle', Claiming = 'claiming', diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index cf7f9e2a7cff..bb8b6f3e445a 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -27,6 +27,7 @@ import { mockLogger } from './test_utils'; import { asErr, asOk } from './lib/result_type'; import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; import { Middleware } from './lib/middleware'; +import { TaskTypeDictionary } from './task_type_dictionary'; const savedObjectsClient = savedObjectsRepositoryMock.create(); const serializer = new SavedObjectsSerializer(new SavedObjectTypeRegistry()); @@ -43,6 +44,8 @@ describe('TaskManager', () => { max_poll_inactivity_cycles: 10, request_capacity: 1000, }; + + const taskManagerLogger = mockLogger(); const taskManagerOpts = { config, savedObjectsRepository: savedObjectsClient, @@ -50,10 +53,12 @@ describe('TaskManager', () => { callAsInternalUser: jest.fn(), logger: mockLogger(), taskManagerId: 'some-uuid', + definitions: new TaskTypeDictionary(taskManagerLogger), }; beforeEach(() => { clock = sinon.useFakeTimers(); + taskManagerOpts.definitions = new TaskTypeDictionary(taskManagerLogger); }); afterEach(() => clock.restore()); @@ -71,7 +76,7 @@ describe('TaskManager', () => { test('allows and queues scheduling tasks before starting', async () => { const client = new TaskManager(taskManagerOpts); - client.registerTaskDefinitions({ + taskManagerOpts.definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', @@ -98,7 +103,7 @@ describe('TaskManager', () => { test('allows scheduling tasks after starting', async () => { const client = new 
TaskManager(taskManagerOpts); - client.registerTaskDefinitions({ + taskManagerOpts.definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', @@ -123,7 +128,7 @@ describe('TaskManager', () => { test('allows scheduling existing tasks that may have already been scheduled', async () => { const client = new TaskManager(taskManagerOpts); - client.registerTaskDefinitions({ + taskManagerOpts.definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', @@ -148,7 +153,7 @@ describe('TaskManager', () => { test('doesnt ignore failure to scheduling existing tasks for reasons other than already being scheduled', async () => { const client = new TaskManager(taskManagerOpts); - client.registerTaskDefinitions({ + taskManagerOpts.definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', @@ -175,7 +180,7 @@ describe('TaskManager', () => { test('doesnt allow naively rescheduling existing tasks that have already been scheduled', async () => { const client = new TaskManager(taskManagerOpts); - client.registerTaskDefinitions({ + taskManagerOpts.definitions.registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index cc611e124ea7..82c672df2466 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -33,11 +33,8 @@ import { } from './task_events'; import { fillPool, FillPoolResult } from './lib/fill_pool'; import { addMiddlewareToChain, BeforeSaveMiddlewareParams, Middleware } from './lib/middleware'; -import { sanitizeTaskDefinitions } from './lib/sanitize_task_definitions'; import { intervalFromNow } from './lib/intervals'; import { - TaskDefinition, - TaskDictionary, ConcreteTaskInstance, RunContext, TaskInstanceWithId, @@ -65,11 +62,13 @@ import { import { identifyEsError } from './lib/identify_es_error'; import { ensureDeprecatedFieldsAreCorrected } from 
'./lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; +import { TaskTypeDictionary } from './task_type_dictionary'; const VERSION_CONFLICT_STATUS = 409; export interface TaskManagerOpts { logger: Logger; + definitions: TaskTypeDictionary; config: TaskManagerConfig; callAsInternalUser: ILegacyScopedClusterClient['callAsInternalUser']; savedObjectsRepository: ISavedObjectsRepository; @@ -97,7 +96,7 @@ export type TaskLifecycleEvent = TaskMarkRunning | TaskRun | TaskClaim | TaskRun * The public interface into the task manager system. */ export class TaskManager { - private definitions: TaskDictionary = {}; + private definitions: TaskTypeDictionary; private store: TaskStore; private bufferedStore: BufferedTaskStore; @@ -138,6 +137,7 @@ export class TaskManager { this.logger.info(`TaskManager is identified by the Kibana UUID: ${taskManagerId}`); } + this.definitions = opts.definitions; this.store = new TaskStore({ serializer: opts.serializer, savedObjectsRepository: opts.savedObjectsRepository, @@ -284,26 +284,6 @@ export class TaskManager { } } - /** - * Method for allowing consumers to register task definitions into the system. - * @param taskDefinitions - The Kibana task definitions dictionary - */ - public registerTaskDefinitions(taskDefinitions: TaskDictionary) { - this.assertUninitialized('register task definitions', Object.keys(taskDefinitions).join(', ')); - const duplicate = Object.keys(taskDefinitions).find((k) => !!this.definitions[k]); - if (duplicate) { - throw new Error(`Task ${duplicate} is already defined!`); - } - - try { - const sanitized = sanitizeTaskDefinitions(taskDefinitions); - - Object.assign(this.definitions, sanitized); - } catch (e) { - this.logger.error('Could not sanitize task definitions'); - } - } - /** * Adds middleware to the task manager, such as adding security layers, loggers, etc. 
* diff --git a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index ebf13fac2f31..0516bfb70925 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -29,10 +29,10 @@ import { FailedRunResult, FailedTaskResult, TaskDefinition, - TaskDictionary, validateRunResult, TaskStatus, } from './task'; +import { TaskTypeDictionary } from './task_type_dictionary'; const defaultBackoffPerFailure = 5 * 60 * 1000; const EMPTY_RUN_RESULT: SuccessfulRunResult = {}; @@ -57,7 +57,7 @@ export interface Updatable { interface Opts { logger: Logger; - definitions: TaskDictionary; + definitions: TaskTypeDictionary; instance: ConcreteTaskInstance; store: Updatable; beforeRun: BeforeRunFunction; @@ -76,7 +76,7 @@ interface Opts { export class TaskManagerRunner implements TaskRunner { private task?: CancellableTask; private instance: ConcreteTaskInstance; - private definitions: TaskDictionary; + private definitions: TaskTypeDictionary; private logger: Logger; private bufferedTaskStore: Updatable; private beforeRun: BeforeRunFunction; @@ -129,7 +129,7 @@ export class TaskManagerRunner implements TaskRunner { * Gets the task defintion from the dictionary. 
*/ public get definition() { - return this.definitions[this.taskType]; + return this.definitions.get(this.taskType); } /** diff --git a/x-pack/plugins/task_manager/server/task_store.test.ts b/x-pack/plugins/task_manager/server/task_store.test.ts index 5a3ee12d593c..5c1eeba223c4 100644 --- a/x-pack/plugins/task_manager/server/task_store.test.ts +++ b/x-pack/plugins/task_manager/server/task_store.test.ts @@ -11,14 +11,13 @@ import { filter, take, first } from 'rxjs/operators'; import { Option, some, none } from 'fp-ts/lib/Option'; import { - TaskDictionary, - TaskDefinition, TaskInstance, TaskStatus, TaskLifecycleResult, SerializedConcreteTaskInstance, ConcreteTaskInstance, } from './task'; +import { mockLogger } from './test_utils'; import { StoreOpts, OwnershipClaimingOpts, TaskStore, SearchOpts } from './task_store'; import { savedObjectsRepositoryMock } from 'src/core/server/mocks'; import { @@ -29,24 +28,7 @@ import { } from 'src/core/server'; import { asTaskClaimEvent, TaskEvent } from './task_events'; import { asOk, asErr } from './lib/result_type'; - -const taskDefinitions: TaskDictionary = { - report: { - type: 'report', - title: '', - createTaskRunner: jest.fn(), - }, - dernstraight: { - type: 'dernstraight', - title: '', - createTaskRunner: jest.fn(), - }, - yawn: { - type: 'yawn', - title: '', - createTaskRunner: jest.fn(), - }, -}; +import { TaskTypeDictionary } from './task_type_dictionary'; const savedObjectsClient = savedObjectsRepositoryMock.create(); const serializer = new SavedObjectsSerializer(new SavedObjectTypeRegistry()); @@ -64,6 +46,25 @@ const mockedDate = new Date('2019-02-12T21:01:22.479Z'); } }; +const taskDefinitions = new TaskTypeDictionary(mockLogger()); +taskDefinitions.registerTaskDefinitions({ + report: { + type: 'report', + title: 'report', + createTaskRunner: jest.fn(), + }, + dernstraight: { + type: 'dernstraight', + title: 'dernstraight', + createTaskRunner: jest.fn(), + }, + yawn: { + type: 'yawn', + title: 'yawn', + 
createTaskRunner: jest.fn(), + }, +}); + describe('TaskStore', () => { describe('schedule', () => { let store: TaskStore; @@ -335,6 +336,22 @@ describe('TaskStore', () => { test('it filters claimed tasks down by supported types, maxAttempts, status, and runAt', async () => { const maxAttempts = _.random(2, 43); const customMaxAttempts = _.random(44, 100); + + const definitions = new TaskTypeDictionary(mockLogger()); + definitions.registerTaskDefinitions({ + foo: { + type: 'foo', + title: 'foo', + createTaskRunner: jest.fn(), + }, + bar: { + type: 'bar', + title: 'bar', + maxAttempts: customMaxAttempts, + createTaskRunner: jest.fn(), + }, + }); + const { args: { updateByQuery: { @@ -344,19 +361,7 @@ describe('TaskStore', () => { } = await testClaimAvailableTasks({ opts: { maxAttempts, - definitions: { - foo: { - type: 'foo', - title: '', - createTaskRunner: jest.fn(), - }, - bar: { - type: 'bar', - title: '', - maxAttempts: customMaxAttempts, - createTaskRunner: jest.fn(), - }, - }, + definitions, }, claimingOpts: { claimOwnershipUntil: new Date(), size: 10 }, }); @@ -465,6 +470,20 @@ describe('TaskStore', () => { test('it supports claiming specific tasks by id', async () => { const maxAttempts = _.random(2, 43); const customMaxAttempts = _.random(44, 100); + const definitions = new TaskTypeDictionary(mockLogger()); + definitions.registerTaskDefinitions({ + foo: { + type: 'foo', + title: 'foo', + createTaskRunner: jest.fn(), + }, + bar: { + type: 'bar', + title: 'bar', + maxAttempts: customMaxAttempts, + createTaskRunner: jest.fn(), + }, + }); const { args: { updateByQuery: { @@ -474,19 +493,7 @@ describe('TaskStore', () => { } = await testClaimAvailableTasks({ opts: { maxAttempts, - definitions: { - foo: { - type: 'foo', - title: '', - createTaskRunner: jest.fn(), - }, - bar: { - type: 'bar', - title: '', - maxAttempts: customMaxAttempts, - createTaskRunner: jest.fn(), - }, - }, + definitions, }, claimingOpts: { claimOwnershipUntil: new Date(), diff --git 
a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 15261be3d89a..8ffeceb8e561 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -27,8 +27,6 @@ import { asOk, asErr, Result } from './lib/result_type'; import { ConcreteTaskInstance, ElasticJs, - TaskDefinition, - TaskDictionary, TaskInstance, TaskLifecycle, TaskLifecycleResult, @@ -60,13 +58,14 @@ import { SortByRunAtAndRetryAt, tasksClaimedByOwner, } from './queries/mark_available_tasks_as_claimed'; +import { TaskTypeDictionary } from './task_type_dictionary'; export interface StoreOpts { callCluster: ElasticJs; index: string; taskManagerId: string; maxAttempts: number; - definitions: TaskDictionary; + definitions: TaskTypeDictionary; savedObjectsRepository: ISavedObjectsRepository; serializer: SavedObjectsSerializer; } @@ -124,7 +123,7 @@ export class TaskStore { public readonly errors$ = new Subject(); private callCluster: ElasticJs; - private definitions: TaskDictionary; + private definitions: TaskTypeDictionary; private savedObjectsRepository: ISavedObjectsRepository; private serializer: SavedObjectsSerializer; private events$: Subject; @@ -164,13 +163,7 @@ export class TaskStore { * @param task - The task being scheduled. */ public async schedule(taskInstance: TaskInstance): Promise { - if (!this.definitions[taskInstance.taskType]) { - throw new Error( - `Unsupported task type "${taskInstance.taskType}". 
Supported types are ${Object.keys( - this.definitions - ).join(', ')}` - ); - } + this.definitions.ensureHas(taskInstance.taskType); let savedObject; try { @@ -265,6 +258,9 @@ export class TaskStore { claimTasksById: OwnershipClaimingOpts['claimTasksById'], size: OwnershipClaimingOpts['size'] ): Promise { + const tasksWithRemainingAttempts = [...this.definitions].map(([type, { maxAttempts }]) => + taskWithLessThanMaxAttempts(type, maxAttempts || this.maxAttempts) + ); const queryForScheduledTasks = mustBeAllOf( // Either a task with idle status and runAt <= now or // status running or claiming with a retryAt <= now. @@ -272,9 +268,7 @@ export class TaskStore { // Either task has a schedule or the attempts < the maximum configured shouldBeOneOf( TaskWithSchedule, - ...Object.entries(this.definitions).map(([type, { maxAttempts }]) => - taskWithLessThanMaxAttempts(type, maxAttempts || this.maxAttempts) - ) + ...tasksWithRemainingAttempts ) ); diff --git a/x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.test.ts b/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts similarity index 89% rename from x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.test.ts rename to x-pack/plugins/task_manager/server/task_type_dictionary.test.ts index 650eb36347c8..6afb2241b1ef 100644 --- a/x-pack/plugins/task_manager/server/lib/sanitize_task_definitions.test.ts +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts @@ -5,8 +5,8 @@ */ import { get } from 'lodash'; -import { RunContext, TaskDictionary, TaskDefinition } from '../task'; -import { sanitizeTaskDefinitions } from './sanitize_task_definitions'; +import { RunContext, TaskDefinition } from './task'; +import { sanitizeTaskDefinitions } from './task_type_dictionary'; interface Opts { numTasks: number; @@ -35,10 +35,11 @@ const getMockTaskDefinitions = (opts: Opts) => { }, }; } - return (tasks as unknown) as TaskDictionary; + return (tasks as unknown) as Record; }; 
-describe('sanitizeTaskDefinitions', () => { +describe('taskTypeDictionary', () => { + describe('sanitizeTaskDefinitions', () => {}); it('provides tasks with defaults', () => { const taskDefinitions = getMockTaskDefinitions({ numTasks: 3 }); const result = sanitizeTaskDefinitions(taskDefinitions); diff --git a/x-pack/plugins/task_manager/server/task_type_dictionary.ts b/x-pack/plugins/task_manager/server/task_type_dictionary.ts new file mode 100644 index 000000000000..8feebaad3af0 --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.ts @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +import { mapValues } from 'lodash'; +import Joi from 'joi'; +import { TaskDefinition, validateTaskDefinition } from './task'; +import { Logger } from './types'; + +/* + * The TaskManager is the public interface into the task manager system. This glues together + * all of the disparate modules in one integration point. The task manager operates in two different ways: + * + * - pre-init, it allows middleware registration, but disallows task manipulation + * - post-init, it disallows middleware registration, but allows task manipulation + * + * Due to its complexity, this is mostly tested by integration tests (see readme). + */ + +/** + * The public interface into the task manager system. 
+ */ +export class TaskTypeDictionary { + private definitions = new Map(); + private logger: Logger; + + constructor(logger: Logger) { + this.logger = logger; + } + + [Symbol.iterator]() { + return this.definitions.entries(); + } + + public has(type: string) { + return this.definitions.has(type); + } + + public get(type: string): TaskDefinition { + this.ensureHas(type); + return this.definitions.get(type)!; + } + + public ensureHas(type: string) { + if (!this.has(type)) { + throw new Error( + `Unsupported task type "${type}". Supported types are ${[...this.definitions.keys()].join( + ', ' + )}` + ); + } + } + + /** + * Method for allowing consumers to register task definitions into the system. + * @param taskDefinitions - The Kibana task definitions dictionary + */ + public registerTaskDefinitions(taskDefinitions: Record) { + const duplicate = Object.keys(taskDefinitions).find((type) => this.definitions.has(type)); + if (duplicate) { + throw new Error(`Task ${duplicate} is already defined!`); + } + + try { + for (const [type, sanitizedDefinition] of Object.entries( + sanitizeTaskDefinitions(taskDefinitions) + )) { + this.definitions.set(type, sanitizedDefinition); + } + } catch (e) { + this.logger.error('Could not sanitize task definitions'); + } + } +} + +/** + * Sanitizes the system's task definitions. Task definitions have optional properties, and + * this ensures they all are given a reasonable default. 
+ * + * @param taskDefinitions - The Kibana task definitions dictionary + */ +export function sanitizeTaskDefinitions( + taskDefinitions: Record = {} +): Record { + return mapValues(taskDefinitions, (rawDefinition, type) => { + rawDefinition.type = type; + return Joi.attempt(rawDefinition, validateTaskDefinition); + }); +} From 83968120f85c478f8c70b630b0a775416f049ba8 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 11:10:13 +0100 Subject: [PATCH 43/67] fixed typing --- .../mark_available_tasks_as_claimed.test.ts | 10 +- .../task_manager/server/task_runner.test.ts | 109 ++++++++++++++---- 2 files changed, 94 insertions(+), 25 deletions(-) diff --git a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts index ac98fbbda5aa..024c006fdc05 100644 --- a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts +++ b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts @@ -23,11 +23,13 @@ import { SortByRunAtAndRetryAt, } from './mark_available_tasks_as_claimed'; -import { TaskDictionary, TaskDefinition } from '../task'; +import { TaskTypeDictionary } from '../task_type_dictionary'; +import { mockLogger } from '../test_utils'; describe('mark_available_tasks_as_claimed', () => { test('generates query matching tasks to be claimed when polling for tasks', () => { - const definitions: TaskDictionary = { + const definitions = new TaskTypeDictionary(mockLogger()); + definitions.registerTaskDefinitions({ sampleTask: { type: 'sampleTask', title: 'title', @@ -39,7 +41,7 @@ describe('mark_available_tasks_as_claimed', () => { title: 'title', createTaskRunner: () => ({ run: () => Promise.resolve() }), }, - }; + }); const defaultMaxAttempts = 1; const taskManagerId = '3478fg6-82374f6-83467gf5-384g6f'; const claimOwnershipUntil = '2019-02-12T21:01:22.479Z'; @@ -53,7 +55,7 @@ 
describe('mark_available_tasks_as_claimed', () => { // Either task has an schedule or the attempts < the maximum configured shouldBeOneOf( TaskWithSchedule, - ...Object.entries(definitions).map(([type, { maxAttempts }]) => + ...Array.from(definitions).map(([type, { maxAttempts }]) => taskWithLessThanMaxAttempts(type, maxAttempts || defaultMaxAttempts) ) ) diff --git a/x-pack/plugins/task_manager/server/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_runner.test.ts index c3191dbb349e..733922256673 100644 --- a/x-pack/plugins/task_manager/server/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_runner.test.ts @@ -9,11 +9,12 @@ import sinon from 'sinon'; import { minutesFromNow } from './lib/intervals'; import { asOk, asErr } from './lib/result_type'; import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; -import { ConcreteTaskInstance, TaskStatus, TaskDictionary, TaskDefinition } from './task'; +import { ConcreteTaskInstance, TaskStatus, TaskDefinition, RunResult } from './task'; import { TaskManagerRunner } from './task_runner'; import { mockLogger } from './test_utils'; import { SavedObjectsErrorHelpers } from '../../../../src/core/server'; import moment from 'moment'; +import { TaskTypeDictionary } from './task_type_dictionary'; let fakeTimer: sinon.SinonFakeTimers; @@ -67,6 +68,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { throw new Error('Dangit!'); @@ -96,9 +99,11 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return; + return { state: {} }; }, }), }, @@ -124,10 +129,12 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `1m`, createTaskRunner: () => ({ async run() { - return; + return { state: {} }; }, }), }, @@ -150,10 +157,12 @@ describe('TaskManagerRunner', () => 
{ }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `1m`, createTaskRunner: () => ({ async run() { - return; + return { state: {} }; }, }), }, @@ -171,9 +180,11 @@ describe('TaskManagerRunner', () => { const { runner, store } = testOpts({ definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return { runAt }; + return { runAt, state: {} }; }, }), }, @@ -194,9 +205,11 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return { runAt }; + return { runAt, state: {} }; }, }), }, @@ -218,6 +231,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { return undefined; @@ -238,6 +253,8 @@ describe('TaskManagerRunner', () => { const { runner, logger } = testOpts({ definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { const promise = new Promise((r) => setTimeout(r, 1000)); @@ -265,6 +282,8 @@ describe('TaskManagerRunner', () => { const { runner, logger } = testOpts({ definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ run: async () => undefined, }), @@ -291,6 +310,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, createTaskRunner: () => ({ run: async () => undefined, @@ -325,6 +346,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', getRetry: getRetryStub, createTaskRunner: () => ({ async run() { @@ -356,6 +379,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', getRetry: getRetryStub, createTaskRunner: () => ({ async run() { @@ -388,6 +413,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', getRetry: getRetryStub, createTaskRunner: () => ({ async run() { @@ 
-421,6 +448,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', getRetry: getRetryStub, createTaskRunner: () => ({ async run() { @@ -456,6 +485,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -490,6 +521,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -522,6 +555,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -557,6 +592,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -592,6 +629,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -625,6 +664,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `${timeoutMinutes}m`, getRetry: getRetryStub, createTaskRunner: () => ({ @@ -655,6 +696,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', maxAttempts: 3, createTaskRunner: () => ({ run: async () => { @@ -688,6 +731,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', maxAttempts: 3, createTaskRunner: () => ({ run: async () => { @@ -720,8 +765,10 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `1m`, - getRetry: () => {}, + // getRetry: () => {}, createTaskRunner: () => ({ run: async () => undefined, }), @@ -748,8 +795,10 @@ describe('TaskManagerRunner', () => { 
}, definitions: { bar: { + type: 'bar', + title: 'bar', timeout: `1m`, - getRetry: () => {}, + // getRetry: () => {}, createTaskRunner: () => ({ run: async () => undefined, }), @@ -777,9 +826,11 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return {}; + return { state: {} }; }, }), }, @@ -803,9 +854,11 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return { runAt }; + return { runAt, state: {} }; }, }), }, @@ -828,6 +881,8 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { throw error; @@ -855,9 +910,11 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ async run() { - return { error }; + return { error, state: {} }; }, }), }, @@ -882,10 +939,12 @@ describe('TaskManagerRunner', () => { }, definitions: { bar: { + type: 'bar', + title: 'bar', getRetry: () => false, createTaskRunner: () => ({ async run() { - return { error }; + return { error, state: {} }; }, }), }, @@ -904,7 +963,7 @@ describe('TaskManagerRunner', () => { interface TestOpts { instance?: Partial; - definitions?: unknown; + definitions?: Record; onTaskEvent?: (event: TaskEvent) => void; } @@ -942,19 +1001,25 @@ describe('TaskManagerRunner', () => { store.update.returns(instance); + const definitions = new TaskTypeDictionary(logger); + definitions.registerTaskDefinitions({ + testbar: { + type: 'bar', + title: 'Bar!', + createTaskRunner, + }, + }); + if (opts.definitions) { + definitions.registerTaskDefinitions(opts.definitions); + } + const runner = new TaskManagerRunner({ beforeRun: (context) => Promise.resolve(context), beforeMarkRunning: (context) => Promise.resolve(context), logger, store, instance, - definitions: Object.assign(opts.definitions || {}, { - 
testbar: { - type: 'bar', - title: 'Bar!', - createTaskRunner, - }, - }) as TaskDictionary, + definitions, onTaskEvent: opts.onTaskEvent, }); @@ -972,8 +1037,10 @@ describe('TaskManagerRunner', () => { const { runner, logger } = testOpts({ definitions: { bar: { + type: 'bar', + title: 'bar', createTaskRunner: () => ({ - run: async () => result, + run: async () => result as RunResult, }), }, }, From 177b6b9467e0e1c0ed653aa82e7ae0926ed9bf29 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 13:38:38 +0100 Subject: [PATCH 44/67] extract middleware and store from taskManager --- .../managed_configuration.test.ts | 73 ++++++----- .../lib/create_managed_configuration.ts | 2 +- .../task_manager/server/lib/middleware.ts | 8 ++ x-pack/plugins/task_manager/server/plugin.ts | 122 +++++++++++++----- .../task_manager/server/task_manager.test.ts | 91 +++---------- .../task_manager/server/task_manager.ts | 79 +++--------- .../task_manager/server/task_store.mock.ts | 12 +- 7 files changed, 181 insertions(+), 206 deletions(-) diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 9f80233a8b91..2deb2a55a0c3 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -5,56 +5,49 @@ */ import sinon from 'sinon'; -import { mockLogger } from '../test_utils'; -import { TaskManager } from '../task_manager'; -import { TaskTypeDictionary } from '../task_type_dictionary'; import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks'; -import { - SavedObjectsSerializer, - SavedObjectTypeRegistry, - SavedObjectsErrorHelpers, -} from '../../../../../src/core/server'; +import { SavedObjectsErrorHelpers, PluginInitializerContext } from '../../../../../src/core/server'; import { 
ADJUST_THROUGHPUT_INTERVAL } from '../lib/create_managed_configuration'; +import { TaskManagerPlugin, TaskManagerStartContract } from '../plugin'; +import { coreMock } from '../../../../../src/core/server/mocks'; +import { TaskManagerConfig } from '../config'; describe('managed configuration', () => { - let taskManager: TaskManager; + let taskManagerPlugin: TaskManagerPlugin; + let pluginInitializerContext: PluginInitializerContext; + let taskManagerStart: TaskManagerStartContract; + let clock: sinon.SinonFakeTimers; - const callAsInternalUser = jest.fn(); - const logger = mockLogger(); - const serializer = new SavedObjectsSerializer(new SavedObjectTypeRegistry()); const savedObjectsClient = savedObjectsRepositoryMock.create(); - const config = { - enabled: true, - max_workers: 10, - index: 'foo', - max_attempts: 9, - poll_interval: 3000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - }; - - beforeEach(() => { + + beforeEach(async () => { jest.resetAllMocks(); - callAsInternalUser.mockResolvedValue({ total: 0, updated: 0, version_conflicts: 0 }); clock = sinon.useFakeTimers(); - const definitions = new TaskTypeDictionary(logger); - taskManager = new TaskManager({ - config, - logger, - serializer, - callAsInternalUser, - taskManagerId: 'some-uuid', - savedObjectsRepository: savedObjectsClient, - definitions, + + pluginInitializerContext = coreMock.createPluginInitializerContext({ + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, }); - definitions.registerTaskDefinitions({ + + taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); + (await taskManagerPlugin.setup(coreMock.createSetup())).registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', createTaskRunner: jest.fn(), }, }); - taskManager.start(); + + const coreStart = coreMock.createStart(); + coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient); + 
+ taskManagerStart = await taskManagerPlugin.start(coreStart); + // force rxjs timers to fire when they are scheduled for setTimeout(0) as the // sinon fake timers cause them to stall clock.tick(0); @@ -66,15 +59,18 @@ describe('managed configuration', () => { savedObjectsClient.create.mockRejectedValueOnce( SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b') ); + // Cause "too many requests" error to be thrown await expect( - taskManager.schedule({ + taskManagerStart.schedule({ taskType: 'foo', state: {}, params: {}, }) ).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`); clock.tick(ADJUST_THROUGHPUT_INTERVAL); + + const logger = pluginInitializerContext.logger.get('taskManager'); expect(logger.warn).toHaveBeenCalledWith( 'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).' ); @@ -88,15 +84,18 @@ describe('managed configuration', () => { savedObjectsClient.create.mockRejectedValueOnce( SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b') ); + // Cause "too many requests" error to be thrown await expect( - taskManager.schedule({ + taskManagerStart.schedule({ taskType: 'foo', state: {}, params: {}, }) ).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`); clock.tick(ADJUST_THROUGHPUT_INTERVAL); + + const logger = pluginInitializerContext.logger.get('taskManager'); expect(logger.warn).toHaveBeenCalledWith( 'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).' 
); diff --git a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts index 3dc5fd50d3ca..a8cb1b178d24 100644 --- a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts +++ b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts @@ -31,7 +31,7 @@ interface ManagedConfigurationOpts { errors$: Observable; } -interface ManagedConfiguration { +export interface ManagedConfiguration { maxWorkersConfiguration$: Observable; pollIntervalConfiguration$: Observable; } diff --git a/x-pack/plugins/task_manager/server/lib/middleware.ts b/x-pack/plugins/task_manager/server/lib/middleware.ts index d367c8ca56c0..e9662b50048c 100644 --- a/x-pack/plugins/task_manager/server/lib/middleware.ts +++ b/x-pack/plugins/task_manager/server/lib/middleware.ts @@ -52,3 +52,11 @@ export function addMiddlewareToChain(prevMiddleware: Middleware, middleware: Mid beforeMarkRunning, }; } + +export function createInitialMiddleware(): Middleware { + return { + beforeSave: async (saveOpts: BeforeSaveMiddlewareParams) => saveOpts, + beforeRun: async (runOpts: RunContext) => runOpts, + beforeMarkRunning: async (runOpts: RunContext) => runOpts, + }; +} diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index e2dcf462387c..85600215be89 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -4,17 +4,20 @@ * you may not use this file except in compliance with the Elastic License. 
*/ import { PluginInitializerContext, Plugin, CoreSetup, Logger, CoreStart } from 'src/core/server'; -import { Subject } from 'rxjs'; import { first } from 'rxjs/operators'; -import { TaskDefinition } from './task'; +import { ElasticJs, TaskDefinition } from './task'; import { TaskManager } from './task_manager'; import { TaskManagerConfig } from './config'; -import { Middleware } from './lib/middleware'; +import { createInitialMiddleware, addMiddlewareToChain, Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; import { TaskTypeDictionary } from './task_type_dictionary'; +import { TaskStore } from './task_store'; +import { createManagedConfiguration } from './lib/create_managed_configuration'; -export type TaskManagerSetupContract = Pick & - Pick; +export type TaskManagerSetupContract = { addMiddleware: (middleware: Middleware) => void } & Pick< + TaskTypeDictionary, + 'registerTaskDefinitions' +>; export type TaskManagerStartContract = Pick< TaskManager, @@ -23,12 +26,14 @@ export type TaskManagerStartContract = Pick< export class TaskManagerPlugin implements Plugin { - private legacyTaskManager$: Subject = new Subject(); - private taskManager: Promise = this.legacyTaskManager$.pipe(first()).toPromise(); + private pluginLifecycle: 'start' | 'setup' | 'init' | 'stop' = 'init'; + + private taskManager?: TaskManager; private taskManagerId?: string; private config?: TaskManagerConfig; private logger: Logger; private definitions: TaskTypeDictionary; + private middleware: Middleware = createInitialMiddleware(); constructor(private readonly initContext: PluginInitializerContext) { this.initContext = initContext; @@ -36,60 +41,113 @@ export class TaskManagerPlugin this.definitions = new TaskTypeDictionary(this.logger); } - public async setup(core: CoreSetup): Promise { + public async setup({ savedObjects }: CoreSetup): Promise { + this.pluginLifecycle = 'setup'; this.config = await this.initContext.config .create() .pipe(first()) 
.toPromise(); - setupSavedObjects(core.savedObjects, this.config); + setupSavedObjects(savedObjects, this.config); this.taskManagerId = this.initContext.env.instanceUuid; return { + /** + * Adds middleware to the task manager, such as adding security layers, loggers, etc. + * + * @param {Middleware} middleware - The middlware being added. + */ addMiddleware: (middleware: Middleware) => { - this.taskManager.then((tm) => tm.addMiddleware(middleware)); + this.ensurePluginLifecycle('setup', 'add Middleware'); + this.middleware = addMiddlewareToChain(this.middleware, middleware); }, registerTaskDefinitions: (taskDefinition: Record) => { + this.ensurePluginLifecycle('setup', 'register task definitions'); this.definitions.registerTaskDefinitions(taskDefinition); }, }; } public start({ savedObjects, elasticsearch }: CoreStart): TaskManagerStartContract { + this.pluginLifecycle = 'start'; const savedObjectsRepository = savedObjects.createInternalRepository(['task']); - this.legacyTaskManager$.next( - new TaskManager({ - taskManagerId: this.taskManagerId!, - config: this.config!, - savedObjectsRepository, - definitions: this.definitions, - serializer: savedObjects.createSerializer(), - callAsInternalUser: elasticsearch.legacy.client.callAsInternalUser, - logger: this.logger, - }) - ); - this.legacyTaskManager$.complete(); + const taskStore = new TaskStore({ + serializer: savedObjects.createSerializer(), + savedObjectsRepository, + callCluster: (elasticsearch.legacy.client.callAsInternalUser as unknown) as ElasticJs, + index: this.config!.index, + maxAttempts: this.config!.max_attempts, + definitions: this.definitions, + taskManagerId: `kibana:${this.taskManagerId!}`, + }); + + const { maxWorkersConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({ + logger: this.logger, + errors$: taskStore.errors$, + startingMaxWorkers: this.config!.max_workers, + startingPollInterval: this.config!.poll_interval, + }); + + const taskManager = new TaskManager({ + 
taskManagerId: this.taskManagerId!, + config: this.config!, + definitions: this.definitions, + logger: this.logger, + taskStore, + middleware: this.middleware, + maxWorkersConfiguration$, + pollIntervalConfiguration$, + }); + this.taskManager = taskManager; // we need to "drain" any calls made to the seup API // before `starting` TaskManager. This is a legacy relic // of the old API that should be resolved once we split // Task manager into two services, setup and start, instead // of the single instance of TaskManager - this.taskManager.then((tm) => tm.start()); + taskManager.start(); return { - fetch: (...args) => this.taskManager.then((tm) => tm.fetch(...args)), - get: (...args) => this.taskManager.then((tm) => tm.get(...args)), - remove: (...args) => this.taskManager.then((tm) => tm.remove(...args)), - schedule: (...args) => this.taskManager.then((tm) => tm.schedule(...args)), - runNow: (...args) => this.taskManager.then((tm) => tm.runNow(...args)), - ensureScheduled: (...args) => this.taskManager.then((tm) => tm.ensureScheduled(...args)), + fetch: (...args) => { + this.ensurePluginLifecycle('start', 'fetch tasks'); + return taskManager.fetch(...args); + }, + get: (...args) => { + this.ensurePluginLifecycle('start', 'get tasks'); + return taskManager.get(...args); + }, + remove: (...args) => { + this.ensurePluginLifecycle('start', 'remove tasks'); + return taskManager.remove(...args); + }, + schedule: (...args) => { + this.ensurePluginLifecycle('start', 'schedule tasks'); + return taskManager.schedule(...args); + }, + ensureScheduled: (...args) => { + this.ensurePluginLifecycle('start', 'schedule tasks'); + return taskManager.ensureScheduled(...args); + }, + runNow: (...args) => { + this.ensurePluginLifecycle('start', 'run tasks'); + return taskManager.runNow(...args); + }, }; } + public stop() { - this.taskManager.then((tm) => { - tm.stop(); - }); + this.pluginLifecycle = 'stop'; + if (this.taskManager) { + this.taskManager.stop(); + } + } + + private 
ensurePluginLifecycle(lifecycle: 'start' | 'setup' | 'init' | 'stop', operation: string) { + if (this.pluginLifecycle !== lifecycle) { + throw new Error( + `Cannot ${operation} outside of the "${lifecycle}" lifecycle stage (Task Manager is in "${this.pluginLifecycle})"` + ); + } } } diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index bb8b6f3e445a..29a6fc6612b0 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -6,7 +6,7 @@ import _ from 'lodash'; import sinon from 'sinon'; -import { Subject } from 'rxjs'; +import { of, Subject } from 'rxjs'; import { none } from 'fp-ts/lib/Option'; import { @@ -21,16 +21,12 @@ import { awaitTaskRunResult, TaskLifecycleEvent, } from './task_manager'; -import { savedObjectsRepositoryMock } from '../../../../src/core/server/mocks'; -import { SavedObjectsSerializer, SavedObjectTypeRegistry } from '../../../../src/core/server'; import { mockLogger } from './test_utils'; import { asErr, asOk } from './lib/result_type'; import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; -import { Middleware } from './lib/middleware'; +import { createInitialMiddleware } from './lib/middleware'; import { TaskTypeDictionary } from './task_type_dictionary'; - -const savedObjectsClient = savedObjectsRepositoryMock.create(); -const serializer = new SavedObjectsSerializer(new SavedObjectTypeRegistry()); +import { taskStoreMock } from './task_store.mock'; describe('TaskManager', () => { let clock: sinon.SinonFakeTimers; @@ -46,14 +42,16 @@ describe('TaskManager', () => { }; const taskManagerLogger = mockLogger(); + const mockTaskStore = taskStoreMock.create({}); const taskManagerOpts = { config, - savedObjectsRepository: savedObjectsClient, - serializer, - callAsInternalUser: jest.fn(), + taskStore: mockTaskStore, logger: mockLogger(), taskManagerId: 'some-uuid', 
definitions: new TaskTypeDictionary(taskManagerLogger), + middleware: createInitialMiddleware(), + maxWorkersConfiguration$: of(100), + pollIntervalConfiguration$: of(100), }; beforeEach(() => { @@ -88,17 +86,11 @@ describe('TaskManager', () => { params: {}, state: {}, }; - savedObjectsClient.create.mockResolvedValueOnce({ - id: '1', - type: 'task', - attributes: {}, - references: [], - }); const promise = client.schedule(task); client.start(); await promise; - expect(savedObjectsClient.create).toHaveBeenCalled(); + expect(mockTaskStore.schedule).toHaveBeenCalled(); }); test('allows scheduling tasks after starting', async () => { @@ -116,14 +108,8 @@ describe('TaskManager', () => { params: {}, state: {}, }; - savedObjectsClient.create.mockResolvedValueOnce({ - id: '1', - type: 'task', - attributes: {}, - references: [], - }); await client.schedule(task); - expect(savedObjectsClient.create).toHaveBeenCalled(); + expect(mockTaskStore.schedule).toHaveBeenCalled(); }); test('allows scheduling existing tasks that may have already been scheduled', async () => { @@ -135,7 +121,7 @@ describe('TaskManager', () => { createTaskRunner: jest.fn(), }, }); - savedObjectsClient.create.mockRejectedValueOnce({ + mockTaskStore.schedule.mockRejectedValueOnce({ statusCode: 409, }); @@ -160,7 +146,7 @@ describe('TaskManager', () => { createTaskRunner: jest.fn(), }, }); - savedObjectsClient.create.mockRejectedValueOnce({ + mockTaskStore.schedule.mockRejectedValueOnce({ statusCode: 500, }); @@ -187,7 +173,7 @@ describe('TaskManager', () => { createTaskRunner: jest.fn(), }, }); - savedObjectsClient.create.mockRejectedValueOnce({ + mockTaskStore.schedule.mockRejectedValueOnce({ statusCode: 409, }); @@ -207,74 +193,33 @@ describe('TaskManager', () => { test('allows and queues removing tasks before starting', async () => { const client = new TaskManager(taskManagerOpts); - savedObjectsClient.delete.mockResolvedValueOnce({}); const promise = client.remove('1'); client.start(); await promise; - 
expect(savedObjectsClient.delete).toHaveBeenCalled(); + expect(mockTaskStore.remove).toHaveBeenCalled(); }); test('allows removing tasks after starting', async () => { const client = new TaskManager(taskManagerOpts); client.start(); - savedObjectsClient.delete.mockResolvedValueOnce({}); await client.remove('1'); - expect(savedObjectsClient.delete).toHaveBeenCalled(); + expect(mockTaskStore.remove).toHaveBeenCalled(); }); test('allows and queues fetching tasks before starting', async () => { const client = new TaskManager(taskManagerOpts); - taskManagerOpts.callAsInternalUser.mockResolvedValue({ - hits: { - total: { - value: 0, - }, - hits: [], - }, - }); const promise = client.fetch({}); + expect(mockTaskStore.fetch).not.toHaveBeenCalled(); client.start(); await promise; - expect(taskManagerOpts.callAsInternalUser).toHaveBeenCalled(); + expect(mockTaskStore.fetch).toHaveBeenCalled(); }); test('allows fetching tasks after starting', async () => { const client = new TaskManager(taskManagerOpts); client.start(); - taskManagerOpts.callAsInternalUser.mockResolvedValue({ - hits: { - total: { - value: 0, - }, - hits: [], - }, - }); await client.fetch({}); - expect(taskManagerOpts.callAsInternalUser).toHaveBeenCalled(); - }); - - test('allows middleware registration before starting', () => { - const client = new TaskManager(taskManagerOpts); - const middleware: Middleware = { - beforeSave: jest.fn(async (saveOpts) => saveOpts), - beforeRun: jest.fn(async (runOpts) => runOpts), - beforeMarkRunning: jest.fn(async (runOpts) => runOpts), - }; - expect(() => client.addMiddleware(middleware)).not.toThrow(); - }); - - test('disallows middleware registration after starting', async () => { - const client = new TaskManager(taskManagerOpts); - const middleware: Middleware = { - beforeSave: jest.fn(async (saveOpts) => saveOpts), - beforeRun: jest.fn(async (runOpts) => runOpts), - beforeMarkRunning: jest.fn(async (runOpts) => runOpts), - }; - - client.start(); - expect(() => 
client.addMiddleware(middleware)).toThrow( - /Cannot add middleware after the task manager is initialized/i - ); + expect(mockTaskStore.fetch).toHaveBeenCalled(); }); describe('runNow', () => { diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 82c672df2466..3d35c5bdf40b 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -11,13 +11,8 @@ import { performance } from 'perf_hooks'; import { pipe } from 'fp-ts/lib/pipeable'; import { Option, some, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; -import { - SavedObjectsSerializer, - ILegacyScopedClusterClient, - ISavedObjectsRepository, -} from '../../../../src/core/server'; import { Result, asOk, asErr, either, map, mapErr, promiseResult } from './lib/result_type'; -import { createManagedConfiguration } from './lib/create_managed_configuration'; +import { ManagedConfiguration } from './lib/create_managed_configuration'; import { TaskManagerConfig } from './config'; import { Logger } from './types'; @@ -32,17 +27,15 @@ import { asTaskRunRequestEvent, } from './task_events'; import { fillPool, FillPoolResult } from './lib/fill_pool'; -import { addMiddlewareToChain, BeforeSaveMiddlewareParams, Middleware } from './lib/middleware'; +import { Middleware } from './lib/middleware'; import { intervalFromNow } from './lib/intervals'; import { ConcreteTaskInstance, - RunContext, TaskInstanceWithId, TaskInstanceWithDeprecatedFields, TaskLifecycle, TaskLifecycleResult, TaskStatus, - ElasticJs, } from './task'; import { createTaskPoller, @@ -66,15 +59,14 @@ import { TaskTypeDictionary } from './task_type_dictionary'; const VERSION_CONFLICT_STATUS = 409; -export interface TaskManagerOpts { +export type TaskManagerOpts = { logger: Logger; definitions: TaskTypeDictionary; + taskStore: TaskStore; config: TaskManagerConfig; - callAsInternalUser: 
ILegacyScopedClusterClient['callAsInternalUser']; - savedObjectsRepository: ISavedObjectsRepository; - serializer: SavedObjectsSerializer; taskManagerId: string; -} + middleware: Middleware; +} & ManagedConfiguration; interface RunNowResult { id: string; @@ -113,11 +105,7 @@ export class TaskManager { private pollingSubscription: Subscription = Subscription.EMPTY; private startQueue: Array<() => void> = []; - private middleware = { - beforeSave: async (saveOpts: BeforeSaveMiddlewareParams) => saveOpts, - beforeRun: async (runOpts: RunContext) => runOpts, - beforeMarkRunning: async (runOpts: RunContext) => runOpts, - }; + private middleware: Middleware; /** * Initializes the task manager, preventing any further addition of middleware, @@ -125,9 +113,16 @@ export class TaskManager { * mechanism. */ constructor(opts: TaskManagerOpts) { - this.logger = opts.logger; + const { + logger, + taskManagerId, + middleware, + maxWorkersConfiguration$, + pollIntervalConfiguration$, + } = opts; + this.logger = logger; + this.middleware = middleware; - const { taskManagerId } = opts; if (!taskManagerId) { this.logger.error( `TaskManager is unable to start as there the Kibana UUID is invalid (value of the "server.uuid" configuration is ${taskManagerId})` @@ -138,25 +133,10 @@ export class TaskManager { } this.definitions = opts.definitions; - this.store = new TaskStore({ - serializer: opts.serializer, - savedObjectsRepository: opts.savedObjectsRepository, - callCluster: (opts.callAsInternalUser as unknown) as ElasticJs, - index: opts.config.index, - maxAttempts: opts.config.max_attempts, - definitions: this.definitions, - taskManagerId: `kibana:${taskManagerId}`, - }); + this.store = opts.taskStore; // pipe store events into the TaskManager's event stream this.store.events.subscribe((event) => this.events$.next(event)); - const { maxWorkersConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({ - logger: this.logger, - errors$: this.store.errors$, - 
startingMaxWorkers: opts.config.max_workers, - startingPollInterval: opts.config.poll_interval, - }); - this.bufferedStore = new BufferedTaskStore(this.store, { bufferMaxOperations: opts.config.max_workers, logger: this.logger, @@ -284,17 +264,6 @@ export class TaskManager { } } - /** - * Adds middleware to the task manager, such as adding security layers, loggers, etc. - * - * @param {Middleware} middleware - The middlware being added. - */ - public addMiddleware(middleware: Middleware) { - this.assertUninitialized('add middleware'); - const prevMiddleWare = this.middleware; - this.middleware = addMiddlewareToChain(prevMiddleWare, middleware); - } - /** * Schedules a task. * @@ -382,20 +351,6 @@ export class TaskManager { await this.waitUntilStarted(); return this.store.remove(id); } - - /** - * Ensures task manager IS NOT already initialized - * - * @param {string} message shown if task manager is already initialized - * @returns void - */ - private assertUninitialized(message: string, context?: string) { - if (this.isStarted) { - throw new Error( - `${context ? `[${context}] ` : ''}Cannot ${message} after the task manager is initialized` - ); - } - } } export async function claimAvailableTasks( diff --git a/x-pack/plugins/task_manager/server/task_store.mock.ts b/x-pack/plugins/task_manager/server/task_store.mock.ts index 86db695bc5e2..9b82a3e3ee7a 100644 --- a/x-pack/plugins/task_manager/server/task_store.mock.ts +++ b/x-pack/plugins/task_manager/server/task_store.mock.ts @@ -4,15 +4,24 @@ * you may not use this file except in compliance with the Elastic License. 
*/ +import { Observable, Subject } from 'rxjs'; +import { TaskClaim } from './task_events'; + import { TaskStore } from './task_store'; interface TaskStoreOptions { maxAttempts?: number; index?: string; taskManagerId?: string; + events?: Observable; } export const taskStoreMock = { - create({ maxAttempts = 0, index = '', taskManagerId = '' }: TaskStoreOptions) { + create({ + maxAttempts = 0, + index = '', + taskManagerId = '', + events = new Subject(), + }: TaskStoreOptions) { const mocked = ({ update: jest.fn(), remove: jest.fn(), @@ -25,6 +34,7 @@ export const taskStoreMock = { maxAttempts, index, taskManagerId, + events, } as unknown) as jest.Mocked; return mocked; }, From 3d2cde8d671ccdf214d01cc29b89a63e0dd49ef1 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 13:50:52 +0100 Subject: [PATCH 45/67] extracted store APIs from TM --- x-pack/plugins/task_manager/server/plugin.ts | 43 ++++++++++----- .../task_manager/server/task_manager.test.ts | 54 +------------------ .../task_manager/server/task_manager.ts | 48 ----------------- 3 files changed, 30 insertions(+), 115 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 85600215be89..d43a991e5768 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -11,7 +11,7 @@ import { TaskManagerConfig } from './config'; import { createInitialMiddleware, addMiddlewareToChain, Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; import { TaskTypeDictionary } from './task_type_dictionary'; -import { TaskStore } from './task_store'; +import { FetchResult, SearchOpts, TaskStore } from './task_store'; import { createManagedConfiguration } from './lib/create_managed_configuration'; export type TaskManagerSetupContract = { addMiddleware: (middleware: Middleware) => void } & Pick< @@ -21,8 +21,9 @@ export type TaskManagerSetupContract = { 
addMiddleware: (middleware: Middleware) export type TaskManagerStartContract = Pick< TaskManager, - 'fetch' | 'get' | 'remove' | 'schedule' | 'runNow' | 'ensureScheduled' ->; + 'schedule' | 'runNow' | 'ensureScheduled' +> & + Pick; export class TaskManagerPlugin implements Plugin { @@ -101,25 +102,39 @@ export class TaskManagerPlugin }); this.taskManager = taskManager; - // we need to "drain" any calls made to the seup API - // before `starting` TaskManager. This is a legacy relic - // of the old API that should be resolved once we split - // Task manager into two services, setup and start, instead - // of the single instance of TaskManager + // start polling for work taskManager.start(); return { - fetch: (...args) => { + /** + * Fetches a list of scheduled tasks. + * + * @param opts - The query options used to filter tasks + * @returns {Promise} + */ + fetch: (opts: SearchOpts): Promise => { this.ensurePluginLifecycle('start', 'fetch tasks'); - return taskManager.fetch(...args); + return taskStore.fetch(opts); }, - get: (...args) => { + /** + * Get the current state of a specified task. + * + * @param {string} id + * @returns {Promise} + */ + get: (id: string) => { this.ensurePluginLifecycle('start', 'get tasks'); - return taskManager.get(...args); + return taskStore.get(id); }, - remove: (...args) => { + /** + * Removes the specified task from the index. 
+ * + * @param {string} id + * @returns {Promise} + */ + remove: (id: string) => { this.ensurePluginLifecycle('start', 'remove tasks'); - return taskManager.remove(...args); + return taskStore.remove(id); }, schedule: (...args) => { this.ensurePluginLifecycle('start', 'schedule tasks'); diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index 29a6fc6612b0..6ddd8b85e717 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -72,28 +72,7 @@ describe('TaskManager', () => { ); }); - test('allows and queues scheduling tasks before starting', async () => { - const client = new TaskManager(taskManagerOpts); - taskManagerOpts.definitions.registerTaskDefinitions({ - foo: { - type: 'foo', - title: 'Foo', - createTaskRunner: jest.fn(), - }, - }); - const task = { - taskType: 'foo', - params: {}, - state: {}, - }; - const promise = client.schedule(task); - client.start(); - await promise; - - expect(mockTaskStore.schedule).toHaveBeenCalled(); - }); - - test('allows scheduling tasks after starting', async () => { + test('allows scheduling tasks', async () => { const client = new TaskManager(taskManagerOpts); taskManagerOpts.definitions.registerTaskDefinitions({ foo: { @@ -191,37 +170,6 @@ describe('TaskManager', () => { }); }); - test('allows and queues removing tasks before starting', async () => { - const client = new TaskManager(taskManagerOpts); - const promise = client.remove('1'); - client.start(); - await promise; - expect(mockTaskStore.remove).toHaveBeenCalled(); - }); - - test('allows removing tasks after starting', async () => { - const client = new TaskManager(taskManagerOpts); - client.start(); - await client.remove('1'); - expect(mockTaskStore.remove).toHaveBeenCalled(); - }); - - test('allows and queues fetching tasks before starting', async () => { - const client = new TaskManager(taskManagerOpts); - const 
promise = client.fetch({}); - expect(mockTaskStore.fetch).not.toHaveBeenCalled(); - client.start(); - await promise; - expect(mockTaskStore.fetch).toHaveBeenCalled(); - }); - - test('allows fetching tasks after starting', async () => { - const client = new TaskManager(taskManagerOpts); - client.start(); - await client.fetch({}); - expect(mockTaskStore.fetch).toHaveBeenCalled(); - }); - describe('runNow', () => { describe('awaitTaskRunResult', () => { test('resolves when the task run succeeds', () => { diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 3d35c5bdf40b..284c99493e51 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -104,7 +104,6 @@ export class TaskManager { // our subscription to the poller private pollingSubscription: Subscription = Subscription.EMPTY; - private startQueue: Array<() => void> = []; private middleware: Middleware; /** @@ -228,10 +227,6 @@ export class TaskManager { */ public start() { if (!this.isStarted) { - // Some calls are waiting until task manager is started - this.startQueue.forEach((fn) => fn()); - this.startQueue = []; - this.pollingSubscription = this.poller$.subscribe( mapErr((error: PollingError) => { if (error.type === PollingErrorType.RequestCapacityReached) { @@ -246,14 +241,6 @@ export class TaskManager { } } - private async waitUntilStarted() { - if (!this.isStarted) { - await new Promise((resolve) => { - this.startQueue.push(resolve); - }); - } - } - /** * Stops the task manager and cancels running tasks. 
*/ @@ -274,7 +261,6 @@ export class TaskManager { taskInstance: TaskInstanceWithDeprecatedFields, options?: Record ): Promise { - await this.waitUntilStarted(); const { taskInstance: modifiedTask } = await this.middleware.beforeSave({ ...options, taskInstance: ensureDeprecatedFieldsAreCorrected(taskInstance, this.logger), @@ -289,7 +275,6 @@ export class TaskManager { * @returns {Promise} */ public async runNow(taskId: string): Promise { - await this.waitUntilStarted(); return new Promise(async (resolve, reject) => { awaitTaskRunResult(taskId, this.events$, this.store.getLifecycle.bind(this.store)) .then(resolve) @@ -318,39 +303,6 @@ export class TaskManager { throw err; } } - - /** - * Fetches a list of scheduled tasks. - * - * @param opts - The query options used to filter tasks - * @returns {Promise} - */ - public async fetch(opts: SearchOpts): Promise { - await this.waitUntilStarted(); - return this.store.fetch(opts); - } - - /** - * Get the current state of a specified task. - * - * @param {string} id - * @returns {Promise} - */ - public async get(id: string): Promise { - await this.waitUntilStarted(); - return this.store.get(id); - } - - /** - * Removes the specified task from the index. 
- * - * @param {string} id - * @returns {Promise} - */ - public async remove(id: string): Promise { - await this.waitUntilStarted(); - return this.store.remove(id); - } } export async function claimAvailableTasks( From afcdfd64556a6e6df08921638b2bcbe041004bad Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 16:15:39 +0100 Subject: [PATCH 46/67] removed redundant lifecycle checks --- x-pack/plugins/task_manager/server/plugin.ts | 53 ++++++------------- .../task_manager/server/task_manager.ts | 8 +-- 2 files changed, 18 insertions(+), 43 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index d43a991e5768..b05334a7c618 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -27,8 +27,6 @@ export type TaskManagerStartContract = Pick< export class TaskManagerPlugin implements Plugin { - private pluginLifecycle: 'start' | 'setup' | 'init' | 'stop' = 'init'; - private taskManager?: TaskManager; private taskManagerId?: string; private config?: TaskManagerConfig; @@ -43,7 +41,6 @@ export class TaskManagerPlugin } public async setup({ savedObjects }: CoreSetup): Promise { - this.pluginLifecycle = 'setup'; this.config = await this.initContext.config .create() .pipe(first()) @@ -59,18 +56,17 @@ export class TaskManagerPlugin * @param {Middleware} middleware - The middlware being added. 
*/ addMiddleware: (middleware: Middleware) => { - this.ensurePluginLifecycle('setup', 'add Middleware'); + this.assertStillInSetup('add Middleware'); this.middleware = addMiddlewareToChain(this.middleware, middleware); }, registerTaskDefinitions: (taskDefinition: Record) => { - this.ensurePluginLifecycle('setup', 'register task definitions'); + this.assertStillInSetup('register task definitions'); this.definitions.registerTaskDefinitions(taskDefinition); }, }; } public start({ savedObjects, elasticsearch }: CoreStart): TaskManagerStartContract { - this.pluginLifecycle = 'start'; const savedObjectsRepository = savedObjects.createInternalRepository(['task']); const taskStore = new TaskStore({ @@ -112,57 +108,42 @@ export class TaskManagerPlugin * @param opts - The query options used to filter tasks * @returns {Promise} */ - fetch: (opts: SearchOpts): Promise => { - this.ensurePluginLifecycle('start', 'fetch tasks'); - return taskStore.fetch(opts); - }, + fetch: (opts: SearchOpts): Promise => taskStore.fetch(opts), /** * Get the current state of a specified task. * * @param {string} id * @returns {Promise} */ - get: (id: string) => { - this.ensurePluginLifecycle('start', 'get tasks'); - return taskStore.get(id); - }, + get: (id: string) => taskStore.get(id), /** * Removes the specified task from the index. 
* * @param {string} id * @returns {Promise} */ - remove: (id: string) => { - this.ensurePluginLifecycle('start', 'remove tasks'); - return taskStore.remove(id); - }, - schedule: (...args) => { - this.ensurePluginLifecycle('start', 'schedule tasks'); - return taskManager.schedule(...args); - }, - ensureScheduled: (...args) => { - this.ensurePluginLifecycle('start', 'schedule tasks'); - return taskManager.ensureScheduled(...args); - }, - runNow: (...args) => { - this.ensurePluginLifecycle('start', 'run tasks'); - return taskManager.runNow(...args); - }, + remove: (id: string) => taskStore.remove(id), + schedule: (...args) => taskManager.schedule(...args), + ensureScheduled: (...args) => taskManager.ensureScheduled(...args), + runNow: (...args) => taskManager.runNow(...args), }; } public stop() { - this.pluginLifecycle = 'stop'; if (this.taskManager) { this.taskManager.stop(); } } - private ensurePluginLifecycle(lifecycle: 'start' | 'setup' | 'init' | 'stop', operation: string) { - if (this.pluginLifecycle !== lifecycle) { - throw new Error( - `Cannot ${operation} outside of the "${lifecycle}" lifecycle stage (Task Manager is in "${this.pluginLifecycle})"` - ); + /** + * Ensures task manager hasn't started + * + * @param {string} the name of the operation being executed + * @returns void + */ + private assertStillInSetup(operation: string) { + if (this.taskManager?.isStarted) { + throw new Error(`Cannot ${operation} after the task manager has started`); } } } diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index 284c99493e51..f8572cc2dbf7 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -45,13 +45,7 @@ import { } from './polling'; import { TaskPool } from './task_pool'; import { TaskManagerRunner, TaskRunner } from './task_runner'; -import { - FetchResult, - TaskStore, - OwnershipClaimingOpts, - ClaimOwnershipResult, - 
SearchOpts, -} from './task_store'; +import { TaskStore, OwnershipClaimingOpts, ClaimOwnershipResult } from './task_store'; import { identifyEsError } from './lib/identify_es_error'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; From bbd445ae51201cc3d090d4e71361da9c04022989 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 17:37:56 +0100 Subject: [PATCH 47/67] extract scheduling from task_manager --- .../server/action_type_registry.test.ts | 4 +- .../actions/server/actions_client.test.ts | 4 +- .../server/builtin_action_types/index.test.ts | 4 +- .../server/create_execute_function.test.ts | 4 +- .../usage/actions_usage_collector.test.ts | 4 +- .../alerts/server/alert_type_registry.test.ts | 4 +- .../server/alerts_client/tests/create.test.ts | 4 +- .../server/alerts_client/tests/delete.test.ts | 4 +- .../alerts_client/tests/disable.test.ts | 4 +- .../server/alerts_client/tests/enable.test.ts | 4 +- .../server/alerts_client/tests/find.test.ts | 4 +- .../server/alerts_client/tests/get.test.ts | 4 +- .../tests/get_alert_instance_summary.test.ts | 4 +- .../tests/get_alert_state.test.ts | 4 +- .../tests/list_alert_types.test.ts | 4 +- .../alerts_client/tests/mute_all.test.ts | 4 +- .../alerts_client/tests/mute_instance.test.ts | 4 +- .../alerts_client/tests/unmute_all.test.ts | 4 +- .../tests/unmute_instance.test.ts | 4 +- .../server/alerts_client/tests/update.test.ts | 4 +- .../tests/update_api_key.test.ts | 4 +- .../alerts_client_conflict_retries.test.ts | 4 +- .../server/alerts_client_factory.test.ts | 4 +- .../usage/alerts_usage_collector.test.ts | 4 +- .../task_manager/server/plugin.test.ts | 32 ++ x-pack/plugins/task_manager/server/plugin.ts | 26 +- .../task_manager/server/task_manager.mock.ts | 44 +-- .../task_manager/server/task_manager.test.ts | 299 +--------------- .../task_manager/server/task_manager.ts | 174 +--------- 
.../server/task_scheduling.mock.ts | 19 ++ .../server/task_scheduling.test.ts | 321 ++++++++++++++++++ .../task_manager/server/task_scheduling.ts | 177 ++++++++++ 32 files changed, 660 insertions(+), 528 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/plugin.test.ts create mode 100644 x-pack/plugins/task_manager/server/task_scheduling.mock.ts create mode 100644 x-pack/plugins/task_manager/server/task_scheduling.test.ts create mode 100644 x-pack/plugins/task_manager/server/task_scheduling.ts diff --git a/x-pack/plugins/actions/server/action_type_registry.test.ts b/x-pack/plugins/actions/server/action_type_registry.test.ts index b25e33400df5..52cf6b6473f7 100644 --- a/x-pack/plugins/actions/server/action_type_registry.test.ts +++ b/x-pack/plugins/actions/server/action_type_registry.test.ts @@ -4,7 +4,7 @@ * you may not use this file except in compliance with the Elastic License. */ -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { ActionTypeRegistry, ActionTypeRegistryOpts } from './action_type_registry'; import { ActionType, ExecutorType } from './types'; import { ActionExecutor, ExecutorError, ILicenseState, TaskRunnerFactory } from './lib'; @@ -12,7 +12,7 @@ import { actionsConfigMock } from './actions_config.mock'; import { licenseStateMock } from './lib/license_state.mock'; import { ActionsConfigurationUtilities } from './actions_config'; -const mockTaskManager = taskManagerMock.setup(); +const mockTaskManager = taskManagerMock.createSetup(); let mockedLicenseState: jest.Mocked; let mockedActionsConfig: jest.Mocked; let actionTypeRegistryParams: ActionTypeRegistryOpts; diff --git a/x-pack/plugins/actions/server/actions_client.test.ts b/x-pack/plugins/actions/server/actions_client.test.ts index adef12454f2d..7ea215c0a592 100644 --- a/x-pack/plugins/actions/server/actions_client.test.ts +++ 
b/x-pack/plugins/actions/server/actions_client.test.ts @@ -10,7 +10,7 @@ import { ActionTypeRegistry, ActionTypeRegistryOpts } from './action_type_regist import { ActionsClient } from './actions_client'; import { ExecutorType } from './types'; import { ActionExecutor, TaskRunnerFactory, ILicenseState } from './lib'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { actionsConfigMock } from './actions_config.mock'; import { getActionsConfigurationUtilities } from './actions_config'; import { licenseStateMock } from './lib/license_state.mock'; @@ -33,7 +33,7 @@ const authorization = actionsAuthorizationMock.create(); const executionEnqueuer = jest.fn(); const request = {} as KibanaRequest; -const mockTaskManager = taskManagerMock.setup(); +const mockTaskManager = taskManagerMock.createSetup(); let actionsClient: ActionsClient; let mockedLicenseState: jest.Mocked; diff --git a/x-pack/plugins/actions/server/builtin_action_types/index.test.ts b/x-pack/plugins/actions/server/builtin_action_types/index.test.ts index acab6dd41b4b..1088a4e0d906 100644 --- a/x-pack/plugins/actions/server/builtin_action_types/index.test.ts +++ b/x-pack/plugins/actions/server/builtin_action_types/index.test.ts @@ -6,7 +6,7 @@ import { ActionExecutor, TaskRunnerFactory } from '../lib'; import { ActionTypeRegistry } from '../action_type_registry'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../task_manager/server/mocks'; import { registerBuiltInActionTypes } from './index'; import { Logger } from '../../../../../src/core/server'; import { loggingSystemMock } from '../../../../../src/core/server/mocks'; @@ -21,7 +21,7 @@ export function createActionTypeRegistry(): { } { const logger = loggingSystemMock.create().get() as jest.Mocked; const actionTypeRegistry = new ActionTypeRegistry({ - taskManager: 
taskManagerMock.setup(), + taskManager: taskManagerMock.createSetup(), taskRunnerFactory: new TaskRunnerFactory( new ActionExecutor({ isESOUsingEphemeralEncryptionKey: false }) ), diff --git a/x-pack/plugins/actions/server/create_execute_function.test.ts b/x-pack/plugins/actions/server/create_execute_function.test.ts index 7682f01ed769..d0500e37ceed 100644 --- a/x-pack/plugins/actions/server/create_execute_function.test.ts +++ b/x-pack/plugins/actions/server/create_execute_function.test.ts @@ -6,7 +6,7 @@ import { KibanaRequest } from 'src/core/server'; import uuid from 'uuid'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { createExecutionEnqueuerFunction } from './create_execute_function'; import { savedObjectsClientMock } from '../../../../src/core/server/mocks'; import { actionTypeRegistryMock } from './action_type_registry.mock'; @@ -15,7 +15,7 @@ import { asSavedObjectExecutionSource, } from './lib/action_execution_source'; -const mockTaskManager = taskManagerMock.start(); +const mockTaskManager = taskManagerMock.createStart(); const savedObjectsClient = savedObjectsClientMock.create(); const request = {} as KibanaRequest; diff --git a/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts b/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts index 2e2944aab425..0e6c2ff37eb0 100644 --- a/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts +++ b/x-pack/plugins/actions/server/usage/actions_usage_collector.test.ts @@ -6,9 +6,9 @@ import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { registerActionsUsageCollector } from './actions_usage_collector'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../task_manager/server/mocks'; -const mockTaskManagerStart = taskManagerMock.start(); +const mockTaskManagerStart 
= taskManagerMock.createStart(); beforeEach(() => jest.resetAllMocks()); diff --git a/x-pack/plugins/alerts/server/alert_type_registry.test.ts b/x-pack/plugins/alerts/server/alert_type_registry.test.ts index 048cc3d5a444..020b4f55619b 100644 --- a/x-pack/plugins/alerts/server/alert_type_registry.test.ts +++ b/x-pack/plugins/alerts/server/alert_type_registry.test.ts @@ -7,9 +7,9 @@ import { TaskRunnerFactory } from './task_runner'; import { AlertTypeRegistry } from './alert_type_registry'; import { AlertType } from './types'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; -const taskManager = taskManagerMock.setup(); +const taskManager = taskManagerMock.createSetup(); const alertTypeRegistryParams = { taskManager, taskRunnerFactory: new TaskRunnerFactory(), diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/create.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/create.test.ts index 65a30d175014..d91896d17bf1 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/create.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/create.test.ts @@ -6,7 +6,7 @@ import { schema } from '@kbn/config-schema'; import { AlertsClient, ConstructorOptions, CreateOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -16,7 +16,7 @@ import { ActionsAuthorization } from '../../../../actions/server'; import { TaskStatus } from '../../../../task_manager/server'; 
import { getBeforeSetup, setGlobalDate } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const encryptedSavedObjects = encryptedSavedObjectsMock.createClient(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/delete.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/delete.test.ts index 1ebd9fc296b1..d9b253c3a56e 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/delete.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/delete.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const encryptedSavedObjects = encryptedSavedObjectsMock.createClient(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/disable.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/disable.test.ts index 2dd3da07234c..d0557df62202 100644 --- 
a/x-pack/plugins/alerts/server/alerts_client/tests/disable.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/disable.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/enable.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/enable.test.ts index b214d8ba697b..f098bbcad8d0 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/enable.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/enable.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from 
'../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -15,7 +15,7 @@ import { ActionsAuthorization } from '../../../../actions/server'; import { TaskStatus } from '../../../../task_manager/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/find.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/find.test.ts index bf55a2070d8f..c1adaddc80d9 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/find.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/find.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { nodeTypes } from '../../../../../../src/plugins/data/common'; @@ -16,7 +16,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup, setGlobalDate } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/get.test.ts 
b/x-pack/plugins/alerts/server/alerts_client/tests/get.test.ts index 327a1fa23ef0..004230403de2 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/get.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/get.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup, setGlobalDate } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_instance_summary.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_instance_summary.test.ts index 09212732b76e..a53e49337f38 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_instance_summary.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_instance_summary.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from 
'../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -19,7 +19,7 @@ import { EventsFactory } from '../../lib/alert_instance_summary_from_event_log.t import { RawAlert } from '../../types'; import { getBeforeSetup, mockedDateString, setGlobalDate } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const eventLogClient = eventLogClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_state.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_state.test.ts index 42e573aea347..8b32f05f6d5a 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_state.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/get_alert_state.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { TaskStatus } from '../../../../task_manager/server'; @@ -15,7 +15,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = 
taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/list_alert_types.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/list_alert_types.test.ts index 4337ed6c491d..b2f5c5498f84 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/list_alert_types.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/list_alert_types.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/mute_all.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/mute_all.test.ts index 44ee6713f256..88199dfd1f7b 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/mute_all.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/mute_all.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { 
savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const encryptedSavedObjects = encryptedSavedObjectsMock.createClient(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/mute_instance.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/mute_instance.test.ts index dc9a1600a577..cd7112b3551b 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/mute_instance.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/mute_instance.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { 
AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/unmute_all.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/unmute_all.test.ts index 45920db105c2..07666c1cc626 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/unmute_all.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/unmute_all.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/unmute_instance.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/unmute_instance.test.ts index 560401150113..97711b8c1457 100644 --- 
a/x-pack/plugins/alerts/server/alerts_client/tests/unmute_instance.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/unmute_instance.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/update.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/update.test.ts index 14275575f75f..146f8ac400ad 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/update.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/update.test.ts @@ -7,7 +7,7 @@ import uuid from 'uuid'; import { schema } from '@kbn/config-schema'; import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; 
import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { IntervalSchedule } from '../../types'; @@ -19,7 +19,7 @@ import { ActionsAuthorization } from '../../../../actions/server'; import { TaskStatus } from '../../../../task_manager/server'; import { getBeforeSetup, setGlobalDate } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/update_api_key.test.ts b/x-pack/plugins/alerts/server/alerts_client/tests/update_api_key.test.ts index 97ddfa5e4adb..1f3b567b2c03 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/update_api_key.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/update_api_key.test.ts @@ -5,7 +5,7 @@ */ import { AlertsClient, ConstructorOptions } from '../alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { alertTypeRegistryMock } from '../../alert_type_registry.mock'; import { alertsAuthorizationMock } from '../../authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../../../encrypted_saved_objects/server/mocks'; @@ -14,7 +14,7 @@ import { AlertsAuthorization } from '../../authorization/alerts_authorization'; import { ActionsAuthorization } from '../../../../actions/server'; import { getBeforeSetup } from './lib'; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); const encryptedSavedObjects = 
encryptedSavedObjectsMock.createClient(); diff --git a/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts b/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts index 1c5edb45c80f..b1ac5ac4c678 100644 --- a/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client_conflict_retries.test.ts @@ -8,7 +8,7 @@ import { cloneDeep } from 'lodash'; import { AlertsClient, ConstructorOptions } from './alerts_client'; import { savedObjectsClientMock, loggingSystemMock } from '../../../../src/core/server/mocks'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { alertTypeRegistryMock } from './alert_type_registry.mock'; import { alertsAuthorizationMock } from './authorization/alerts_authorization.mock'; import { encryptedSavedObjectsMock } from '../../encrypted_saved_objects/server/mocks'; @@ -25,7 +25,7 @@ const MockAlertId = 'alert-id'; const ConflictAfterRetries = RetryForConflictsAttempts + 1; -const taskManager = taskManagerMock.start(); +const taskManager = taskManagerMock.createStart(); const alertTypeRegistry = alertTypeRegistryMock.create(); const unsecuredSavedObjectsClient = savedObjectsClientMock.create(); diff --git a/x-pack/plugins/alerts/server/alerts_client_factory.test.ts b/x-pack/plugins/alerts/server/alerts_client_factory.test.ts index ac91d689798c..770658fdde10 100644 --- a/x-pack/plugins/alerts/server/alerts_client_factory.test.ts +++ b/x-pack/plugins/alerts/server/alerts_client_factory.test.ts @@ -7,7 +7,7 @@ import { Request } from 'hapi'; import { AlertsClientFactory, AlertsClientFactoryOpts } from './alerts_client_factory'; import { alertTypeRegistryMock } from './alert_type_registry.mock'; -import { taskManagerMock } from '../../task_manager/server/task_manager.mock'; +import { taskManagerMock } from '../../task_manager/server/mocks'; import { KibanaRequest } 
from '../../../../src/core/server'; import { savedObjectsClientMock, @@ -35,7 +35,7 @@ const features = featuresPluginMock.createStart(); const securityPluginSetup = securityMock.createSetup(); const alertsClientFactoryParams: jest.Mocked = { logger: loggingSystemMock.create().get(), - taskManager: taskManagerMock.start(), + taskManager: taskManagerMock.createStart(), alertTypeRegistry: alertTypeRegistryMock.create(), getSpaceId: jest.fn(), getSpace: jest.fn(), diff --git a/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts b/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts index b48d173ba36d..a5f83bc393d4 100644 --- a/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts +++ b/x-pack/plugins/alerts/server/usage/alerts_usage_collector.test.ts @@ -6,8 +6,8 @@ import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { registerAlertsUsageCollector } from './alerts_usage_collector'; -import { taskManagerMock } from '../../../task_manager/server/task_manager.mock'; -const taskManagerStart = taskManagerMock.start(); +import { taskManagerMock } from '../../../task_manager/server/mocks'; +const taskManagerStart = taskManagerMock.createStart(); beforeEach(() => jest.resetAllMocks()); diff --git a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts new file mode 100644 index 000000000000..2751cdb60ce9 --- /dev/null +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { TaskManagerPlugin } from './plugin'; +import { coreMock } from '../../../../src/core/server/mocks'; +import { TaskManagerConfig } from './config'; + +describe('TaskManagerPlugin', () => { + describe('setup', () => { + test('throws if no valid UUID is available', async () => { + const pluginInitializerContext = coreMock.createPluginInitializerContext({ + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + }); + + pluginInitializerContext.env.instanceUuid = ''; + + const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); + await expect(taskManagerPlugin.setup(coreMock.createSetup())).rejects.toEqual( + new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`) + ); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index b05334a7c618..6bac284d4846 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -13,6 +13,7 @@ import { setupSavedObjects } from './saved_objects'; import { TaskTypeDictionary } from './task_type_dictionary'; import { FetchResult, SearchOpts, TaskStore } from './task_store'; import { createManagedConfiguration } from './lib/create_managed_configuration'; +import { TaskScheduling } from './task_scheduling'; export type TaskManagerSetupContract = { addMiddleware: (middleware: Middleware) => void } & Pick< TaskTypeDictionary, @@ -20,7 +21,7 @@ export type TaskManagerSetupContract = { addMiddleware: (middleware: Middleware) >; export type TaskManagerStartContract = Pick< - TaskManager, + TaskScheduling, 'schedule' | 'runNow' | 'ensureScheduled' > & Pick; @@ -49,6 +50,15 @@ export class TaskManagerPlugin setupSavedObjects(savedObjects, this.config); this.taskManagerId = this.initContext.env.instanceUuid; + if (!this.taskManagerId) { + this.logger.error( + `TaskManager is
unable to start as there the Kibana UUID is invalid (value of the "server.uuid" configuration is ${this.taskManagerId})` + ); + throw new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`); + } else { + this.logger.info(`TaskManager is identified by the Kibana UUID: ${this.taskManagerId}`); + } + return { /** * Adds middleware to the task manager, such as adding security layers, loggers, etc. @@ -87,7 +97,6 @@ export class TaskManagerPlugin }); const taskManager = new TaskManager({ - taskManagerId: this.taskManagerId!, config: this.config!, definitions: this.definitions, logger: this.logger, @@ -98,6 +107,13 @@ export class TaskManagerPlugin }); this.taskManager = taskManager; + const taskScheduling = new TaskScheduling({ + logger: this.logger, + taskStore, + middleware: this.middleware, + taskManager, + }); + // start polling for work taskManager.start(); @@ -123,9 +139,9 @@ export class TaskManagerPlugin * @returns {Promise} */ remove: (id: string) => taskStore.remove(id), - schedule: (...args) => taskManager.schedule(...args), - ensureScheduled: (...args) => taskManager.ensureScheduled(...args), - runNow: (...args) => taskManager.runNow(...args), + schedule: (...args) => taskScheduling.schedule(...args), + ensureScheduled: (...args) => taskScheduling.ensureScheduled(...args), + runNow: (...args) => taskScheduling.runNow(...args), }; } diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/task_manager.mock.ts index 1fc626e7d58d..204e2d8e8fb7 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/task_manager.mock.ts @@ -4,27 +4,29 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { TaskManagerSetupContract, TaskManagerStartContract } from './plugin'; +import { TaskManager, TaskLifecycleEvent } from './task_manager'; +import { of, Observable } from 'rxjs'; + +const createTaskManagerMock = ({ + isStarted = true, + events$ = of(), +}: { + isStarted?: boolean; + events$?: Observable; +} = {}) => { + return ({ + start: jest.fn(), + attemptToRun: jest.fn(), + get isStarted() { + return isStarted; + }, + get events() { + return events$; + }, + stop: jest.fn(), + } as unknown) as jest.Mocked; +}; export const taskManagerMock = { - setup(overrides: Partial> = {}) { - const mocked: jest.Mocked = { - registerTaskDefinitions: jest.fn(), - addMiddleware: jest.fn(), - ...overrides, - }; - return mocked; - }, - start(overrides: Partial> = {}) { - const mocked: jest.Mocked = { - ensureScheduled: jest.fn(), - schedule: jest.fn(), - fetch: jest.fn(), - get: jest.fn(), - runNow: jest.fn(), - remove: jest.fn(), - ...overrides, - }; - return mocked; - }, + create: createTaskManagerMock, }; diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/task_manager.test.ts index 6ddd8b85e717..b90dd1de38c1 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/task_manager.test.ts @@ -6,24 +6,10 @@ import _ from 'lodash'; import sinon from 'sinon'; -import { of, Subject } from 'rxjs'; -import { none } from 'fp-ts/lib/Option'; +import { of } from 'rxjs'; -import { - asTaskMarkRunningEvent, - asTaskRunEvent, - asTaskClaimEvent, - asTaskRunRequestEvent, -} from './task_events'; -import { - TaskManager, - claimAvailableTasks, - awaitTaskRunResult, - TaskLifecycleEvent, -} from './task_manager'; +import { TaskManager, claimAvailableTasks } from './task_manager'; import { mockLogger } from './test_utils'; -import { asErr, asOk } from './lib/result_type'; -import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; import { 
createInitialMiddleware } from './lib/middleware'; import { TaskTypeDictionary } from './task_type_dictionary'; import { taskStoreMock } from './task_store.mock'; @@ -47,7 +33,6 @@ describe('TaskManager', () => { config, taskStore: mockTaskStore, logger: mockLogger(), - taskManagerId: 'some-uuid', definitions: new TaskTypeDictionary(taskManagerLogger), middleware: createInitialMiddleware(), maxWorkersConfiguration$: of(100), @@ -61,281 +46,17 @@ describe('TaskManager', () => { afterEach(() => clock.restore()); - test('throws if no valid UUID is available', async () => { - expect(() => { - new TaskManager({ - ...taskManagerOpts, - taskManagerId: '', - }); - }).toThrowErrorMatchingInlineSnapshot( - `"TaskManager is unable to start as Kibana has no valid UUID assigned to it."` - ); - }); - - test('allows scheduling tasks', async () => { - const client = new TaskManager(taskManagerOpts); - taskManagerOpts.definitions.registerTaskDefinitions({ - foo: { - type: 'foo', - title: 'Foo', - createTaskRunner: jest.fn(), - }, - }); - client.start(); - const task = { - taskType: 'foo', - params: {}, - state: {}, - }; - await client.schedule(task); - expect(mockTaskStore.schedule).toHaveBeenCalled(); - }); - - test('allows scheduling existing tasks that may have already been scheduled', async () => { - const client = new TaskManager(taskManagerOpts); - taskManagerOpts.definitions.registerTaskDefinitions({ - foo: { - type: 'foo', - title: 'Foo', - createTaskRunner: jest.fn(), - }, - }); - mockTaskStore.schedule.mockRejectedValueOnce({ - statusCode: 409, - }); - - client.start(); - - const result = await client.ensureScheduled({ - id: 'my-foo-id', - taskType: 'foo', - params: {}, - state: {}, - }); + describe('start', () => { + test('begins poilling once start is called', () => { + const taskManager = new TaskManager(taskManagerOpts); - expect(result.id).toEqual('my-foo-id'); - }); - - test('doesnt ignore failure to scheduling existing tasks for reasons other than already being 
scheduled', async () => { - const client = new TaskManager(taskManagerOpts); - taskManagerOpts.definitions.registerTaskDefinitions({ - foo: { - type: 'foo', - title: 'Foo', - createTaskRunner: jest.fn(), - }, - }); - mockTaskStore.schedule.mockRejectedValueOnce({ - statusCode: 500, - }); - - client.start(); - - return expect( - client.ensureScheduled({ - id: 'my-foo-id', - taskType: 'foo', - params: {}, - state: {}, - }) - ).rejects.toMatchObject({ - statusCode: 500, - }); - }); - - test('doesnt allow naively rescheduling existing tasks that have already been scheduled', async () => { - const client = new TaskManager(taskManagerOpts); - taskManagerOpts.definitions.registerTaskDefinitions({ - foo: { - type: 'foo', - title: 'Foo', - createTaskRunner: jest.fn(), - }, - }); - mockTaskStore.schedule.mockRejectedValueOnce({ - statusCode: 409, - }); - - client.start(); - - return expect( - client.schedule({ - id: 'my-foo-id', - taskType: 'foo', - params: {}, - state: {}, - }) - ).rejects.toMatchObject({ - statusCode: 409, - }); - }); - - describe('runNow', () => { - describe('awaitTaskRunResult', () => { - test('resolves when the task run succeeds', () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - const task = { id } as ConcreteTaskInstance; - events$.next(asTaskRunEvent(id, asOk(task))); - - return expect(result).resolves.toEqual({ id }); - }); - - test('rejects when the task run fails', () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - const task = { id } as ConcreteTaskInstance; - events$.next(asTaskClaimEvent(id, asOk(task))); - events$.next(asTaskMarkRunningEvent(id, asOk(task))); - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); - - return 
expect(result).rejects.toMatchInlineSnapshot( - `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` - ); - }); - - test('rejects when the task mark as running fails', () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - const task = { id } as ConcreteTaskInstance; - events$.next(asTaskClaimEvent(id, asOk(task))); - events$.next(asTaskMarkRunningEvent(id, asErr(new Error('some thing gone wrong')))); - - return expect(result).rejects.toMatchInlineSnapshot( - `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` - ); - }); + clock.tick(150); + expect(mockTaskStore.claimAvailableTasks).not.toHaveBeenCalled(); - test('when a task claim fails we ensure the task exists', async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskLifecycleResult.NotFound); + taskManager.start(); - const result = awaitTaskRunResult(id, events$, getLifecycle); - - events$.next(asTaskClaimEvent(id, asErr(none))); - - await expect(result).rejects.toEqual( - new Error(`Failed to run task "${id}" as it does not exist`) - ); - - expect(getLifecycle).toHaveBeenCalledWith(id); - }); - - test('when a task claim fails we ensure the task isnt already claimed', async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskStatus.Claiming); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - events$.next(asTaskClaimEvent(id, asErr(none))); - - await expect(result).rejects.toEqual( - new Error(`Failed to run task "${id}" as it is currently running`) - ); - - expect(getLifecycle).toHaveBeenCalledWith(id); - }); - - test('when a task claim fails we ensure the task isnt already running', 
async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskStatus.Running); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - events$.next(asTaskClaimEvent(id, asErr(none))); - - await expect(result).rejects.toEqual( - new Error(`Failed to run task "${id}" as it is currently running`) - ); - - expect(getLifecycle).toHaveBeenCalledWith(id); - }); - - test('rejects when the task run fails due to capacity', async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskStatus.Idle); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - events$.next(asTaskRunRequestEvent(id, asErr(new Error('failed to buffer request')))); - - await expect(result).rejects.toEqual( - new Error( - `Failed to run task "${id}" as Task Manager is at capacity, please try again later` - ) - ); - expect(getLifecycle).not.toHaveBeenCalled(); - }); - - test('when a task claim fails we return the underlying error if the task is idle', async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskStatus.Idle); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - events$.next(asTaskClaimEvent(id, asErr(none))); - - await expect(result).rejects.toMatchInlineSnapshot( - `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2" for unknown reason (Current Task Lifecycle is "idle")]` - ); - - expect(getLifecycle).toHaveBeenCalledWith(id); - }); - - test('when a task claim fails we return the underlying error if the task is failed', async () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const getLifecycle = jest.fn(async () => TaskStatus.Failed); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - 
events$.next(asTaskClaimEvent(id, asErr(none))); - - await expect(result).rejects.toMatchInlineSnapshot( - `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2" for unknown reason (Current Task Lifecycle is "failed")]` - ); - - expect(getLifecycle).toHaveBeenCalledWith(id); - }); - - test('ignores task run success of other tasks', () => { - const events$ = new Subject(); - const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; - const differentTask = '4bebf429-181b-4518-bb7d-b4246d8a35f0'; - const getLifecycle = jest.fn(); - - const result = awaitTaskRunResult(id, events$, getLifecycle); - - const task = { id } as ConcreteTaskInstance; - const otherTask = { id: differentTask } as ConcreteTaskInstance; - events$.next(asTaskClaimEvent(id, asOk(task))); - events$.next(asTaskClaimEvent(differentTask, asOk(otherTask))); - - events$.next(asTaskRunEvent(differentTask, asOk(task))); - - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); - - return expect(result).rejects.toMatchInlineSnapshot( - `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` - ); - }); + clock.tick(150); + expect(mockTaskStore.claimAvailableTasks).toHaveBeenCalled(); }); }); diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/task_manager.ts index f8572cc2dbf7..3a6a747ade37 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/task_manager.ts @@ -4,14 +4,13 @@ * you may not use this file except in compliance with the Elastic License. 
*/ import { Subject, Observable, Subscription } from 'rxjs'; -import { filter } from 'rxjs/operators'; import { performance } from 'perf_hooks'; import { pipe } from 'fp-ts/lib/pipeable'; -import { Option, some, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; +import { Option, some, map as mapOptional } from 'fp-ts/lib/Option'; -import { Result, asOk, asErr, either, map, mapErr, promiseResult } from './lib/result_type'; +import { Result, asErr, mapErr } from './lib/result_type'; import { ManagedConfiguration } from './lib/create_managed_configuration'; import { TaskManagerConfig } from './config'; @@ -21,22 +20,12 @@ import { TaskRun, TaskClaim, TaskRunRequest, - isTaskRunEvent, - isTaskClaimEvent, - isTaskRunRequestEvent, asTaskRunRequestEvent, } from './task_events'; import { fillPool, FillPoolResult } from './lib/fill_pool'; import { Middleware } from './lib/middleware'; import { intervalFromNow } from './lib/intervals'; -import { - ConcreteTaskInstance, - TaskInstanceWithId, - TaskInstanceWithDeprecatedFields, - TaskLifecycle, - TaskLifecycleResult, - TaskStatus, -} from './task'; +import { ConcreteTaskInstance } from './task'; import { createTaskPoller, PollingError, @@ -47,25 +36,17 @@ import { TaskPool } from './task_pool'; import { TaskManagerRunner, TaskRunner } from './task_runner'; import { TaskStore, OwnershipClaimingOpts, ClaimOwnershipResult } from './task_store'; import { identifyEsError } from './lib/identify_es_error'; -import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { BufferedTaskStore } from './buffered_task_store'; import { TaskTypeDictionary } from './task_type_dictionary'; -const VERSION_CONFLICT_STATUS = 409; - export type TaskManagerOpts = { logger: Logger; definitions: TaskTypeDictionary; taskStore: TaskStore; config: TaskManagerConfig; - taskManagerId: string; middleware: Middleware; } & ManagedConfiguration; -interface RunNowResult { - id: string; -} - export type TaskLifecycleEvent = 
TaskMarkRunning | TaskRun | TaskClaim | TaskRunRequest; /* @@ -106,25 +87,10 @@ export class TaskManager { * mechanism. */ constructor(opts: TaskManagerOpts) { - const { - logger, - taskManagerId, - middleware, - maxWorkersConfiguration$, - pollIntervalConfiguration$, - } = opts; + const { logger, middleware, maxWorkersConfiguration$, pollIntervalConfiguration$ } = opts; this.logger = logger; this.middleware = middleware; - if (!taskManagerId) { - this.logger.error( - `TaskManager is unable to start as there the Kibana UUID is invalid (value of the "server.uuid" configuration is ${taskManagerId})` - ); - throw new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`); - } else { - this.logger.info(`TaskManager is identified by the Kibana UUID: ${taskManagerId}`); - } - this.definitions = opts.definitions; this.store = opts.taskStore; // pipe store events into the TaskManager's event stream @@ -175,11 +141,15 @@ export class TaskManager { ); } + public get events(): Observable { + return this.events$; + } + private emitEvent = (event: TaskLifecycleEvent) => { this.events$.next(event); }; - private attemptToRun(task: string) { + public attemptToRun(task: string) { this.claimRequests$.next(some(task)); } @@ -244,59 +214,6 @@ export class TaskManager { this.pool.cancelRunningTasks(); } } - - /** - * Schedules a task. - * - * @param task - The task being scheduled. - * @returns {Promise} - */ - public async schedule( - taskInstance: TaskInstanceWithDeprecatedFields, - options?: Record - ): Promise { - const { taskInstance: modifiedTask } = await this.middleware.beforeSave({ - ...options, - taskInstance: ensureDeprecatedFieldsAreCorrected(taskInstance, this.logger), - }); - return await this.store.schedule(modifiedTask); - } - - /** - * Run task. - * - * @param taskId - The task being scheduled. 
- * @returns {Promise} - */ - public async runNow(taskId: string): Promise { - return new Promise(async (resolve, reject) => { - awaitTaskRunResult(taskId, this.events$, this.store.getLifecycle.bind(this.store)) - .then(resolve) - .catch(reject); - - this.attemptToRun(taskId); - }); - } - - /** - * Schedules a task with an Id - * - * @param task - The task being scheduled. - * @returns {Promise} - */ - public async ensureScheduled( - taskInstance: TaskInstanceWithId, - options?: Record - ): Promise { - try { - return await this.schedule(taskInstance, options); - } catch (err) { - if (err.statusCode === VERSION_CONFLICT_STATUS) { - return taskInstance; - } - throw err; - } - } } export async function claimAvailableTasks( @@ -350,76 +267,3 @@ export async function claimAvailableTasks( } return []; } - -export async function awaitTaskRunResult( - taskId: string, - events$: Subject, - getLifecycle: (id: string) => Promise -): Promise { - return new Promise((resolve, reject) => { - const subscription = events$ - // listen for all events related to the current task - .pipe(filter(({ id }: TaskLifecycleEvent) => id === taskId)) - .subscribe((taskEvent: TaskLifecycleEvent) => { - if (isTaskClaimEvent(taskEvent)) { - mapErr(async (error: Option) => { - // reject if any error event takes place for the requested task - subscription.unsubscribe(); - return reject( - map( - await pipe( - error, - mapOptional(async (taskReturnedBySweep) => asOk(taskReturnedBySweep.status)), - getOrElse(() => - // if the error happened in the Claim phase - we try to provide better insight - // into why we failed to claim by getting the task's current lifecycle status - promiseResult(getLifecycle(taskId)) - ) - ), - (taskLifecycleStatus: TaskLifecycle) => { - if (taskLifecycleStatus === TaskLifecycleResult.NotFound) { - return new Error(`Failed to run task "${taskId}" as it does not exist`); - } else if ( - taskLifecycleStatus === TaskStatus.Running || - taskLifecycleStatus === TaskStatus.Claiming 
- ) { - return new Error(`Failed to run task "${taskId}" as it is currently running`); - } - return new Error( - `Failed to run task "${taskId}" for unknown reason (Current Task Lifecycle is "${taskLifecycleStatus}")` - ); - }, - (getLifecycleError: Error) => - new Error( - `Failed to run task "${taskId}" and failed to get current Status:${getLifecycleError}` - ) - ) - ); - }, taskEvent.event); - } else { - either>( - taskEvent.event, - (taskInstance: ConcreteTaskInstance) => { - // resolve if the task has run sucessfully - if (isTaskRunEvent(taskEvent)) { - subscription.unsubscribe(); - resolve({ id: taskInstance.id }); - } - }, - async (error: Error | Option) => { - // reject if any error event takes place for the requested task - subscription.unsubscribe(); - if (isTaskRunRequestEvent(taskEvent)) { - return reject( - new Error( - `Failed to run task "${taskId}" as Task Manager is at capacity, please try again later` - ) - ); - } - return reject(new Error(`Failed to run task "${taskId}": ${error}`)); - } - ); - } - }); - }); -} diff --git a/x-pack/plugins/task_manager/server/task_scheduling.mock.ts b/x-pack/plugins/task_manager/server/task_scheduling.mock.ts new file mode 100644 index 000000000000..5a6a369ad7a4 --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_scheduling.mock.ts @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { TaskScheduling } from './task_scheduling'; + +const createTaskSchedulingMock = () => { + return ({ + ensureScheduled: jest.fn(), + schedule: jest.fn(), + runNow: jest.fn(), + } as unknown) as jest.Mocked; +}; + +export const taskSchedulingMock = { + create: createTaskSchedulingMock, +}; diff --git a/x-pack/plugins/task_manager/server/task_scheduling.test.ts b/x-pack/plugins/task_manager/server/task_scheduling.test.ts new file mode 100644 index 000000000000..0564738d1f45 --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_scheduling.test.ts @@ -0,0 +1,321 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import _ from 'lodash'; +import { Subject } from 'rxjs'; +import { none } from 'fp-ts/lib/Option'; + +import { + asTaskMarkRunningEvent, + asTaskRunEvent, + asTaskClaimEvent, + asTaskRunRequestEvent, +} from './task_events'; +import { TaskLifecycleEvent } from './task_manager'; +import { taskManagerMock } from './task_manager.mock'; +import { TaskScheduling } from './task_scheduling'; +import { mockLogger } from './test_utils'; +import { asErr, asOk } from './lib/result_type'; +import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; +import { createInitialMiddleware } from './lib/middleware'; +import { taskStoreMock } from './task_store.mock'; + +describe('TaskScheduling', () => { + const mockTaskStore = taskStoreMock.create({}); + const mockTaskManager = taskManagerMock.create({}); + const taskSchedulingOpts = { + taskStore: mockTaskStore, + taskManager: mockTaskManager, + logger: mockLogger(), + middleware: createInitialMiddleware(), + }; + + beforeEach(() => { + jest.resetAllMocks(); + }); + + test('allows scheduling tasks', async () => { + const taskScheduling = new TaskScheduling(taskSchedulingOpts); + const task 
= { + taskType: 'foo', + params: {}, + state: {}, + }; + await taskScheduling.schedule(task); + expect(mockTaskStore.schedule).toHaveBeenCalled(); + }); + + test('allows scheduling existing tasks that may have already been scheduled', async () => { + const taskScheduling = new TaskScheduling(taskSchedulingOpts); + mockTaskStore.schedule.mockRejectedValueOnce({ + statusCode: 409, + }); + + const result = await taskScheduling.ensureScheduled({ + id: 'my-foo-id', + taskType: 'foo', + params: {}, + state: {}, + }); + + expect(result.id).toEqual('my-foo-id'); + }); + + test('doesnt ignore failure to scheduling existing tasks for reasons other than already being scheduled', async () => { + const taskScheduling = new TaskScheduling(taskSchedulingOpts); + mockTaskStore.schedule.mockRejectedValueOnce({ + statusCode: 500, + }); + + return expect( + taskScheduling.ensureScheduled({ + id: 'my-foo-id', + taskType: 'foo', + params: {}, + state: {}, + }) + ).rejects.toMatchObject({ + statusCode: 500, + }); + }); + + test('doesnt allow naively rescheduling existing tasks that have already been scheduled', async () => { + const taskScheduling = new TaskScheduling(taskSchedulingOpts); + mockTaskStore.schedule.mockRejectedValueOnce({ + statusCode: 409, + }); + + return expect( + taskScheduling.schedule({ + id: 'my-foo-id', + taskType: 'foo', + params: {}, + state: {}, + }) + ).rejects.toMatchObject({ + statusCode: 409, + }); + }); + + describe('runNow', () => { + test('resolves when the task run succeeds', () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + const task = { id } as ConcreteTaskInstance; + events$.next(asTaskRunEvent(id, asOk(task))); + + return expect(result).resolves.toEqual({ id }); + }); + + test('rejects when the task run fails', () => { + const 
events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + const task = { id } as ConcreteTaskInstance; + events$.next(asTaskClaimEvent(id, asOk(task))); + events$.next(asTaskMarkRunningEvent(id, asOk(task))); + events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + + return expect(result).rejects.toMatchInlineSnapshot( + `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` + ); + }); + + test('rejects when the task mark as running fails', () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + const task = { id } as ConcreteTaskInstance; + events$.next(asTaskClaimEvent(id, asOk(task))); + events$.next(asTaskMarkRunningEvent(id, asErr(new Error('some thing gone wrong')))); + + return expect(result).rejects.toMatchInlineSnapshot( + `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` + ); + }); + + test('when a task claim fails we ensure the task exists', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskLifecycleResult.NotFound); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskClaimEvent(id, asErr(none))); + + await expect(result).rejects.toEqual( + new Error(`Failed to run task "${id}" as it does not exist`) + ); + + expect(mockTaskStore.getLifecycle).toHaveBeenCalledWith(id); + }); + + 
test('when a task claim fails we ensure the task isnt already claimed', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskStatus.Claiming); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskClaimEvent(id, asErr(none))); + + await expect(result).rejects.toEqual( + new Error(`Failed to run task "${id}" as it is currently running`) + ); + + expect(mockTaskStore.getLifecycle).toHaveBeenCalledWith(id); + }); + + test('when a task claim fails we ensure the task isnt already running', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskStatus.Running); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskClaimEvent(id, asErr(none))); + + await expect(result).rejects.toEqual( + new Error(`Failed to run task "${id}" as it is currently running`) + ); + + expect(mockTaskStore.getLifecycle).toHaveBeenCalledWith(id); + }); + + test('rejects when the task run fails due to capacity', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskStatus.Idle); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskRunRequestEvent(id, asErr(new Error('failed to buffer request')))); + + await expect(result).rejects.toEqual( + new Error( + `Failed to run task "${id}" as Task Manager is at capacity, please try again later` + ) + ); + 
expect(mockTaskStore.getLifecycle).not.toHaveBeenCalled(); + }); + + test('when a task claim fails we return the underlying error if the task is idle', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskStatus.Idle); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskClaimEvent(id, asErr(none))); + + await expect(result).rejects.toMatchInlineSnapshot( + `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2" for unknown reason (Current Task Lifecycle is "idle")]` + ); + + expect(mockTaskStore.getLifecycle).toHaveBeenCalledWith(id); + }); + + test('when a task claim fails we return the underlying error if the task is failed', async () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + + mockTaskStore.getLifecycle.mockResolvedValue(TaskStatus.Failed); + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + events$.next(asTaskClaimEvent(id, asErr(none))); + + await expect(result).rejects.toMatchInlineSnapshot( + `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2" for unknown reason (Current Task Lifecycle is "failed")]` + ); + + expect(mockTaskStore.getLifecycle).toHaveBeenCalledWith(id); + }); + + test('ignores task run success of other tasks', () => { + const events$ = new Subject(); + const id = '01ddff11-e88a-4d13-bc4e-256164e755e2'; + const differentTask = '4bebf429-181b-4518-bb7d-b4246d8a35f0'; + + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + taskManager: taskManagerMock.create({ events$ }), + }); + + const result = taskScheduling.runNow(id); + + const task = { id } as ConcreteTaskInstance; + const 
otherTask = { id: differentTask } as ConcreteTaskInstance; + events$.next(asTaskClaimEvent(id, asOk(task))); + events$.next(asTaskClaimEvent(differentTask, asOk(otherTask))); + + events$.next(asTaskRunEvent(differentTask, asOk(task))); + + events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + + return expect(result).rejects.toMatchInlineSnapshot( + `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` + ); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/task_scheduling.ts b/x-pack/plugins/task_manager/server/task_scheduling.ts new file mode 100644 index 000000000000..45d1027fa1d8 --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_scheduling.ts @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +import { filter } from 'rxjs/operators'; + +import { pipe } from 'fp-ts/lib/pipeable'; +import { Option, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; + +import { asOk, either, map, mapErr, promiseResult } from './lib/result_type'; + +import { Logger } from './types'; +import { isTaskRunEvent, isTaskClaimEvent, isTaskRunRequestEvent } from './task_events'; +import { Middleware } from './lib/middleware'; +import { + ConcreteTaskInstance, + TaskInstanceWithId, + TaskInstanceWithDeprecatedFields, + TaskLifecycle, + TaskLifecycleResult, + TaskStatus, +} from './task'; +import { TaskStore } from './task_store'; +import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; +import { TaskLifecycleEvent, TaskManager } from './task_manager'; + +const VERSION_CONFLICT_STATUS = 409; + +export interface TaskSchedulingOpts { + logger: Logger; + taskStore: TaskStore; + taskManager: TaskManager; + middleware: Middleware; +} + +interface RunNowResult { + id: string; +} + +export class TaskScheduling { + private store: TaskStore; + private taskManager: TaskManager; + private logger: Logger; + private middleware: Middleware; + + /** + * Stores the logger, middleware, task manager and task store supplied in `opts` + * for use by the scheduling methods of this class. Nothing else happens here: + * no middleware is locked and no polling is started. + */ + constructor(opts: TaskSchedulingOpts) { + this.logger = opts.logger; + this.middleware = opts.middleware; + this.taskManager = opts.taskManager; + this.store = opts.taskStore; + } + + /** + * Schedules a task. + * + * @param taskInstance - The task being scheduled.
+ * @returns {Promise} + */ + public async schedule( + taskInstance: TaskInstanceWithDeprecatedFields, + options?: Record + ): Promise { + const { taskInstance: modifiedTask } = await this.middleware.beforeSave({ + ...options, + taskInstance: ensureDeprecatedFieldsAreCorrected(taskInstance, this.logger), + }); + return await this.store.schedule(modifiedTask); + } + + /** + * Asks the task manager to run a task now and settles with the outcome of that run. + * + * @param taskId - The id of the task to run. + * @returns {Promise} + */ + public async runNow(taskId: string): Promise { + return new Promise(async (resolve, reject) => { + this.awaitTaskRunResult(taskId).then(resolve).catch(reject); + this.taskManager.attemptToRun(taskId); + }); + } + + /** + * Schedules a task with an Id; on a version conflict (409) the given task is returned as-is + * + * @param taskInstance - The task being scheduled. + * @returns {Promise} + */ + public async ensureScheduled( + taskInstance: TaskInstanceWithId, + options?: Record + ): Promise { + try { + return await this.schedule(taskInstance, options); + } catch (err) { + if (err.statusCode === VERSION_CONFLICT_STATUS) { + return taskInstance; + } + throw err; + } + } + + private async awaitTaskRunResult(taskId: string): Promise { + return new Promise((resolve, reject) => { + const subscription = this.taskManager.events + // listen for all events related to the current task + .pipe(filter(({ id }: TaskLifecycleEvent) => id === taskId)) + .subscribe((taskEvent: TaskLifecycleEvent) => { + if (isTaskClaimEvent(taskEvent)) { + mapErr(async (error: Option) => { + // reject if any error event takes place for the requested task + subscription.unsubscribe(); + return reject( + map( + await pipe( + error, + mapOptional(async (taskReturnedBySweep) => asOk(taskReturnedBySweep.status)), + getOrElse(() => + // if the error happened in the Claim phase - we try to provide better insight + // into why we failed to claim by getting the task's current lifecycle status + promiseResult(this.store.getLifecycle(taskId)) + ) + ), + (taskLifecycleStatus: TaskLifecycle) => { + if
+ (taskLifecycleStatus === TaskLifecycleResult.NotFound) { + return new Error(`Failed to run task "${taskId}" as it does not exist`); + } else if ( + taskLifecycleStatus === TaskStatus.Running || + taskLifecycleStatus === TaskStatus.Claiming + ) { + return new Error(`Failed to run task "${taskId}" as it is currently running`); + } + return new Error( + `Failed to run task "${taskId}" for unknown reason (Current Task Lifecycle is "${taskLifecycleStatus}")` + ); + }, + (getLifecycleError: Error) => + new Error( + `Failed to run task "${taskId}" and failed to get current Status:${getLifecycleError}` + ) + ) + ); + }, taskEvent.event); + } else { + either>( + taskEvent.event, + (taskInstance: ConcreteTaskInstance) => { + // resolve if the task has run successfully + if (isTaskRunEvent(taskEvent)) { + subscription.unsubscribe(); + resolve({ id: taskInstance.id }); + } + }, + async (error: Error | Option) => { + // reject if any error event takes place for the requested task + subscription.unsubscribe(); + if (isTaskRunRequestEvent(taskEvent)) { + return reject( + new Error( + `Failed to run task "${taskId}" as Task Manager is at capacity, please try again later` + ) + ); + } + return reject(new Error(`Failed to run task "${taskId}": ${error}`)); + } + ); + } + }); + }); + } +} From 02afe234597e927a6d17ff6bc98c3b84399543de Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 18:47:38 +0100 Subject: [PATCH 48/67] renamed task_manager to polling_lifecycle --- .../alerts/server/alerts_client/tests/lib.ts | 7 ++-- .../server/lib/bulk_operation_buffer.test.ts | 4 +-- .../server/lib/bulk_operation_buffer.ts | 2 +- .../lib/correct_deprecated_fields.test.ts | 12 +++---- .../server/lib/correct_deprecated_fields.ts | 2 +- .../lib/create_managed_configuration.test.ts | 4 +-- .../lib/create_managed_configuration.ts | 2 +- x-pack/plugins/task_manager/server/plugin.ts | 18 +++++------ .../server/polling/task_poller.ts | 2 +- ...ager.mock.ts => polling_lifecycle.mock.ts}
| 10 +++--- ...ager.test.ts => polling_lifecycle.test.ts} | 22 ++++++------- .../{task_manager.ts => polling_lifecycle.ts} | 20 +++--------- .../mark_available_tasks_as_claimed.test.ts | 4 +-- .../plugins/task_manager/server/task_pool.ts | 2 +- .../task_manager/server/task_runner.test.ts | 7 ++-- .../task_manager/server/task_runner.ts | 2 +- .../server/task_scheduling.test.ts | 32 +++++++++---------- .../task_manager/server/task_scheduling.ts | 15 ++++----- .../task_manager/server/task_store.test.ts | 8 ++--- .../server/task_type_dictionary.ts | 2 +- x-pack/plugins/task_manager/server/types.ts | 16 ---------- 21 files changed, 82 insertions(+), 111 deletions(-) rename x-pack/plugins/task_manager/server/{task_manager.mock.ts => polling_lifecycle.mock.ts} (69%) rename x-pack/plugins/task_manager/server/{task_manager.test.ts => polling_lifecycle.test.ts} (89%) rename x-pack/plugins/task_manager/server/{task_manager.ts => polling_lifecycle.ts} (92%) delete mode 100644 x-pack/plugins/task_manager/server/types.ts diff --git a/x-pack/plugins/alerts/server/alerts_client/tests/lib.ts b/x-pack/plugins/alerts/server/alerts_client/tests/lib.ts index 96e49e21b904..5ebb4e90d4b5 100644 --- a/x-pack/plugins/alerts/server/alerts_client/tests/lib.ts +++ b/x-pack/plugins/alerts/server/alerts_client/tests/lib.ts @@ -3,8 +3,7 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. 
*/ -// eslint-disable-next-line @kbn/eslint/no-restricted-paths -import { TaskManager } from '../../../../task_manager/server/task_manager'; +import { taskManagerMock } from '../../../../task_manager/server/mocks'; import { IEventLogClient } from '../../../../event_log/server'; import { actionsClientMock } from '../../../../actions/server/mocks'; import { ConstructorOptions } from '../alerts_client'; @@ -41,9 +40,7 @@ export function setGlobalDate() { export function getBeforeSetup( alertsClientParams: jest.Mocked, - taskManager: jest.Mocked< - Pick - >, + taskManager: ReturnType, alertTypeRegistry: jest.Mocked>, eventLogClient?: jest.Mocked ) { diff --git a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.test.ts b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.test.ts index c007b3233849..ff84b7eac058 100644 --- a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.test.ts +++ b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.test.ts @@ -6,7 +6,7 @@ import { createBuffer, Entity, OperationError, BulkOperation } from './bulk_operation_buffer'; import { mapErr, asOk, asErr, Ok, Err } from './result_type'; -import { mockLogger } from '../test_utils'; +import { loggingSystemMock } from '../../../../../src/core/server/mocks'; interface TaskInstance extends Entity { attempts: number; @@ -238,7 +238,7 @@ describe('Bulk Operation Buffer', () => { } ); - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const bufferedUpdate = createBuffer(bulkUpdate, { logger }); diff --git a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts index 57a14c2f8a56..6df5b064f279 100644 --- a/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts +++ b/x-pack/plugins/task_manager/server/lib/bulk_operation_buffer.ts @@ -8,7 +8,7 @@ import { map } from 'lodash'; import { Subject, race, from } from 'rxjs'; import { bufferWhen, filter, 
bufferCount, flatMap, mapTo, first } from 'rxjs/operators'; import { either, Result, asOk, asErr, Ok, Err } from './result_type'; -import { Logger } from '../types'; +import { Logger } from '../../../../../src/core/server'; export interface BufferOptions { bufferMaxDuration?: number; diff --git a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.test.ts b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.test.ts index 408e8d36d349..5bc3421f242c 100644 --- a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.test.ts +++ b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.test.ts @@ -4,8 +4,8 @@ * you may not use this file except in compliance with the Elastic License. */ +import { loggingSystemMock } from '../../../../../src/core/server/mocks'; import { ensureDeprecatedFieldsAreCorrected } from './correct_deprecated_fields'; -import { mockLogger } from '../test_utils'; describe('ensureDeprecatedFieldsAreCorrected', () => { test('doesnt change tasks without any schedule fields', async () => { @@ -17,7 +17,7 @@ describe('ensureDeprecatedFieldsAreCorrected', () => { params: {}, state: {}, }, - mockLogger() + loggingSystemMock.create().get() ) ).toEqual({ id: 'my-foo-id', @@ -36,7 +36,7 @@ describe('ensureDeprecatedFieldsAreCorrected', () => { params: {}, state: {}, }, - mockLogger() + loggingSystemMock.create().get() ) ).toEqual({ id: 'my-foo-id', @@ -56,7 +56,7 @@ describe('ensureDeprecatedFieldsAreCorrected', () => { params: {}, state: {}, }, - mockLogger() + loggingSystemMock.create().get() ) ).toEqual({ id: 'my-foo-id', @@ -67,7 +67,7 @@ describe('ensureDeprecatedFieldsAreCorrected', () => { }); }); test('logs a warning when a deprecated inteval is corrected on a task', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); ensureDeprecatedFieldsAreCorrected( { taskType: 'foo', @@ -82,7 +82,7 @@ describe('ensureDeprecatedFieldsAreCorrected', () => { ); }); test('logs 
a warning when a deprecated inteval is corrected on a task with an id', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); ensureDeprecatedFieldsAreCorrected( { id: 'my-foo-id', diff --git a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts index 2de95cbb8c2f..9e5f4b7c143a 100644 --- a/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts +++ b/x-pack/plugins/task_manager/server/lib/correct_deprecated_fields.ts @@ -5,7 +5,7 @@ */ import { TaskInstance, TaskInstanceWithDeprecatedFields } from '../task'; -import { Logger } from '../types'; +import { Logger } from '../../../../../src/core/server'; export function ensureDeprecatedFieldsAreCorrected( { id, taskType, interval, schedule, ...taskInstance }: TaskInstanceWithDeprecatedFields, diff --git a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.test.ts b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.test.ts index b6b5cd003c5d..f50f5e5b9933 100644 --- a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.test.ts @@ -6,7 +6,7 @@ import sinon from 'sinon'; import { Subject } from 'rxjs'; -import { mockLogger } from '../test_utils'; +import { loggingSystemMock } from '../../../../../src/core/server/mocks'; import { SavedObjectsErrorHelpers } from '../../../../../src/core/server'; import { createManagedConfiguration, @@ -15,7 +15,7 @@ import { describe('createManagedConfiguration()', () => { let clock: sinon.SinonFakeTimers; - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); beforeEach(() => { jest.resetAllMocks(); diff --git a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts index a8cb1b178d24..9d093ec0c671 100644 
--- a/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts +++ b/x-pack/plugins/task_manager/server/lib/create_managed_configuration.ts @@ -7,7 +7,7 @@ import { interval, merge, of, Observable } from 'rxjs'; import { filter, mergeScan, map, scan, distinctUntilChanged, startWith } from 'rxjs/operators'; import { SavedObjectsErrorHelpers } from '../../../../../src/core/server'; -import { Logger } from '../types'; +import { Logger } from '../../../../../src/core/server'; const FLUSH_MARKER = Symbol('flush'); export const ADJUST_THROUGHPUT_INTERVAL = 10 * 1000; diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 6bac284d4846..b572e67fee6b 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -6,7 +6,7 @@ import { PluginInitializerContext, Plugin, CoreSetup, Logger, CoreStart } from 'src/core/server'; import { first } from 'rxjs/operators'; import { ElasticJs, TaskDefinition } from './task'; -import { TaskManager } from './task_manager'; +import { TaskPollingLifecycle } from './polling_lifecycle'; import { TaskManagerConfig } from './config'; import { createInitialMiddleware, addMiddlewareToChain, Middleware } from './lib/middleware'; import { setupSavedObjects } from './saved_objects'; @@ -28,7 +28,7 @@ export type TaskManagerStartContract = Pick< export class TaskManagerPlugin implements Plugin { - private taskManager?: TaskManager; + private taskPollingLifecycle?: TaskPollingLifecycle; private taskManagerId?: string; private config?: TaskManagerConfig; private logger: Logger; @@ -96,7 +96,7 @@ export class TaskManagerPlugin startingPollInterval: this.config!.poll_interval, }); - const taskManager = new TaskManager({ + const taskPollingLifecycle = new TaskPollingLifecycle({ config: this.config!, definitions: this.definitions, logger: this.logger, @@ -105,17 +105,17 @@ export class TaskManagerPlugin maxWorkersConfiguration$, 
pollIntervalConfiguration$, }); - this.taskManager = taskManager; + this.taskPollingLifecycle = taskPollingLifecycle; const taskScheduling = new TaskScheduling({ logger: this.logger, taskStore, middleware: this.middleware, - taskManager, + taskPollingLifecycle, }); // start polling for work - taskManager.start(); + taskPollingLifecycle.start(); return { /** @@ -146,8 +146,8 @@ export class TaskManagerPlugin } public stop() { - if (this.taskManager) { - this.taskManager.stop(); + if (this.taskPollingLifecycle) { + this.taskPollingLifecycle.stop(); } } @@ -158,7 +158,7 @@ export class TaskManagerPlugin * @returns void */ private assertStillInSetup(operation: string) { - if (this.taskManager?.isStarted) { + if (this.taskPollingLifecycle?.isStarted) { throw new Error(`Cannot ${operation} after the task manager has started`); } } diff --git a/x-pack/plugins/task_manager/server/polling/task_poller.ts b/x-pack/plugins/task_manager/server/polling/task_poller.ts index 7515668a19d4..3d48453aa5a9 100644 --- a/x-pack/plugins/task_manager/server/polling/task_poller.ts +++ b/x-pack/plugins/task_manager/server/polling/task_poller.ts @@ -15,7 +15,7 @@ import { mapTo, filter, scan, concatMap, tap, catchError, switchMap } from 'rxjs import { pipe } from 'fp-ts/lib/pipeable'; import { Option, none, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; -import { Logger } from '../types'; +import { Logger } from '../../../../../src/core/server'; import { pullFromSet } from '../lib/pull_from_set'; import { Result, diff --git a/x-pack/plugins/task_manager/server/task_manager.mock.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts similarity index 69% rename from x-pack/plugins/task_manager/server/task_manager.mock.ts rename to x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts index 204e2d8e8fb7..05aed2c9e811 100644 --- a/x-pack/plugins/task_manager/server/task_manager.mock.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts @@ -4,10 +4,10 @@ 
* you may not use this file except in compliance with the Elastic License. */ -import { TaskManager, TaskLifecycleEvent } from './task_manager'; +import { TaskPollingLifecycle, TaskLifecycleEvent } from './polling_lifecycle'; import { of, Observable } from 'rxjs'; -const createTaskManagerMock = ({ +const createTaskPollingLifecycleMock = ({ isStarted = true, events$ = of(), }: { @@ -24,9 +24,9 @@ const createTaskManagerMock = ({ return events$; }, stop: jest.fn(), - } as unknown) as jest.Mocked; + } as unknown) as jest.Mocked; }; -export const taskManagerMock = { - create: createTaskManagerMock, +export const taskPollingLifecycleMock = { + create: createTaskPollingLifecycleMock, }; diff --git a/x-pack/plugins/task_manager/server/task_manager.test.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts similarity index 89% rename from x-pack/plugins/task_manager/server/task_manager.test.ts rename to x-pack/plugins/task_manager/server/polling_lifecycle.test.ts index b90dd1de38c1..4c92b6df7d89 100644 --- a/x-pack/plugins/task_manager/server/task_manager.test.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts @@ -8,13 +8,13 @@ import _ from 'lodash'; import sinon from 'sinon'; import { of } from 'rxjs'; -import { TaskManager, claimAvailableTasks } from './task_manager'; -import { mockLogger } from './test_utils'; +import { TaskPollingLifecycle, claimAvailableTasks } from './polling_lifecycle'; +import { loggingSystemMock } from '../../../../src/core/server/mocks'; import { createInitialMiddleware } from './lib/middleware'; import { TaskTypeDictionary } from './task_type_dictionary'; import { taskStoreMock } from './task_store.mock'; -describe('TaskManager', () => { +describe('TaskPollingLifecycle', () => { let clock: sinon.SinonFakeTimers; const config = { @@ -27,12 +27,12 @@ describe('TaskManager', () => { request_capacity: 1000, }; - const taskManagerLogger = mockLogger(); + const taskManagerLogger = loggingSystemMock.create().get(); const 
mockTaskStore = taskStoreMock.create({}); const taskManagerOpts = { config, taskStore: mockTaskStore, - logger: mockLogger(), + logger: taskManagerLogger, definitions: new TaskTypeDictionary(taskManagerLogger), middleware: createInitialMiddleware(), maxWorkersConfiguration$: of(100), @@ -48,7 +48,7 @@ describe('TaskManager', () => { describe('start', () => { test('begins poilling once start is called', () => { - const taskManager = new TaskManager(taskManagerOpts); + const taskManager = new TaskPollingLifecycle(taskManagerOpts); clock.tick(150); expect(mockTaskStore.claimAvailableTasks).not.toHaveBeenCalled(); @@ -62,7 +62,7 @@ describe('TaskManager', () => { describe('claimAvailableTasks', () => { test('should claim Available Tasks when there are available workers', () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const claim = jest.fn(() => Promise.resolve({ docs: [], claimedTasks: 0 })); const availableWorkers = 1; @@ -73,7 +73,7 @@ describe('TaskManager', () => { }); test('should not claim Available Tasks when there are no available workers', () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const claim = jest.fn(() => Promise.resolve({ docs: [], claimedTasks: 0 })); const availableWorkers = 0; @@ -88,7 +88,7 @@ describe('TaskManager', () => { * This is achieved by setting the `script.allowed_types` flag on Elasticsearch to `none` */ test('handles failure due to inline scripts being disabled', () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const claim = jest.fn(() => { throw Object.assign(new Error(), { msg: '[illegal_argument_exception] cannot execute [inline] scripts', @@ -110,8 +110,8 @@ describe('TaskManager', () => { claimAvailableTasks([], claim, 10, logger); expect(logger.warn).toHaveBeenCalledTimes(1); - expect(logger.warn.mock.calls[0][0]).toMatchInlineSnapshot( - `"Task Manager cannot operate when inline scripts are disabled in 
Elasticsearch"` + expect(logger.warn).toHaveBeenCalledWith( + `Task Manager cannot operate when inline scripts are disabled in Elasticsearch` ); }); }); diff --git a/x-pack/plugins/task_manager/server/task_manager.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.ts similarity index 92% rename from x-pack/plugins/task_manager/server/task_manager.ts rename to x-pack/plugins/task_manager/server/polling_lifecycle.ts index 3a6a747ade37..8a506cca699d 100644 --- a/x-pack/plugins/task_manager/server/task_manager.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.ts @@ -9,12 +9,12 @@ import { performance } from 'perf_hooks'; import { pipe } from 'fp-ts/lib/pipeable'; import { Option, some, map as mapOptional } from 'fp-ts/lib/Option'; +import { Logger } from '../../../../src/core/server'; import { Result, asErr, mapErr } from './lib/result_type'; import { ManagedConfiguration } from './lib/create_managed_configuration'; import { TaskManagerConfig } from './config'; -import { Logger } from './types'; import { TaskMarkRunning, TaskRun, @@ -39,7 +39,7 @@ import { identifyEsError } from './lib/identify_es_error'; import { BufferedTaskStore } from './buffered_task_store'; import { TaskTypeDictionary } from './task_type_dictionary'; -export type TaskManagerOpts = { +export type TaskPollingLifecycleOpts = { logger: Logger; definitions: TaskTypeDictionary; taskStore: TaskStore; @@ -49,20 +49,10 @@ export type TaskManagerOpts = { export type TaskLifecycleEvent = TaskMarkRunning | TaskRun | TaskClaim | TaskRunRequest; -/* - * The TaskManager is the public interface into the task manager system. This glues together - * all of the disparate modules in one integration point. 
The task manager operates in two different ways: - * - * - pre-init, it allows middleware registration, but disallows task manipulation - * - post-init, it disallows middleware registration, but allows task manipulation - * - * Due to its complexity, this is mostly tested by integration tests (see readme). - */ - /** * The public interface into the task manager system. */ -export class TaskManager { +export class TaskPollingLifecycle { private definitions: TaskTypeDictionary; private store: TaskStore; @@ -86,14 +76,14 @@ export class TaskManager { * enabling the task manipulation methods, and beginning the background polling * mechanism. */ - constructor(opts: TaskManagerOpts) { + constructor(opts: TaskPollingLifecycleOpts) { const { logger, middleware, maxWorkersConfiguration$, pollIntervalConfiguration$ } = opts; this.logger = logger; this.middleware = middleware; this.definitions = opts.definitions; this.store = opts.taskStore; - // pipe store events into the TaskManager's event stream + // pipe store events into the lifecycle event stream this.store.events.subscribe((event) => this.events$.next(event)); this.bufferedStore = new BufferedTaskStore(this.store, { diff --git a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts index 024c006fdc05..63d8c1575b26 100644 --- a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts +++ b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts @@ -24,11 +24,11 @@ import { } from './mark_available_tasks_as_claimed'; import { TaskTypeDictionary } from '../task_type_dictionary'; -import { mockLogger } from '../test_utils'; +import { loggingSystemMock } from '../../../../../src/core/server/mocks'; describe('mark_available_tasks_as_claimed', () => { test('generates query matching tasks to be claimed when polling for tasks', () => { - const definitions = new 
TaskTypeDictionary(mockLogger()); + const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ sampleTask: { type: 'sampleTask', diff --git a/x-pack/plugins/task_manager/server/task_pool.ts b/x-pack/plugins/task_manager/server/task_pool.ts index 44f5f5648c2a..9f7948ecad34 100644 --- a/x-pack/plugins/task_manager/server/task_pool.ts +++ b/x-pack/plugins/task_manager/server/task_pool.ts @@ -12,7 +12,7 @@ import { Observable } from 'rxjs'; import moment, { Duration } from 'moment'; import { performance } from 'perf_hooks'; import { padStart } from 'lodash'; -import { Logger } from './types'; +import { Logger } from '../../../../src/core/server'; import { TaskRunner } from './task_runner'; import { isTaskSavedObjectNotFoundError } from './lib/is_task_not_found_error'; diff --git a/x-pack/plugins/task_manager/server/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_runner.test.ts index 733922256673..1619b06e60bb 100644 --- a/x-pack/plugins/task_manager/server/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_runner.test.ts @@ -11,7 +11,8 @@ import { asOk, asErr } from './lib/result_type'; import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; import { ConcreteTaskInstance, TaskStatus, TaskDefinition, RunResult } from './task'; import { TaskManagerRunner } from './task_runner'; -import { mockLogger } from './test_utils'; +import { loggingSystemMock } from '../../../../src/core/server/mocks'; +import { Logger } from '../../../../src/core/server'; import { SavedObjectsErrorHelpers } from '../../../../src/core/server'; import moment from 'moment'; import { TaskTypeDictionary } from './task_type_dictionary'; @@ -970,7 +971,7 @@ describe('TaskManagerRunner', () => { function testOpts(opts: TestOpts) { const callCluster = sinon.stub(); const createTaskRunner = sinon.stub(); - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const 
instance = Object.assign( { @@ -1052,7 +1053,7 @@ describe('TaskManagerRunner', () => { expect(logger.warn).not.toHaveBeenCalled(); } else { expect(logger.warn).toHaveBeenCalledTimes(1); - expect(logger.warn.mock.calls[0][0]).toMatch(/invalid task result/i); + expect((logger as jest.Mocked).warn.mock.calls[0][0]).toMatch(/invalid task result/i); } } diff --git a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index 0516bfb70925..cfe0e97cbfec 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -15,10 +15,10 @@ import { performance } from 'perf_hooks'; import Joi from 'joi'; import { identity, defaults, flow } from 'lodash'; +import { Logger } from '../../../../src/core/server'; import { asOk, asErr, mapErr, eitherAsync, unwrap, mapOk, Result } from './lib/result_type'; import { TaskRun, TaskMarkRunning, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; import { intervalFromDate, intervalFromNow } from './lib/intervals'; -import { Logger } from './types'; import { BeforeRunFunction, BeforeMarkRunningFunction } from './lib/middleware'; import { CancelFunction, diff --git a/x-pack/plugins/task_manager/server/task_scheduling.test.ts b/x-pack/plugins/task_manager/server/task_scheduling.test.ts index 0564738d1f45..8c92d7a2de89 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.test.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.test.ts @@ -14,10 +14,10 @@ import { asTaskClaimEvent, asTaskRunRequestEvent, } from './task_events'; -import { TaskLifecycleEvent } from './task_manager'; -import { taskManagerMock } from './task_manager.mock'; +import { TaskLifecycleEvent } from './polling_lifecycle'; +import { taskPollingLifecycleMock } from './polling_lifecycle.mock'; import { TaskScheduling } from './task_scheduling'; -import { mockLogger } from './test_utils'; +import { loggingSystemMock } from 
'../../../../src/core/server/mocks'; import { asErr, asOk } from './lib/result_type'; import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; import { createInitialMiddleware } from './lib/middleware'; @@ -25,11 +25,11 @@ import { taskStoreMock } from './task_store.mock'; describe('TaskScheduling', () => { const mockTaskStore = taskStoreMock.create({}); - const mockTaskManager = taskManagerMock.create({}); + const mockTaskManager = taskPollingLifecycleMock.create({}); const taskSchedulingOpts = { taskStore: mockTaskStore, - taskManager: mockTaskManager, - logger: mockLogger(), + taskPollingLifecycle: mockTaskManager, + logger: loggingSystemMock.create().get(), middleware: createInitialMiddleware(), }; @@ -107,7 +107,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -124,7 +124,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -145,7 +145,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -167,7 +167,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -189,7 +189,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ 
...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -211,7 +211,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -233,7 +233,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -256,7 +256,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -278,7 +278,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); @@ -299,7 +299,7 @@ describe('TaskScheduling', () => { const taskScheduling = new TaskScheduling({ ...taskSchedulingOpts, - taskManager: taskManagerMock.create({ events$ }), + taskPollingLifecycle: taskPollingLifecycleMock.create({ events$ }), }); const result = taskScheduling.runNow(id); diff --git a/x-pack/plugins/task_manager/server/task_scheduling.ts b/x-pack/plugins/task_manager/server/task_scheduling.ts index 45d1027fa1d8..9f44316c32fd 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.ts @@ -8,9 +8,8 @@ import { filter } from 'rxjs/operators'; import { pipe } 
from 'fp-ts/lib/pipeable'; import { Option, map as mapOptional, getOrElse } from 'fp-ts/lib/Option'; +import { Logger } from '../../../../src/core/server'; import { asOk, either, map, mapErr, promiseResult } from './lib/result_type'; - -import { Logger } from './types'; import { isTaskRunEvent, isTaskClaimEvent, isTaskRunRequestEvent } from './task_events'; import { Middleware } from './lib/middleware'; import { @@ -23,14 +22,14 @@ import { } from './task'; import { TaskStore } from './task_store'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; -import { TaskLifecycleEvent, TaskManager } from './task_manager'; +import { TaskLifecycleEvent, TaskPollingLifecycle } from './polling_lifecycle'; const VERSION_CONFLICT_STATUS = 409; export interface TaskSchedulingOpts { logger: Logger; taskStore: TaskStore; - taskManager: TaskManager; + taskPollingLifecycle: TaskPollingLifecycle; middleware: Middleware; } @@ -40,7 +39,7 @@ interface RunNowResult { export class TaskScheduling { private store: TaskStore; - private taskManager: TaskManager; + private taskPollingLifecycle: TaskPollingLifecycle; private logger: Logger; private middleware: Middleware; @@ -52,7 +51,7 @@ export class TaskScheduling { constructor(opts: TaskSchedulingOpts) { this.logger = opts.logger; this.middleware = opts.middleware; - this.taskManager = opts.taskManager; + this.taskPollingLifecycle = opts.taskPollingLifecycle; this.store = opts.taskStore; } @@ -82,7 +81,7 @@ export class TaskScheduling { public async runNow(taskId: string): Promise { return new Promise(async (resolve, reject) => { this.awaitTaskRunResult(taskId).then(resolve).catch(reject); - this.taskManager.attemptToRun(taskId); + this.taskPollingLifecycle.attemptToRun(taskId); }); } @@ -108,7 +107,7 @@ export class TaskScheduling { private async awaitTaskRunResult(taskId: string): Promise { return new Promise((resolve, reject) => { - const subscription = this.taskManager.events + const subscription = 
this.taskPollingLifecycle.events // listen for all events related to the current task .pipe(filter(({ id }: TaskLifecycleEvent) => id === taskId)) .subscribe((taskEvent: TaskLifecycleEvent) => { diff --git a/x-pack/plugins/task_manager/server/task_store.test.ts b/x-pack/plugins/task_manager/server/task_store.test.ts index 5c1eeba223c4..fcfecbb5da13 100644 --- a/x-pack/plugins/task_manager/server/task_store.test.ts +++ b/x-pack/plugins/task_manager/server/task_store.test.ts @@ -17,7 +17,7 @@ import { SerializedConcreteTaskInstance, ConcreteTaskInstance, } from './task'; -import { mockLogger } from './test_utils'; +import { loggingSystemMock } from '../../../../src/core/server/mocks'; import { StoreOpts, OwnershipClaimingOpts, TaskStore, SearchOpts } from './task_store'; import { savedObjectsRepositoryMock } from 'src/core/server/mocks'; import { @@ -46,7 +46,7 @@ const mockedDate = new Date('2019-02-12T21:01:22.479Z'); } }; -const taskDefinitions = new TaskTypeDictionary(mockLogger()); +const taskDefinitions = new TaskTypeDictionary(loggingSystemMock.create().get()); taskDefinitions.registerTaskDefinitions({ report: { type: 'report', @@ -337,7 +337,7 @@ describe('TaskStore', () => { const maxAttempts = _.random(2, 43); const customMaxAttempts = _.random(44, 100); - const definitions = new TaskTypeDictionary(mockLogger()); + const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ foo: { type: 'foo', @@ -470,7 +470,7 @@ describe('TaskStore', () => { test('it supports claiming specific tasks by id', async () => { const maxAttempts = _.random(2, 43); const customMaxAttempts = _.random(44, 100); - const definitions = new TaskTypeDictionary(mockLogger()); + const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ foo: { type: 'foo', diff --git a/x-pack/plugins/task_manager/server/task_type_dictionary.ts 
b/x-pack/plugins/task_manager/server/task_type_dictionary.ts index 8feebaad3af0..5fd2ab47e38a 100644 --- a/x-pack/plugins/task_manager/server/task_type_dictionary.ts +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.ts @@ -6,7 +6,7 @@ import { mapValues } from 'lodash'; import Joi from 'joi'; import { TaskDefinition, validateTaskDefinition } from './task'; -import { Logger } from './types'; +import { Logger } from '../../../../src/core/server'; /* * The TaskManager is the public interface into the task manager system. This glues together diff --git a/x-pack/plugins/task_manager/server/types.ts b/x-pack/plugins/task_manager/server/types.ts deleted file mode 100644 index a38730ad7f76..000000000000 --- a/x-pack/plugins/task_manager/server/types.ts +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. 
- */ - -import { TaskManager as TaskManagerClass } from './task_manager'; - -export type TaskManager = PublicMethodsOf; - -export interface Logger { - info(message: string): void; - debug(message: string): void; - warn(message: string): void; - error(message: string): void; -} From 320ea570af260953178da4a826281c118143628c Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 15 Oct 2020 19:07:19 +0100 Subject: [PATCH 49/67] last bit og logger --- .../server/polling/task_poller.test.ts | 25 ++++++++------- .../task_manager/server/task_pool.test.ts | 32 ++++++++++--------- .../task_manager/server/test_utils/index.ts | 12 ------- 3 files changed, 30 insertions(+), 39 deletions(-) diff --git a/x-pack/plugins/task_manager/server/polling/task_poller.test.ts b/x-pack/plugins/task_manager/server/polling/task_poller.test.ts index 956c8b05f386..f5f1667312d7 100644 --- a/x-pack/plugins/task_manager/server/polling/task_poller.test.ts +++ b/x-pack/plugins/task_manager/server/polling/task_poller.test.ts @@ -9,7 +9,8 @@ import { Subject, of, BehaviorSubject } from 'rxjs'; import { Option, none, some } from 'fp-ts/lib/Option'; import { createTaskPoller, PollingError, PollingErrorType } from './task_poller'; import { fakeSchedulers } from 'rxjs-marbles/jest'; -import { sleep, resolvable, Resolvable, mockLogger } from '../test_utils'; +import { sleep, resolvable, Resolvable } from '../test_utils'; +import { loggingSystemMock } from '../../../../../src/core/server/mocks'; import { asOk, asErr } from '../lib/result_type'; describe('TaskPoller', () => { @@ -24,7 +25,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => true); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, getCapacity: () => 1, @@ -59,7 +60,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => true); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), 
pollInterval$, bufferCapacity, getCapacity: () => 1, @@ -101,7 +102,7 @@ describe('TaskPoller', () => { let hasCapacity = true; createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, @@ -160,7 +161,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => true); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, @@ -206,7 +207,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => true); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, @@ -251,7 +252,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => true); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, @@ -288,7 +289,7 @@ describe('TaskPoller', () => { const handler = jest.fn(); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work: async (...args) => { @@ -339,7 +340,7 @@ describe('TaskPoller', () => { type ResolvableTupple = [string, PromiseLike & Resolvable]; const pollRequests$ = new Subject>(); createTaskPoller<[string, Resolvable], string[]>({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work: async (...resolvables) => { @@ -399,7 +400,7 @@ describe('TaskPoller', () => { const handler = jest.fn(); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work: async (...args) => { @@ 
-440,7 +441,7 @@ describe('TaskPoller', () => { return callCount; }); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, @@ -483,7 +484,7 @@ describe('TaskPoller', () => { const work = jest.fn(async () => {}); const pollRequests$ = new Subject>(); createTaskPoller({ - logger: mockLogger(), + logger: loggingSystemMock.create().get(), pollInterval$: of(pollInterval), bufferCapacity, work, diff --git a/x-pack/plugins/task_manager/server/task_pool.test.ts b/x-pack/plugins/task_manager/server/task_pool.test.ts index 12b731b2b78a..a174af71ef18 100644 --- a/x-pack/plugins/task_manager/server/task_pool.test.ts +++ b/x-pack/plugins/task_manager/server/task_pool.test.ts @@ -7,7 +7,9 @@ import sinon from 'sinon'; import { of, Subject } from 'rxjs'; import { TaskPool, TaskPoolRunResult } from './task_pool'; -import { mockLogger, resolvable, sleep } from './test_utils'; +import { resolvable, sleep } from './test_utils'; +import { loggingSystemMock } from '../../../../src/core/server/mocks'; +import { Logger } from '../../../../src/core/server'; import { asOk } from './lib/result_type'; import { SavedObjectsErrorHelpers } from '../../../../src/core/server'; import moment from 'moment'; @@ -16,7 +18,7 @@ describe('TaskPool', () => { test('occupiedWorkers are a sum of running tasks', async () => { const pool = new TaskPool({ maxWorkers$: of(200), - logger: mockLogger(), + logger: loggingSystemMock.create().get(), }); const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]); @@ -28,7 +30,7 @@ describe('TaskPool', () => { test('availableWorkers are a function of total_capacity - occupiedWorkers', async () => { const pool = new TaskPool({ maxWorkers$: of(10), - logger: mockLogger(), + logger: loggingSystemMock.create().get(), }); const result = await pool.run([{ ...mockTask() }, { ...mockTask() }, { ...mockTask() }]); @@ -41,7 +43,7 @@ describe('TaskPool', () => 
{ const maxWorkers$ = new Subject(); const pool = new TaskPool({ maxWorkers$, - logger: mockLogger(), + logger: loggingSystemMock.create().get(), }); expect(pool.availableWorkers).toEqual(0); @@ -52,7 +54,7 @@ describe('TaskPool', () => { test('does not run tasks that are beyond its available capacity', async () => { const pool = new TaskPool({ maxWorkers$: of(2), - logger: mockLogger(), + logger: loggingSystemMock.create().get(), }); const shouldRun = mockRun(); @@ -71,7 +73,7 @@ describe('TaskPool', () => { }); test('should log when marking a Task as running fails', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ maxWorkers$: of(2), logger, @@ -84,7 +86,7 @@ describe('TaskPool', () => { const result = await pool.run([mockTask(), taskFailedToMarkAsRunning, mockTask()]); - expect(logger.error.mock.calls[0]).toMatchInlineSnapshot(` + expect((logger as jest.Mocked).error.mock.calls[0]).toMatchInlineSnapshot(` Array [ "Failed to mark Task TaskType \\"shooooo\\" as running: Mark Task as running has failed miserably", ] @@ -94,7 +96,7 @@ describe('TaskPool', () => { }); test('should log when running a Task fails', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ maxWorkers$: of(3), logger, @@ -107,7 +109,7 @@ describe('TaskPool', () => { const result = await pool.run([mockTask(), taskFailedToRun, mockTask()]); - expect(logger.warn.mock.calls[0]).toMatchInlineSnapshot(` + expect((logger as jest.Mocked).warn.mock.calls[0]).toMatchInlineSnapshot(` Array [ "Task TaskType \\"shooooo\\" failed in attempt to run: Run Task has failed miserably", ] @@ -117,7 +119,7 @@ describe('TaskPool', () => { }); test('should not log when running a Task fails due to the Task SO having been deleted while in flight', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ 
maxWorkers$: of(3), logger, @@ -139,7 +141,7 @@ describe('TaskPool', () => { }); test('Running a task which fails still takes up capacity', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ maxWorkers$: of(1), logger, @@ -159,7 +161,7 @@ describe('TaskPool', () => { test('clears up capacity when a task completes', async () => { const pool = new TaskPool({ maxWorkers$: of(1), - logger: mockLogger(), + logger: loggingSystemMock.create().get(), }); const firstWork = resolvable(); @@ -202,7 +204,7 @@ describe('TaskPool', () => { }); test('run cancels expired tasks prior to running new tasks', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ maxWorkers$: of(2), logger, @@ -259,7 +261,7 @@ describe('TaskPool', () => { }); test('logs if cancellation errors', async () => { - const logger = mockLogger(); + const logger = loggingSystemMock.create().get(); const pool = new TaskPool({ logger, maxWorkers$: of(20), @@ -290,7 +292,7 @@ describe('TaskPool', () => { // Allow the task to cancel... await cancelled; - expect(logger.error.mock.calls[0][0]).toMatchInlineSnapshot( + expect((logger as jest.Mocked).error.mock.calls[0][0]).toMatchInlineSnapshot( `"Failed to cancel task \\"shooooo!\\": Error: Dern!"` ); }); diff --git a/x-pack/plugins/task_manager/server/test_utils/index.ts b/x-pack/plugins/task_manager/server/test_utils/index.ts index 6f43a60ff42d..69ea7263548c 100644 --- a/x-pack/plugins/task_manager/server/test_utils/index.ts +++ b/x-pack/plugins/task_manager/server/test_utils/index.ts @@ -11,18 +11,6 @@ // Caching this here to avoid setTimeout mocking affecting our tests. const nativeTimeout = setTimeout; -/** - * Creates a mock task manager Logger. 
- */ -export function mockLogger() { - return { - info: jest.fn(), - debug: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - }; -} - export interface Resolvable { resolve: () => void; } From b0275e08eb27713ad0d8a576845c97cdebffddf9 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 13:03:56 +0100 Subject: [PATCH 50/67] removed usage of deprecated legacy es client --- .../task_manager/server/plugin.test.ts | 40 +++ x-pack/plugins/task_manager/server/plugin.ts | 4 +- x-pack/plugins/task_manager/server/task.ts | 6 - .../task_manager/server/task_store.test.ts | 258 ++++++++---------- .../plugins/task_manager/server/task_store.ts | 59 ++-- 5 files changed, 183 insertions(+), 184 deletions(-) diff --git a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts index 2751cdb60ce9..6cde4f3d782c 100644 --- a/x-pack/plugins/task_manager/server/plugin.test.ts +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -28,5 +28,45 @@ describe('TaskManagerPlugin', () => { new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`) ); }); + + test('throws if setup methods are called after start', async () => { + const pluginInitializerContext = coreMock.createPluginInitializerContext({ + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + }); + + const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); + + const setupApi = await taskManagerPlugin.setup(coreMock.createSetup()); + + await taskManagerPlugin.start(coreMock.createStart()); + + expect(() => + setupApi.addMiddleware({ + beforeSave: async (saveOpts) => saveOpts, + beforeRun: async (runOpts) => runOpts, + beforeMarkRunning: async (runOpts) => runOpts, + }) + ).toThrowErrorMatchingInlineSnapshot( + `"Cannot add Middleware after the task manager has started"` + ); + + expect(() => + 
setupApi.registerTaskDefinitions({ + lateRegisteredType: { + type: 'lateRegisteredType', + title: 'lateRegisteredType', + createTaskRunner: () => ({ async run() {} }), + }, + }) + ).toThrowErrorMatchingInlineSnapshot( + `"Cannot register task definitions after the task manager has started"` + ); + }); }); }); diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index b572e67fee6b..5b9686ccc6c7 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -5,7 +5,7 @@ */ import { PluginInitializerContext, Plugin, CoreSetup, Logger, CoreStart } from 'src/core/server'; import { first } from 'rxjs/operators'; -import { ElasticJs, TaskDefinition } from './task'; +import { TaskDefinition } from './task'; import { TaskPollingLifecycle } from './polling_lifecycle'; import { TaskManagerConfig } from './config'; import { createInitialMiddleware, addMiddlewareToChain, Middleware } from './lib/middleware'; @@ -82,7 +82,7 @@ export class TaskManagerPlugin const taskStore = new TaskStore({ serializer: savedObjects.createSerializer(), savedObjectsRepository, - callCluster: (elasticsearch.legacy.client.callAsInternalUser as unknown) as ElasticJs, + esClient: elasticsearch.createClient('taskManager').asInternalUser, index: this.config!.index, maxAttempts: this.config!.max_attempts, definitions: this.definitions, diff --git a/x-pack/plugins/task_manager/server/task.ts b/x-pack/plugins/task_manager/server/task.ts index 83f73ca2f4ad..6551bd47ef9e 100644 --- a/x-pack/plugins/task_manager/server/task.ts +++ b/x-pack/plugins/task_manager/server/task.ts @@ -24,12 +24,6 @@ import Joi from 'joi'; */ type Require = Omit & Required>; -/** - * A loosely typed definition of the elasticjs wrapper. It's beyond the scope - * of this work to try to make a comprehensive type definition of this. 
- */ -export type ElasticJs = (action: string, args: unknown) => Promise; - /** * The run context is passed into a task's run function as its sole argument. */ diff --git a/x-pack/plugins/task_manager/server/task_store.test.ts b/x-pack/plugins/task_manager/server/task_store.test.ts index fcfecbb5da13..b06a6cf909f5 100644 --- a/x-pack/plugins/task_manager/server/task_store.test.ts +++ b/x-pack/plugins/task_manager/server/task_store.test.ts @@ -5,7 +5,6 @@ */ import _ from 'lodash'; -import sinon from 'sinon'; import uuid from 'uuid'; import { filter, take, first } from 'rxjs/operators'; import { Option, some, none } from 'fp-ts/lib/Option'; @@ -17,7 +16,7 @@ import { SerializedConcreteTaskInstance, ConcreteTaskInstance, } from './task'; -import { loggingSystemMock } from '../../../../src/core/server/mocks'; +import { elasticsearchServiceMock, loggingSystemMock } from '../../../../src/core/server/mocks'; import { StoreOpts, OwnershipClaimingOpts, TaskStore, SearchOpts } from './task_store'; import { savedObjectsRepositoryMock } from 'src/core/server/mocks'; import { @@ -29,6 +28,9 @@ import { import { asTaskClaimEvent, TaskEvent } from './task_events'; import { asOk, asErr } from './lib/result_type'; import { TaskTypeDictionary } from './task_type_dictionary'; +import { RequestEvent } from '@elastic/elasticsearch/lib/Transport'; +import { Search, UpdateByQuery } from '@elastic/elasticsearch/api/requestParams'; +import { BoolClauseWithAnyCondition, TermFilter } from './queries/query_clauses'; const savedObjectsClient = savedObjectsRepositoryMock.create(); const serializer = new SavedObjectsSerializer(new SavedObjectTypeRegistry()); @@ -74,7 +76,7 @@ describe('TaskStore', () => { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -199,14 +201,15 @@ describe('TaskStore', () => { 
describe('fetch', () => { let store: TaskStore; - const callCluster = jest.fn(); + let esClient: ReturnType['asInternalUser']; beforeAll(() => { + esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; store = new TaskStore({ index: 'tasky', taskManagerId: '', serializer, - callCluster, + esClient, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -214,16 +217,15 @@ describe('TaskStore', () => { }); async function testFetch(opts?: SearchOpts, hits: unknown[] = []) { - callCluster.mockResolvedValue({ hits: { hits } }); + esClient.search.mockResolvedValue(asApiResponse({ hits: { hits } })); const result = await store.fetch(opts); - expect(callCluster).toHaveBeenCalledTimes(1); - expect(callCluster).toHaveBeenCalledWith('search', expect.anything()); + expect(esClient.search).toHaveBeenCalledTimes(1); return { result, - args: callCluster.mock.calls[0][1], + args: esClient.search.mock.calls[0][0], }; } @@ -258,7 +260,7 @@ describe('TaskStore', () => { test('pushes error from call cluster to errors$', async () => { const firstErrorPromise = store.errors$.pipe(first()).toPromise(); - callCluster.mockRejectedValue(new Error('Failure')); + esClient.search.mockRejectedValue(new Error('Failure')); await expect(store.fetch()).rejects.toThrowErrorMatchingInlineSnapshot(`"Failure"`); expect(await firstErrorPromise).toMatchInlineSnapshot(`[Error: Failure]`); }); @@ -275,17 +277,18 @@ describe('TaskStore', () => { claimingOpts: OwnershipClaimingOpts; }) { const versionConflicts = 2; - const callCluster = sinon.spy(async (name: string, params?: unknown) => - name === 'updateByQuery' - ? 
{ - total: hits.length + versionConflicts, - updated: hits.length, - version_conflicts: versionConflicts, - } - : { hits: { hits } } + const esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + esClient.search.mockResolvedValue(asApiResponse({ hits: { hits } })); + esClient.updateByQuery.mockResolvedValue( + asApiResponse({ + total: hits.length + versionConflicts, + updated: hits.length, + version_conflicts: versionConflicts, + }) ); + const store = new TaskStore({ - callCluster, + esClient, maxAttempts: 2, definitions: taskDefinitions, serializer, @@ -297,26 +300,41 @@ describe('TaskStore', () => { const result = await store.claimAvailableTasks(claimingOpts); - sinon.assert.calledTwice(callCluster); - sinon.assert.calledWithMatch(callCluster, 'updateByQuery', { max_docs: claimingOpts.size }); - sinon.assert.calledWithMatch(callCluster, 'search', { body: { size: claimingOpts.size } }); - + expect(esClient.updateByQuery.mock.calls[0][0]).toMatchObject({ + max_docs: claimingOpts.size, + }); + expect(esClient.search.mock.calls[0][0]).toMatchObject({ body: { size: claimingOpts.size } }); return { result, - args: Object.assign({}, ...callCluster.args.map(([name, args]) => ({ [name]: args }))), + args: { + search: esClient.search.mock.calls[0][0]! as Search<{ + query: BoolClauseWithAnyCondition; + size: number; + sort: string | string[]; + }>, + updateByQuery: esClient.updateByQuery.mock.calls[0][0]! 
as UpdateByQuery<{ + query: BoolClauseWithAnyCondition; + size: number; + sort: string | string[]; + script: object; + }>, + }, }; } test('it returns normally with no tasks when the index does not exist.', async () => { - const callCluster = sinon.spy(async (name: string, params?: unknown) => ({ - total: 0, - updated: 0, - })); + const esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + esClient.updateByQuery.mockResolvedValue( + asApiResponse({ + total: 0, + updated: 0, + }) + ); const store = new TaskStore({ index: 'tasky', taskManagerId: '', serializer, - callCluster, + esClient, definitions: taskDefinitions, maxAttempts: 2, savedObjectsRepository: savedObjectsClient, @@ -325,9 +343,8 @@ describe('TaskStore', () => { claimOwnershipUntil: new Date(), size: 10, }); - sinon.assert.calledOnce(callCluster); - sinon.assert.calledWithMatch(callCluster, 'updateByQuery', { - ignoreUnavailable: true, + expect(esClient.updateByQuery.mock.calls[0][0]).toMatchObject({ + ignore_unavailable: true, max_docs: 10, }); expect(docs.length).toBe(0); @@ -354,9 +371,7 @@ describe('TaskStore', () => { const { args: { - updateByQuery: { - body: { query }, - }, + updateByQuery: { body: { query } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -486,9 +501,7 @@ describe('TaskStore', () => { }); const { args: { - updateByQuery: { - body: { query, sort }, - }, + updateByQuery: { body: { query, sort } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -641,9 +654,7 @@ if (doc['task.runAt'].size()!=0) { const claimOwnershipUntil = new Date(Date.now()); const { args: { - updateByQuery: { - body: { script }, - }, + updateByQuery: { body: { script } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -717,9 +728,7 @@ if (doc['task.runAt'].size()!=0) { const { result: { docs }, args: { - search: { - body: { query }, - }, + search: { body: { query } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -732,7 +741,7 @@ if 
(doc['task.runAt'].size()!=0) { hits: tasks, }); - expect(query.bool.must).toContainEqual({ + expect(query?.bool?.must).toContainEqual({ bool: { must: [ { @@ -811,11 +820,9 @@ if (doc['task.runAt'].size()!=0) { }, ]; const { - result: { docs }, + result: { docs } = {}, args: { - search: { - body: { query }, - }, + search: { body: { query } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -828,7 +835,7 @@ if (doc['task.runAt'].size()!=0) { hits: tasks, }); - expect(query.bool.must).toContainEqual({ + expect(query?.bool?.must).toContainEqual({ bool: { must: [ { @@ -907,11 +914,9 @@ if (doc['task.runAt'].size()!=0) { }, ]; const { - result: { docs }, + result: { docs } = {}, args: { - search: { - body: { query }, - }, + search: { body: { query } = {} }, }, } = await testClaimAvailableTasks({ opts: { @@ -924,7 +929,7 @@ if (doc['task.runAt'].size()!=0) { hits: tasks, }); - expect(query.bool.must).toContainEqual({ + expect(query?.bool?.must).toContainEqual({ bool: { must: [ { @@ -968,19 +973,19 @@ if (doc['task.runAt'].size()!=0) { }); test('pushes error from saved objects client to errors$', async () => { - const callCluster = jest.fn(); + const esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; const store = new TaskStore({ index: 'tasky', taskManagerId: '', serializer, - callCluster, + esClient, definitions: taskDefinitions, maxAttempts: 2, savedObjectsRepository: savedObjectsClient, }); const firstErrorPromise = store.errors$.pipe(first()).toPromise(); - callCluster.mockRejectedValue(new Error('Failure')); + esClient.updateByQuery.mockRejectedValue(new Error('Failure')); await expect( store.claimAvailableTasks({ claimOwnershipUntil: new Date(), @@ -993,13 +998,15 @@ if (doc['task.runAt'].size()!=0) { describe('update', () => { let store: TaskStore; + let esClient: ReturnType['asInternalUser']; beforeAll(() => { + esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; store = new TaskStore({ index: 'tasky', 
taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -1099,7 +1106,7 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -1139,7 +1146,7 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -1147,17 +1154,18 @@ if (doc['task.runAt'].size()!=0) { }); test('removes the task with the specified id', async () => { - const id = `id-${_.random(1, 20)}`; + const id = randomId(); const result = await store.remove(id); expect(result).toBeUndefined(); expect(savedObjectsClient.delete).toHaveBeenCalledWith('task', id); }); test('pushes error from saved objects client to errors$', async () => { - const id = `id-${_.random(1, 20)}`; const firstErrorPromise = store.errors$.pipe(first()).toPromise(); savedObjectsClient.delete.mockRejectedValue(new Error('Failure')); - await expect(store.remove(id)).rejects.toThrowErrorMatchingInlineSnapshot(`"Failure"`); + await expect(store.remove(randomId())).rejects.toThrowErrorMatchingInlineSnapshot( + `"Failure"` + ); expect(await firstErrorPromise).toMatchInlineSnapshot(`[Error: Failure]`); }); }); @@ -1170,7 +1178,7 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, @@ -1178,13 +1186,12 @@ if (doc['task.runAt'].size()!=0) { }); test('gets the task with the specified 
id', async () => { - const id = `id-${_.random(1, 20)}`; const task = { runAt: mockedDate, scheduledAt: mockedDate, startedAt: null, retryAt: null, - id, + id: randomId(), params: { hello: 'world' }, state: { foo: 'bar' }, taskType: 'report', @@ -1205,18 +1212,17 @@ if (doc['task.runAt'].size()!=0) { version: '123', })); - const result = await store.get(id); + const result = await store.get(task.id); expect(result).toEqual(task); - expect(savedObjectsClient.get).toHaveBeenCalledWith('task', id); + expect(savedObjectsClient.get).toHaveBeenCalledWith('task', task.id); }); test('pushes error from saved objects client to errors$', async () => { - const id = `id-${_.random(1, 20)}`; const firstErrorPromise = store.errors$.pipe(first()).toPromise(); savedObjectsClient.get.mockRejectedValue(new Error('Failure')); - await expect(store.get(id)).rejects.toThrowErrorMatchingInlineSnapshot(`"Failure"`); + await expect(store.get(randomId())).rejects.toThrowErrorMatchingInlineSnapshot(`"Failure"`); expect(await firstErrorPromise).toMatchInlineSnapshot(`[Error: Failure]`); }); }); @@ -1226,13 +1232,12 @@ if (doc['task.runAt'].size()!=0) { expect.assertions(4); return Promise.all( Object.values(TaskStatus).map(async (status) => { - const id = `id-${_.random(1, 20)}`; const task = { runAt: mockedDate, scheduledAt: mockedDate, startedAt: null, retryAt: null, - id, + id: randomId(), params: { hello: 'world' }, state: { foo: 'bar' }, taskType: 'report', @@ -1242,7 +1247,6 @@ if (doc['task.runAt'].size()!=0) { ownerId: null, }; - const callCluster = jest.fn(); savedObjectsClient.get.mockImplementation(async (type: string, objectId: string) => ({ id: objectId, type, @@ -1258,20 +1262,18 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster, + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, }); - expect(await 
store.getLifecycle(id)).toEqual(status); + expect(await store.getLifecycle(task.id)).toEqual(status); }) ); }); test('returns NotFound status if the task doesnt exists ', async () => { - const id = `id-${_.random(1, 20)}`; - savedObjectsClient.get.mockRejectedValueOnce( SavedObjectsErrorHelpers.createGenericNotFoundError('type', 'id') ); @@ -1280,18 +1282,16 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, }); - expect(await store.getLifecycle(id)).toEqual(TaskLifecycleResult.NotFound); + expect(await store.getLifecycle(randomId())).toEqual(TaskLifecycleResult.NotFound); }); test('throws if an unknown error takes place ', async () => { - const id = `id-${_.random(1, 20)}`; - savedObjectsClient.get.mockRejectedValueOnce( SavedObjectsErrorHelpers.createBadRequestError() ); @@ -1300,13 +1300,13 @@ if (doc['task.runAt'].size()!=0) { index: 'tasky', taskManagerId: '', serializer, - callCluster: jest.fn(), + esClient: elasticsearchServiceMock.createClusterClient().asInternalUser, maxAttempts: 2, definitions: taskDefinitions, savedObjectsRepository: savedObjectsClient, }); - return expect(store.getLifecycle(id)).rejects.toThrow('Bad Request'); + return expect(store.getLifecycle(randomId())).rejects.toThrow('Bad Request'); }); }); @@ -1392,18 +1392,20 @@ if (doc['task.runAt'].size()!=0) { return { taskManagerId, runAt, tasks }; } - test('emits an event when a task is succesfully claimed by id', async () => { + function instantiateStoreWithMockedApiResponses() { const { taskManagerId, runAt, tasks } = generateTasks(); - const callCluster = sinon.spy(async (name: string, params?: unknown) => - name === 'updateByQuery' - ? 
{ - total: tasks.length, - updated: tasks.length, - } - : { hits: { hits: tasks } } + + const esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + esClient.search.mockResolvedValue(asApiResponse({ hits: { hits: tasks } })); + esClient.updateByQuery.mockResolvedValue( + asApiResponse({ + total: tasks.length, + updated: tasks.length, + }) ); + const store = new TaskStore({ - callCluster, + esClient, maxAttempts: 2, definitions: taskDefinitions, serializer, @@ -1412,6 +1414,12 @@ if (doc['task.runAt'].size()!=0) { index: '', }); + return { taskManagerId, runAt, store }; + } + + test('emits an event when a task is succesfully claimed by id', async () => { + const { taskManagerId, runAt, store } = instantiateStoreWithMockedApiResponses(); + const promise = store.events .pipe( filter( @@ -1453,24 +1461,7 @@ if (doc['task.runAt'].size()!=0) { }); test('emits an event when a task is succesfully by scheduling', async () => { - const { taskManagerId, runAt, tasks } = generateTasks(); - const callCluster = sinon.spy(async (name: string, params?: unknown) => - name === 'updateByQuery' - ? { - total: tasks.length, - updated: tasks.length, - } - : { hits: { hits: tasks } } - ); - const store = new TaskStore({ - callCluster, - maxAttempts: 2, - definitions: taskDefinitions, - serializer, - savedObjectsRepository: savedObjectsClient, - taskManagerId, - index: '', - }); + const { taskManagerId, runAt, store } = instantiateStoreWithMockedApiResponses(); const promise = store.events .pipe( @@ -1513,24 +1504,7 @@ if (doc['task.runAt'].size()!=0) { }); test('emits an event when the store fails to claim a required task by id', async () => { - const { taskManagerId, runAt, tasks } = generateTasks(); - const callCluster = sinon.spy(async (name: string, params?: unknown) => - name === 'updateByQuery' - ? 
{ - total: tasks.length, - updated: tasks.length, - } - : { hits: { hits: tasks } } - ); - const store = new TaskStore({ - callCluster, - maxAttempts: 2, - definitions: taskDefinitions, - serializer, - savedObjectsRepository: savedObjectsClient, - taskManagerId, - index: '', - }); + const { taskManagerId, runAt, store } = instantiateStoreWithMockedApiResponses(); const promise = store.events .pipe( @@ -1575,24 +1549,7 @@ if (doc['task.runAt'].size()!=0) { }); test('emits an event when the store fails to find a task which was required by id', async () => { - const { taskManagerId, tasks } = generateTasks(); - const callCluster = sinon.spy(async (name: string, params?: unknown) => - name === 'updateByQuery' - ? { - total: tasks.length, - updated: tasks.length, - } - : { hits: { hits: tasks } } - ); - const store = new TaskStore({ - callCluster, - maxAttempts: 2, - definitions: taskDefinitions, - serializer, - savedObjectsRepository: savedObjectsClient, - taskManagerId, - index: '', - }); + const { store } = instantiateStoreWithMockedApiResponses(); const promise = store.events .pipe( @@ -1628,3 +1585,10 @@ function generateFakeTasks(count: number = 1) { sort: ['a', _.random(1, 5)], })); } + +const asApiResponse = (body: T): RequestEvent => + ({ + body, + } as RequestEvent); + +const randomId = () => `id-${_.random(1, 20)}`; diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 8ffeceb8e561..4c41be9577ad 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -20,13 +20,13 @@ import { SavedObjectsRawDoc, ISavedObjectsRepository, SavedObjectsUpdateResponse, + ElasticsearchClient, } from '../../../../src/core/server'; import { asOk, asErr, Result } from './lib/result_type'; import { ConcreteTaskInstance, - ElasticJs, TaskInstance, TaskLifecycle, TaskLifecycleResult, @@ -61,7 +61,7 @@ import { import { TaskTypeDictionary } from 
'./task_type_dictionary'; export interface StoreOpts { - callCluster: ElasticJs; + esClient: ElasticsearchClient; index: string; taskManagerId: string; maxAttempts: number; @@ -122,7 +122,7 @@ export class TaskStore { public readonly taskManagerId: string; public readonly errors$ = new Subject(); - private callCluster: ElasticJs; + private esClient: ElasticsearchClient; private definitions: TaskTypeDictionary; private savedObjectsRepository: ISavedObjectsRepository; private serializer: SavedObjectsSerializer; @@ -131,7 +131,7 @@ export class TaskStore { /** * Constructs a new TaskStore. * @param {StoreOpts} opts - * @prop {CallCluster} callCluster - The elastic search connection + * @prop {esClient} esClient - An elasticsearch client * @prop {string} index - The name of the task manager index * @prop {number} maxAttempts - The maximum number of attempts before a task will be abandoned * @prop {TaskDefinition} definition - The definition of the task being run @@ -139,7 +139,7 @@ export class TaskStore { * @prop {savedObjectsRepository} - An instance to the saved objects repository */ constructor(opts: StoreOpts) { - this.callCluster = opts.callCluster; + this.esClient = opts.esClient; this.index = opts.index; this.taskManagerId = opts.taskManagerId; this.maxAttempts = opts.maxAttempts; @@ -467,30 +467,31 @@ export class TaskStore { private async search(opts: SearchOpts = {}): Promise { const { query } = ensureQueryOnlyReturnsTaskObjects(opts); - let result; try { - result = await this.callCluster('search', { + const { + body: { + hits: { hits: tasks }, + }, + } = await this.esClient.search>({ index: this.index, - ignoreUnavailable: true, + ignore_unavailable: true, body: { ...opts, query, }, }); + + return { + docs: tasks + .filter((doc) => this.serializer.isRawSavedObject(doc)) + .map((doc) => this.serializer.rawToSavedObject(doc)) + .map((doc) => omit(doc, 'namespace') as SavedObject) + .map(savedObjectToConcreteTaskInstance), + }; } catch (e) { 
this.errors$.next(e); throw e; } - - const rawDocs = (result as SearchResponse).hits.hits; - - return { - docs: (rawDocs as SavedObjectsRawDoc[]) - .filter((doc) => this.serializer.isRawSavedObject(doc)) - .map((doc) => this.serializer.rawToSavedObject(doc)) - .map((doc) => omit(doc, 'namespace') as SavedObject) - .map(savedObjectToConcreteTaskInstance), - }; } private async updateByQuery( @@ -499,11 +500,13 @@ export class TaskStore { { max_docs }: UpdateByQueryOpts = {} ): Promise { const { query } = ensureQueryOnlyReturnsTaskObjects(opts); - let result; try { - result = await this.callCluster('updateByQuery', { + const { + // eslint-disable-next-line @typescript-eslint/naming-convention + body: { total, updated, version_conflicts }, + } = await this.esClient.updateByQuery({ index: this.index, - ignoreUnavailable: true, + ignore_unavailable: true, refresh: true, max_docs, conflicts: 'proceed', @@ -512,18 +515,16 @@ export class TaskStore { query, }, }); + + return { + total, + updated, + version_conflicts, + }; } catch (e) { this.errors$.next(e); throw e; } - - // eslint-disable-next-line @typescript-eslint/naming-convention - const { total, updated, version_conflicts } = result as UpdateDocumentByQueryResponse; - return { - total, - updated, - version_conflicts, - }; } } From aa669e663016eb34d0fb4a538115edb7e061452a Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 13:57:58 +0100 Subject: [PATCH 51/67] renamed variables to match new names of components --- .../managed_configuration.test.ts | 16 ++++----- x-pack/plugins/task_manager/server/plugin.ts | 23 ------------- .../server/polling_lifecycle.mock.ts | 34 +++++++------------ .../server/polling_lifecycle.test.ts | 20 +++++------ 4 files changed, 29 insertions(+), 64 deletions(-) diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 2deb2a55a0c3..9c9dd9db19e9 
100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -6,16 +6,15 @@ import sinon from 'sinon'; import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks'; -import { SavedObjectsErrorHelpers, PluginInitializerContext } from '../../../../../src/core/server'; +import { SavedObjectsErrorHelpers, Logger } from '../../../../../src/core/server'; import { ADJUST_THROUGHPUT_INTERVAL } from '../lib/create_managed_configuration'; import { TaskManagerPlugin, TaskManagerStartContract } from '../plugin'; import { coreMock } from '../../../../../src/core/server/mocks'; import { TaskManagerConfig } from '../config'; describe('managed configuration', () => { - let taskManagerPlugin: TaskManagerPlugin; - let pluginInitializerContext: PluginInitializerContext; let taskManagerStart: TaskManagerStartContract; + let logger: Logger; let clock: sinon.SinonFakeTimers; const savedObjectsClient = savedObjectsRepositoryMock.create(); @@ -24,7 +23,7 @@ describe('managed configuration', () => { jest.resetAllMocks(); clock = sinon.useFakeTimers(); - pluginInitializerContext = coreMock.createPluginInitializerContext({ + const context = coreMock.createPluginInitializerContext({ enabled: true, max_workers: 10, index: 'foo', @@ -33,9 +32,10 @@ describe('managed configuration', () => { max_poll_inactivity_cycles: 10, request_capacity: 1000, }); + logger = context.logger.get('taskManager'); - taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); - (await taskManagerPlugin.setup(coreMock.createSetup())).registerTaskDefinitions({ + const taskManager = new TaskManagerPlugin(context); + (await taskManager.setup(coreMock.createSetup())).registerTaskDefinitions({ foo: { type: 'foo', title: 'Foo', @@ -46,7 +46,7 @@ describe('managed configuration', () => { const coreStart = coreMock.createStart(); 
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient); - taskManagerStart = await taskManagerPlugin.start(coreStart); + taskManagerStart = await taskManager.start(coreStart); // force rxjs timers to fire when they are scheduled for setTimeout(0) as the // sinon fake timers cause them to stall @@ -70,7 +70,6 @@ describe('managed configuration', () => { ).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`); clock.tick(ADJUST_THROUGHPUT_INTERVAL); - const logger = pluginInitializerContext.logger.get('taskManager'); expect(logger.warn).toHaveBeenCalledWith( 'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).' ); @@ -95,7 +94,6 @@ describe('managed configuration', () => { ).rejects.toThrowErrorMatchingInlineSnapshot(`"Too Many Requests"`); clock.tick(ADJUST_THROUGHPUT_INTERVAL); - const logger = pluginInitializerContext.logger.get('taskManager'); expect(logger.warn).toHaveBeenCalledWith( 'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).' ); diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 5b9686ccc6c7..0cb34a6f10fb 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -60,11 +60,6 @@ export class TaskManagerPlugin } return { - /** - * Adds middleware to the task manager, such as adding security layers, loggers, etc. - * - * @param {Middleware} middleware - The middlware being added. - */ addMiddleware: (middleware: Middleware) => { this.assertStillInSetup('add Middleware'); this.middleware = addMiddlewareToChain(this.middleware, middleware); @@ -118,26 +113,8 @@ export class TaskManagerPlugin taskPollingLifecycle.start(); return { - /** - * Fetches a list of scheduled tasks. 
- * - * @param opts - The query options used to filter tasks - * @returns {Promise} - */ fetch: (opts: SearchOpts): Promise => taskStore.fetch(opts), - /** - * Get the current state of a specified task. - * - * @param {string} id - * @returns {Promise} - */ get: (id: string) => taskStore.get(id), - /** - * Removes the specified task from the index. - * - * @param {string} id - * @returns {Promise} - */ remove: (id: string) => taskStore.remove(id), schedule: (...args) => taskScheduling.schedule(...args), ensureScheduled: (...args) => taskScheduling.ensureScheduled(...args), diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts index 05aed2c9e811..9df1e06165bc 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.mock.ts @@ -7,26 +7,18 @@ import { TaskPollingLifecycle, TaskLifecycleEvent } from './polling_lifecycle'; import { of, Observable } from 'rxjs'; -const createTaskPollingLifecycleMock = ({ - isStarted = true, - events$ = of(), -}: { - isStarted?: boolean; - events$?: Observable; -} = {}) => { - return ({ - start: jest.fn(), - attemptToRun: jest.fn(), - get isStarted() { - return isStarted; - }, - get events() { - return events$; - }, - stop: jest.fn(), - } as unknown) as jest.Mocked; -}; - export const taskPollingLifecycleMock = { - create: createTaskPollingLifecycleMock, + create(opts: { isStarted?: boolean; events$?: Observable }) { + return ({ + start: jest.fn(), + attemptToRun: jest.fn(), + get isStarted() { + return opts.isStarted ?? true; + }, + get events() { + return opts.events$ ?? 
of(); + }, + stop: jest.fn(), + } as unknown) as jest.Mocked; + }, }; diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts index 4c92b6df7d89..92e32f8197a2 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts @@ -17,20 +17,18 @@ import { taskStoreMock } from './task_store.mock'; describe('TaskPollingLifecycle', () => { let clock: sinon.SinonFakeTimers; - const config = { - enabled: true, - max_workers: 10, - index: 'foo', - max_attempts: 9, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - }; - const taskManagerLogger = loggingSystemMock.create().get(); const mockTaskStore = taskStoreMock.create({}); const taskManagerOpts = { - config, + config: { + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + }, taskStore: mockTaskStore, logger: taskManagerLogger, definitions: new TaskTypeDictionary(taskManagerLogger), From 42e041f5d3108c13c3fee336dfb9a8e8c47a25c3 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 14:32:16 +0100 Subject: [PATCH 52/67] made type optional --- .../actions/server/action_type_registry.ts | 1 - x-pack/plugins/actions/server/usage/task.ts | 1 - .../alerts/server/alert_type_registry.ts | 1 - x-pack/plugins/alerts/server/usage/task.ts | 1 - .../apm/server/lib/apm_telemetry/index.ts | 1 - x-pack/plugins/lens/server/usage/task.ts | 1 - .../session_management_service.ts | 1 - .../server/endpoint/lib/artifacts/task.ts | 1 - .../managed_configuration.test.ts | 2 - .../task_manager/server/plugin.test.ts | 1 - .../mark_available_tasks_as_claimed.test.ts | 2 - .../task_manager/server/task_runner.test.ts | 1 - .../task_manager/server/task_store.test.ts | 7 --- .../server/task_type_dictionary.test.ts | 48 +++++++++---------- 
.../server/task_type_dictionary.ts | 20 ++++---- 15 files changed, 32 insertions(+), 57 deletions(-) diff --git a/x-pack/plugins/actions/server/action_type_registry.ts b/x-pack/plugins/actions/server/action_type_registry.ts index b93d4a6e78ac..cacf7166b96b 100644 --- a/x-pack/plugins/actions/server/action_type_registry.ts +++ b/x-pack/plugins/actions/server/action_type_registry.ts @@ -125,7 +125,6 @@ export class ActionTypeRegistry { this.taskManager.registerTaskDefinitions({ [`actions:${actionType.id}`]: { title: actionType.name, - type: `actions:${actionType.id}`, maxAttempts: actionType.maxAttempts || 1, getRetry(attempts: number, error: unknown) { if (error instanceof ExecutorError) { diff --git a/x-pack/plugins/actions/server/usage/task.ts b/x-pack/plugins/actions/server/usage/task.ts index efa695cdc266..f7af480aa9fb 100644 --- a/x-pack/plugins/actions/server/usage/task.ts +++ b/x-pack/plugins/actions/server/usage/task.ts @@ -39,7 +39,6 @@ function registerActionsTelemetryTask( taskManager.registerTaskDefinitions({ [TELEMETRY_TASK_TYPE]: { title: 'Actions usage fetch task', - type: TELEMETRY_TASK_TYPE, timeout: '5m', createTaskRunner: telemetryTaskRunner(logger, core, kibanaIndex), }, diff --git a/x-pack/plugins/alerts/server/alert_type_registry.ts b/x-pack/plugins/alerts/server/alert_type_registry.ts index 7f34803b05a8..0cd218571035 100644 --- a/x-pack/plugins/alerts/server/alert_type_registry.ts +++ b/x-pack/plugins/alerts/server/alert_type_registry.ts @@ -86,7 +86,6 @@ export class AlertTypeRegistry { this.taskManager.registerTaskDefinitions({ [`alerting:${alertType.id}`]: { title: alertType.name, - type: `alerting:${alertType.id}`, createTaskRunner: (context: RunContext) => this.taskRunnerFactory.create({ ...alertType } as AlertType, context), }, diff --git a/x-pack/plugins/alerts/server/usage/task.ts b/x-pack/plugins/alerts/server/usage/task.ts index daf3ac246ada..24ac15bbea78 100644 --- a/x-pack/plugins/alerts/server/usage/task.ts +++ 
b/x-pack/plugins/alerts/server/usage/task.ts @@ -42,7 +42,6 @@ function registerAlertingTelemetryTask( taskManager.registerTaskDefinitions({ [TELEMETRY_TASK_TYPE]: { title: 'Alerting usage fetch task', - type: TELEMETRY_TASK_TYPE, timeout: '5m', createTaskRunner: telemetryTaskRunner(logger, core, kibanaIndex), }, diff --git a/x-pack/plugins/apm/server/lib/apm_telemetry/index.ts b/x-pack/plugins/apm/server/lib/apm_telemetry/index.ts index c93fdfc15fe3..62fc16fb2505 100644 --- a/x-pack/plugins/apm/server/lib/apm_telemetry/index.ts +++ b/x-pack/plugins/apm/server/lib/apm_telemetry/index.ts @@ -49,7 +49,6 @@ export async function createApmTelemetry({ taskManager.registerTaskDefinitions({ [APM_TELEMETRY_TASK_NAME]: { title: 'Collect APM usage', - type: APM_TELEMETRY_TASK_NAME, createTaskRunner: () => { return { run: async () => { diff --git a/x-pack/plugins/lens/server/usage/task.ts b/x-pack/plugins/lens/server/usage/task.ts index 9fee72b59b44..83cdbd62f348 100644 --- a/x-pack/plugins/lens/server/usage/task.ts +++ b/x-pack/plugins/lens/server/usage/task.ts @@ -48,7 +48,6 @@ function registerLensTelemetryTask( taskManager.registerTaskDefinitions({ [TELEMETRY_TASK_TYPE]: { title: 'Lens usage fetch task', - type: TELEMETRY_TASK_TYPE, timeout: '1m', createTaskRunner: telemetryTaskRunner(logger, core, config), }, diff --git a/x-pack/plugins/security/server/session_management/session_management_service.ts b/x-pack/plugins/security/server/session_management/session_management_service.ts index 60c0f7c23e95..fc2e85d683d5 100644 --- a/x-pack/plugins/security/server/session_management/session_management_service.ts +++ b/x-pack/plugins/security/server/session_management/session_management_service.ts @@ -78,7 +78,6 @@ export class SessionManagementService { taskManager.registerTaskDefinitions({ [SESSION_INDEX_CLEANUP_TASK_NAME]: { title: 'Cleanup expired or invalid user sessions', - type: SESSION_INDEX_CLEANUP_TASK_NAME, createTaskRunner: () => ({ run: () => 
this.sessionIndex.cleanUp() }), }, }); diff --git a/x-pack/plugins/security_solution/server/endpoint/lib/artifacts/task.ts b/x-pack/plugins/security_solution/server/endpoint/lib/artifacts/task.ts index 02e57a71dcd9..0d78c90735ab 100644 --- a/x-pack/plugins/security_solution/server/endpoint/lib/artifacts/task.ts +++ b/x-pack/plugins/security_solution/server/endpoint/lib/artifacts/task.ts @@ -39,7 +39,6 @@ export class ManifestTask { setupContract.taskManager.registerTaskDefinitions({ [ManifestTaskConstants.TYPE]: { title: 'Security Solution Endpoint Exceptions Handler', - type: ManifestTaskConstants.TYPE, timeout: ManifestTaskConstants.TIMEOUT, createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => { return { diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 9c9dd9db19e9..11f6ccc88185 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -37,7 +37,6 @@ describe('managed configuration', () => { const taskManager = new TaskManagerPlugin(context); (await taskManager.setup(coreMock.createSetup())).registerTaskDefinitions({ foo: { - type: 'foo', title: 'Foo', createTaskRunner: jest.fn(), }, @@ -45,7 +44,6 @@ describe('managed configuration', () => { const coreStart = coreMock.createStart(); coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient); - taskManagerStart = await taskManager.start(coreStart); // force rxjs timers to fire when they are scheduled for setTimeout(0) as the diff --git a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts index 6cde4f3d782c..50e7e9a7aa19 100644 --- a/x-pack/plugins/task_manager/server/plugin.test.ts +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -59,7 
+59,6 @@ describe('TaskManagerPlugin', () => { expect(() => setupApi.registerTaskDefinitions({ lateRegisteredType: { - type: 'lateRegisteredType', title: 'lateRegisteredType', createTaskRunner: () => ({ async run() {} }), }, diff --git a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts index 63d8c1575b26..46cc47961f9f 100644 --- a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts +++ b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts @@ -31,13 +31,11 @@ describe('mark_available_tasks_as_claimed', () => { const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ sampleTask: { - type: 'sampleTask', title: 'title', maxAttempts: 5, createTaskRunner: () => ({ run: () => Promise.resolve() }), }, otherTask: { - type: 'otherTask', title: 'title', createTaskRunner: () => ({ run: () => Promise.resolve() }), }, diff --git a/x-pack/plugins/task_manager/server/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_runner.test.ts index 1619b06e60bb..d14ff7a53fbb 100644 --- a/x-pack/plugins/task_manager/server/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_runner.test.ts @@ -1005,7 +1005,6 @@ describe('TaskManagerRunner', () => { const definitions = new TaskTypeDictionary(logger); definitions.registerTaskDefinitions({ testbar: { - type: 'bar', title: 'Bar!', createTaskRunner, }, diff --git a/x-pack/plugins/task_manager/server/task_store.test.ts b/x-pack/plugins/task_manager/server/task_store.test.ts index b06a6cf909f5..a941a53d9ff6 100644 --- a/x-pack/plugins/task_manager/server/task_store.test.ts +++ b/x-pack/plugins/task_manager/server/task_store.test.ts @@ -51,17 +51,14 @@ const mockedDate = new Date('2019-02-12T21:01:22.479Z'); const taskDefinitions = new TaskTypeDictionary(loggingSystemMock.create().get()); 
taskDefinitions.registerTaskDefinitions({ report: { - type: 'report', title: 'report', createTaskRunner: jest.fn(), }, dernstraight: { - type: 'dernstraight', title: 'dernstraight', createTaskRunner: jest.fn(), }, yawn: { - type: 'yawn', title: 'yawn', createTaskRunner: jest.fn(), }, @@ -357,12 +354,10 @@ describe('TaskStore', () => { const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ foo: { - type: 'foo', title: 'foo', createTaskRunner: jest.fn(), }, bar: { - type: 'bar', title: 'bar', maxAttempts: customMaxAttempts, createTaskRunner: jest.fn(), @@ -488,12 +483,10 @@ describe('TaskStore', () => { const definitions = new TaskTypeDictionary(loggingSystemMock.create().get()); definitions.registerTaskDefinitions({ foo: { - type: 'foo', title: 'foo', createTaskRunner: jest.fn(), }, bar: { - type: 'bar', title: 'bar', maxAttempts: customMaxAttempts, createTaskRunner: jest.fn(), diff --git a/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts b/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts index 6afb2241b1ef..e1d6ef17f5f9 100644 --- a/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.test.ts @@ -45,30 +45,30 @@ describe('taskTypeDictionary', () => { const result = sanitizeTaskDefinitions(taskDefinitions); expect(result).toMatchInlineSnapshot(` -Object { - "test_task_type_0": Object { - "createTaskRunner": [Function], - "description": "one super cool task", - "timeout": "5m", - "title": "Test", - "type": "test_task_type_0", - }, - "test_task_type_1": Object { - "createTaskRunner": [Function], - "description": "one super cool task", - "timeout": "5m", - "title": "Test", - "type": "test_task_type_1", - }, - "test_task_type_2": Object { - "createTaskRunner": [Function], - "description": "one super cool task", - "timeout": "5m", - "title": "Test", - "type": "test_task_type_2", - }, -} -`); + Array [ + Object 
{ + "createTaskRunner": [Function], + "description": "one super cool task", + "timeout": "5m", + "title": "Test", + "type": "test_task_type_0", + }, + Object { + "createTaskRunner": [Function], + "description": "one super cool task", + "timeout": "5m", + "title": "Test", + "type": "test_task_type_1", + }, + Object { + "createTaskRunner": [Function], + "description": "one super cool task", + "timeout": "5m", + "title": "Test", + "type": "test_task_type_2", + }, + ] + `); }); it('throws a validation exception for invalid task definition', () => { diff --git a/x-pack/plugins/task_manager/server/task_type_dictionary.ts b/x-pack/plugins/task_manager/server/task_type_dictionary.ts index 5fd2ab47e38a..cb7cda6dfa84 100644 --- a/x-pack/plugins/task_manager/server/task_type_dictionary.ts +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.ts @@ -3,7 +3,6 @@ * or more contributor license agreements. Licensed under the Elastic License; * you may not use this file except in compliance with the Elastic License. */ -import { mapValues } from 'lodash'; import Joi from 'joi'; import { TaskDefinition, validateTaskDefinition } from './task'; import { Logger } from '../../../../src/core/server'; @@ -56,17 +55,15 @@ export class TaskTypeDictionary { * Method for allowing consumers to register task definitions into the system. 
* @param taskDefinitions - The Kibana task definitions dictionary */ - public registerTaskDefinitions(taskDefinitions: Record) { + public registerTaskDefinitions(taskDefinitions: Record>) { const duplicate = Object.keys(taskDefinitions).find((type) => this.definitions.has(type)); if (duplicate) { throw new Error(`Task ${duplicate} is already defined!`); } try { - for (const [type, sanitizedDefinition] of Object.entries( - sanitizeTaskDefinitions(taskDefinitions) - )) { - this.definitions.set(type, sanitizedDefinition); + for (const definition of sanitizeTaskDefinitions(taskDefinitions)) { + this.definitions.set(definition.type, definition); } } catch (e) { this.logger.error('Could not sanitize task definitions'); @@ -81,10 +78,9 @@ export class TaskTypeDictionary { * @param taskDefinitions - The Kibana task definitions dictionary */ export function sanitizeTaskDefinitions( - taskDefinitions: Record = {} -): Record { - return mapValues(taskDefinitions, (rawDefinition, type) => { - rawDefinition.type = type; - return Joi.attempt(rawDefinition, validateTaskDefinition); - }); + taskDefinitions: Record> +): TaskDefinition[] { + return Object.entries(taskDefinitions).map(([type, rawDefinition]) => + Joi.attempt({ type, ...rawDefinition }, validateTaskDefinition) + ); } From 77d4450f5eab5f13da4447ea8d85442abc893ba1 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 15:15:52 +0100 Subject: [PATCH 53/67] removed type from tasks --- .../plugins/sample_task_plugin/server/plugin.ts | 2 -- .../plugins/task_manager_performance/server/plugin.ts | 1 - 2 files changed, 3 deletions(-) diff --git a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts index 3ea669ae9d40..803df6a66ea5 100644 --- a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts +++ b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts @@ 
-92,13 +92,11 @@ export class SampleTaskManagerFixturePlugin taskManager.registerTaskDefinitions({ sampleTask: { ...defaultSampleTaskConfig, - type: 'sampleTask', title: 'Sample Task', description: 'A sample task for testing the task_manager.', }, singleAttemptSampleTask: { ...defaultSampleTaskConfig, - type: 'singleAttemptSampleTask', title: 'Failing Sample Task', description: 'A sample task for testing the task_manager that fails on the first attempt to run.', diff --git a/x-pack/test/plugin_api_perf/plugins/task_manager_performance/server/plugin.ts b/x-pack/test/plugin_api_perf/plugins/task_manager_performance/server/plugin.ts index ba6d7ced3c59..18449ef61d1a 100644 --- a/x-pack/test/plugin_api_perf/plugins/task_manager_performance/server/plugin.ts +++ b/x-pack/test/plugin_api_perf/plugins/task_manager_performance/server/plugin.ts @@ -79,7 +79,6 @@ export class SampleTaskManagerFixturePlugin taskManager.registerTaskDefinitions({ performanceTestTask: { - type: 'performanceTestTask', title, description: 'A task for stress testing task_manager.', timeout: '1m', From 4e08766321794014a63dc8f47f6330b9343d90d7 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 16:30:39 +0100 Subject: [PATCH 54/67] simplified middleware --- .../task_manager/server/lib/middleware.ts | 54 ++++++------------- .../task_manager/server/task_runner.ts | 12 ++--- 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/x-pack/plugins/task_manager/server/lib/middleware.ts b/x-pack/plugins/task_manager/server/lib/middleware.ts index e9662b50048c..c255ddd4775f 100644 --- a/x-pack/plugins/task_manager/server/lib/middleware.ts +++ b/x-pack/plugins/task_manager/server/lib/middleware.ts @@ -6,56 +6,36 @@ import { RunContext, TaskInstance } from '../task'; -/* - * BeforeSaveMiddlewareParams is nearly identical to RunContext, but - * taskInstance is before save (no _id property) - * - * taskInstance property is guaranteed to exist. 
The params can optionally - * include fields from an "options" object passed as the 2nd parameter to - * taskManager.schedule() - */ -export interface BeforeSaveMiddlewareParams { +type Mapper = (params: T) => Promise; +interface BeforeSaveContext { taskInstance: TaskInstance; } -export type BeforeSaveFunction = ( - params: BeforeSaveMiddlewareParams -) => Promise; - -export type BeforeRunFunction = (params: RunContext) => Promise; -export type BeforeMarkRunningFunction = (params: RunContext) => Promise; +export type BeforeSaveContextFunction = Mapper; +export type BeforeRunContextFunction = Mapper; export interface Middleware { - beforeSave: BeforeSaveFunction; - beforeRun: BeforeRunFunction; - beforeMarkRunning: BeforeMarkRunningFunction; + beforeSave: BeforeSaveContextFunction; + beforeRun: BeforeRunContextFunction; + beforeMarkRunning: BeforeRunContextFunction; } -export function addMiddlewareToChain(prevMiddleware: Middleware, middleware: Middleware) { - const beforeSave = middleware.beforeSave - ? (params: BeforeSaveMiddlewareParams) => - middleware.beforeSave(params).then(prevMiddleware.beforeSave) - : prevMiddleware.beforeSave; - - const beforeRun = middleware.beforeRun - ? (params: RunContext) => middleware.beforeRun(params).then(prevMiddleware.beforeRun) - : prevMiddleware.beforeRun; - - const beforeMarkRunning = middleware.beforeMarkRunning - ? (params: RunContext) => - middleware.beforeMarkRunning(params).then(prevMiddleware.beforeMarkRunning) - : prevMiddleware.beforeMarkRunning; - +export function addMiddlewareToChain(prev: Middleware, next: Partial) { return { - beforeSave, - beforeRun, - beforeMarkRunning, + beforeSave: next.beforeSave ? chain(prev.beforeSave, next.beforeSave) : prev.beforeSave, + beforeRun: next.beforeRun ? chain(prev.beforeRun, next.beforeRun) : prev.beforeRun, + beforeMarkRunning: next.beforeMarkRunning + ? 
chain(prev.beforeMarkRunning, next.beforeMarkRunning) + : prev.beforeMarkRunning, }; } +const chain = (prev: Mapper, next: Mapper): Mapper => (params) => + next(params).then(prev); + export function createInitialMiddleware(): Middleware { return { - beforeSave: async (saveOpts: BeforeSaveMiddlewareParams) => saveOpts, + beforeSave: async (saveOpts: BeforeSaveContext) => saveOpts, beforeRun: async (runOpts: RunContext) => runOpts, beforeMarkRunning: async (runOpts: RunContext) => runOpts, }; diff --git a/x-pack/plugins/task_manager/server/task_runner.ts b/x-pack/plugins/task_manager/server/task_runner.ts index cfe0e97cbfec..24a487e36602 100644 --- a/x-pack/plugins/task_manager/server/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_runner.ts @@ -19,7 +19,7 @@ import { Logger } from '../../../../src/core/server'; import { asOk, asErr, mapErr, eitherAsync, unwrap, mapOk, Result } from './lib/result_type'; import { TaskRun, TaskMarkRunning, asTaskRunEvent, asTaskMarkRunningEvent } from './task_events'; import { intervalFromDate, intervalFromNow } from './lib/intervals'; -import { BeforeRunFunction, BeforeMarkRunningFunction } from './lib/middleware'; +import { Middleware } from './lib/middleware'; import { CancelFunction, CancellableTask, @@ -55,15 +55,13 @@ export interface Updatable { remove(id: string): Promise; } -interface Opts { +type Opts = { logger: Logger; definitions: TaskTypeDictionary; instance: ConcreteTaskInstance; store: Updatable; - beforeRun: BeforeRunFunction; - beforeMarkRunning: BeforeMarkRunningFunction; onTaskEvent?: (event: TaskRun | TaskMarkRunning) => void; -} +} & Pick; /** * Runs a background task, ensures that errors are properly handled, @@ -79,8 +77,8 @@ export class TaskManagerRunner implements TaskRunner { private definitions: TaskTypeDictionary; private logger: Logger; private bufferedTaskStore: Updatable; - private beforeRun: BeforeRunFunction; - private beforeMarkRunning: BeforeMarkRunningFunction; + private beforeRun: 
Middleware['beforeRun']; + private beforeMarkRunning: Middleware['beforeMarkRunning']; private onTaskEvent: (event: TaskRun | TaskMarkRunning) => void; /** From 1c21c6b64da9a0cf1e21b8925431905b356ab151 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Fri, 16 Oct 2020 16:41:59 +0100 Subject: [PATCH 55/67] removed types from the tasks in actions and alerts --- x-pack/plugins/actions/server/action_type_registry.test.ts | 1 - x-pack/plugins/alerts/server/alert_type_registry.test.ts | 1 - .../server/session_management/session_management_service.test.ts | 1 - 3 files changed, 3 deletions(-) diff --git a/x-pack/plugins/actions/server/action_type_registry.test.ts b/x-pack/plugins/actions/server/action_type_registry.test.ts index d1f8263fc33c..95e7784e51ac 100644 --- a/x-pack/plugins/actions/server/action_type_registry.test.ts +++ b/x-pack/plugins/actions/server/action_type_registry.test.ts @@ -66,7 +66,6 @@ describe('register()', () => { "getRetry": [Function], "maxAttempts": 1, "title": "My action type", - "type": "actions:my-action-type", }, }, ] diff --git a/x-pack/plugins/alerts/server/alert_type_registry.test.ts b/x-pack/plugins/alerts/server/alert_type_registry.test.ts index 020b4f55619b..9e1545bae538 100644 --- a/x-pack/plugins/alerts/server/alert_type_registry.test.ts +++ b/x-pack/plugins/alerts/server/alert_type_registry.test.ts @@ -118,7 +118,6 @@ describe('register()', () => { "alerting:test": Object { "createTaskRunner": [Function], "title": "Test", - "type": "alerting:test", }, }, ] diff --git a/x-pack/plugins/security/server/session_management/session_management_service.test.ts b/x-pack/plugins/security/server/session_management/session_management_service.test.ts index 0328455fc837..155cc0bdd58f 100644 --- a/x-pack/plugins/security/server/session_management/session_management_service.test.ts +++ b/x-pack/plugins/security/server/session_management/session_management_service.test.ts @@ -50,7 +50,6 @@ describe('SessionManagementService', () => { 
expect(mockTaskManager.registerTaskDefinitions).toHaveBeenCalledWith({ [SESSION_INDEX_CLEANUP_TASK_NAME]: { title: 'Cleanup expired or invalid user sessions', - type: SESSION_INDEX_CLEANUP_TASK_NAME, createTaskRunner: expect.any(Function), }, }); From 40fd393a03006605a8435295ddabdaac935ee5e7 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 19 Oct 2020 12:37:01 +0100 Subject: [PATCH 56/67] merged refactor into obvs --- .../managed_configuration.test.ts | 3 ++ x-pack/plugins/task_manager/server/plugin.ts | 9 +++-- .../server/task_scheduling.test.ts | 34 ++++++++++++++----- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 11f6ccc88185..803b01de9504 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -30,6 +30,9 @@ describe('managed configuration', () => { max_attempts: 9, poll_interval: 3000, max_poll_inactivity_cycles: 10, + monitored_aggregated_stats_refresh_rate: 60000, + monitored_stats_required_freshness: 4000, + monitored_stats_running_average_window: 50, request_capacity: 1000, }); logger = context.logger.get('taskManager'); diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index d887b8effe07..8e30f7be4339 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -131,9 +131,12 @@ export class TaskManagerPlugin }); this.taskPollingLifecycle = taskPollingLifecycle; - createMonitoringStats(taskPollingLifecycle, taskStore, this.config!, this.logger).subscribe( - this.monitoringStats$.next - ); + createMonitoringStats( + taskPollingLifecycle, + taskStore, + this.config!, + this.logger + ).subscribe((stat) => 
this.monitoringStats$.next(stat)); const taskScheduling = new TaskScheduling({ logger: this.logger, diff --git a/x-pack/plugins/task_manager/server/task_scheduling.test.ts b/x-pack/plugins/task_manager/server/task_scheduling.test.ts index 8c92d7a2de89..3d95734594ba 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.test.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.test.ts @@ -22,6 +22,7 @@ import { asErr, asOk } from './lib/result_type'; import { ConcreteTaskInstance, TaskLifecycleResult, TaskStatus } from './task'; import { createInitialMiddleware } from './lib/middleware'; import { taskStoreMock } from './task_store.mock'; +import { TaskRunResult } from './task_runner'; describe('TaskScheduling', () => { const mockTaskStore = taskStoreMock.create({}); @@ -113,7 +114,7 @@ describe('TaskScheduling', () => { const result = taskScheduling.runNow(id); const task = { id } as ConcreteTaskInstance; - events$.next(asTaskRunEvent(id, asOk(task))); + events$.next(asTaskRunEvent(id, asOk({ task, result: TaskRunResult.Success }))); return expect(result).resolves.toEqual({ id }); }); @@ -132,7 +133,16 @@ describe('TaskScheduling', () => { const task = { id } as ConcreteTaskInstance; events$.next(asTaskClaimEvent(id, asOk(task))); events$.next(asTaskMarkRunningEvent(id, asOk(task))); - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + events$.next( + asTaskRunEvent( + id, + asErr({ + task, + error: new Error('some thing gone wrong'), + result: TaskRunResult.Failed, + }) + ) + ); return expect(result).rejects.toMatchInlineSnapshot( `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` @@ -241,9 +251,7 @@ describe('TaskScheduling', () => { events$.next(asTaskRunRequestEvent(id, asErr(new Error('failed to buffer request')))); await expect(result).rejects.toEqual( - new Error( - `Failed to run task "${id}" as Task Manager is at capacity, please try again later` - ) + new 
Error(`Failed to run task "${id}": Task Manager is at capacity, please try again later`) ); expect(mockTaskStore.getLifecycle).not.toHaveBeenCalled(); }); @@ -308,10 +316,20 @@ describe('TaskScheduling', () => { const otherTask = { id: differentTask } as ConcreteTaskInstance; events$.next(asTaskClaimEvent(id, asOk(task))); events$.next(asTaskClaimEvent(differentTask, asOk(otherTask))); + events$.next( + asTaskRunEvent(differentTask, asOk({ task: otherTask, result: TaskRunResult.Success })) + ); - events$.next(asTaskRunEvent(differentTask, asOk(task))); - - events$.next(asTaskRunEvent(id, asErr(new Error('some thing gone wrong')))); + events$.next( + asTaskRunEvent( + id, + asErr({ + task, + error: new Error('some thing gone wrong'), + result: TaskRunResult.Failed, + }) + ) + ); return expect(result).rejects.toMatchInlineSnapshot( `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` From b310cb77e5988e6b95ba19aca5c92db4698937c4 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 19 Oct 2020 13:35:30 +0100 Subject: [PATCH 57/67] fixed typing --- .../server/monitoring/workload_statistics.test.ts | 1 - x-pack/plugins/task_manager/server/plugin.test.ts | 6 ++++++ .../plugins/task_manager/server/polling_lifecycle.test.ts | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index 6c9f4ac74b9a..a57ea6b17548 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -6,7 +6,6 @@ import { first, take, bufferCount } from 'rxjs/operators'; import { loggingSystemMock } from '../../../../../src/core/server/mocks'; -import { Logger } from '../../../../../src/core/server'; import { WorkloadAggregation, createWorkloadAggregator, diff --git 
a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts index 50e7e9a7aa19..a975ad24486e 100644 --- a/x-pack/plugins/task_manager/server/plugin.test.ts +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -19,6 +19,9 @@ describe('TaskManagerPlugin', () => { poll_interval: 3000, max_poll_inactivity_cycles: 10, request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_required_freshness: 5000, + monitored_stats_running_average_window: 50, }); pluginInitializerContext.env.instanceUuid = ''; @@ -38,6 +41,9 @@ describe('TaskManagerPlugin', () => { poll_interval: 3000, max_poll_inactivity_cycles: 10, request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_required_freshness: 5000, + monitored_stats_running_average_window: 50, }); const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts index 92e32f8197a2..ce970f2a157e 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts @@ -28,6 +28,9 @@ describe('TaskPollingLifecycle', () => { poll_interval: 6000000, max_poll_inactivity_cycles: 10, request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_required_freshness: 5000, + monitored_stats_running_average_window: 50, }, taskStore: mockTaskStore, logger: taskManagerLogger, From d3157fda9068441b8e2dba123f6ca89cbcdd495e Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 19 Oct 2020 16:19:12 +0100 Subject: [PATCH 58/67] use camel case in health endpoint as per styleguide --- .../monitoring/monitoring_stats_stream.ts | 13 ++--- .../monitoring/task_run_statistics.test.ts | 15 ++++-- .../server/monitoring/task_run_statistics.ts | 48 +++++++++++-------- 
.../monitoring/workload_statistics.test.ts | 8 ++-- .../server/monitoring/workload_statistics.ts | 12 +++-- .../task_manager/server/routes/health.test.ts | 35 ++++++++------ .../task_manager/server/routes/health.ts | 4 +- .../test_suites/task_manager/health_route.ts | 42 +++++++++------- 8 files changed, 102 insertions(+), 75 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 8574931b84b8..02a4f50c9060 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -39,7 +39,7 @@ const CONFIG_FIELDS_TO_EXPOSE = [ type ConfigStat = Pick; export interface MonitoringStats { - lastUpdate: string; + last_update: string; stats: { configuration: MonitoredStat; workload?: MonitoredStat; @@ -62,7 +62,7 @@ type RawMonitoredStat = MonitoredStat & { }; export interface RawMonitoringStats { - lastUpdate: string; + last_update: string; stats: { configuration: RawMonitoredStat; workload?: RawMonitoredStat; @@ -106,7 +106,7 @@ export function createMonitoringStatsStream( scan((monitoringStats: MonitoringStats, { key, value }) => { // incrementally merge stats as they come in set(monitoringStats.stats, key, value); - monitoringStats.lastUpdate = new Date().toISOString(); + monitoringStats.last_update = new Date().toISOString(); return monitoringStats; }, initialStats) ) @@ -114,11 +114,12 @@ export function createMonitoringStatsStream( } export function summarizeMonitoringStats({ - lastUpdate, + // eslint-disable-next-line @typescript-eslint/naming-convention + last_update, stats: { runtime, workload, configuration }, }: MonitoringStats): RawMonitoringStats { return { - lastUpdate, + last_update, stats: { configuration: { ...configuration, @@ -148,7 +149,7 @@ const initializeStats = ( initialisationTimestamp: string, config: TaskManagerConfig ): 
MonitoringStats => ({ - lastUpdate: initialisationTimestamp, + last_update: initialisationTimestamp, stats: { configuration: { timestamp: initialisationTimestamp, diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index a326a4fa62a2..64a5e83dd356 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -244,7 +244,10 @@ describe('Task Run Statistics', () => { * In the tests this is ocnfiugured to a window size of 5 */ expect( - taskStats.map((taskStat) => taskStat.value.execution.resultFrequency['alerting:test']) + taskStats.map( + (taskStat) => + taskStat.value.execution.result_frequency_percent_as_number['alerting:test'] + ) ).toEqual([ // Success { Success: 100, RetryScheduled: 0, Failed: 0 }, @@ -320,15 +323,17 @@ describe('Task Run Statistics', () => { ) .subscribe((taskStats: Array>) => { try { - expect(taskStats.map((taskStat) => taskStat.value.polling.lastSuccessfulPoll)).toEqual( - expectedTimestamp - ); + expect( + taskStats.map((taskStat) => taskStat.value.polling.last_successful_poll) + ).toEqual(expectedTimestamp); /** * At any given time we only keep track of the last X Polling Results * In the tests this is ocnfiugured to a window size of 5 */ - expect(taskStats.map((taskStat) => taskStat.value.polling.resultFrequency)).toEqual([ + expect( + taskStats.map((taskStat) => taskStat.value.polling.result_frequency_percent_as_number) + ).toEqual([ // NoTasksClaimed { NoTasksClaimed: 100, RanOutOfCapacity: 0, PoolFilled: 0 }, // NoTasksClaimed, NoTasksClaimed, diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 0512c4403672..0156d1000ad7 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ 
b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -33,24 +33,24 @@ import { HealthStatus } from './monitoring_stats_stream'; import { TaskPollingLifecycle } from '../polling_lifecycle'; interface FillPoolStat extends JsonObject { - lastSuccessfulPoll: string; - resultFrequency: FillPoolResult[]; + last_successful_poll: string; + result_frequency_percent_as_number: FillPoolResult[]; } interface ExecutionStat extends JsonObject { duration: Record; - resultFrequency: Record; + result_frequency_percent_as_number: Record; } export interface TaskRunStat extends JsonObject { drift: number[]; execution: ExecutionStat; - polling: FillPoolStat | Omit; + polling: FillPoolStat | Omit; } interface FillPoolRawStat extends JsonObject { - lastSuccessfulPoll: string; - resultFrequency: { + last_successful_poll: string; + result_frequency_percent_as_number: { [FillPoolResult.NoTasksClaimed]: number; [FillPoolResult.RanOutOfCapacity]: number; [FillPoolResult.PoolFilled]: number; @@ -61,7 +61,7 @@ export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; execution: { duration: Record; - resultFrequency: Record< + result_frequency_percent_as_number: Record< string, { [TaskRunResult.Success]: number; @@ -71,7 +71,7 @@ export interface SummarizedTaskRunStat extends JsonObject { } >; }; - polling: FillPoolRawStat | Omit; + polling: FillPoolRawStat | Omit; } export function createTaskRunAggregator( @@ -99,8 +99,8 @@ export function createTaskRunAggregator( map((taskEvent: TaskLifecycleEvent) => { return { polling: { - lastSuccessfulPoll: new Date().toISOString(), - resultFrequency: resultFrequencyQueue( + last_successful_poll: new Date().toISOString(), + result_frequency_percent_as_number: resultFrequencyQueue( ((taskEvent.event as unknown) as Ok).value ), }, @@ -109,10 +109,12 @@ export function createTaskRunAggregator( ); return combineLatest([ - taskRunEvents$.pipe(startWith({ drift: [], execution: { duration: {}, resultFrequency: {} } })), 
+ taskRunEvents$.pipe( + startWith({ drift: [], execution: { duration: {}, result_frequency_percent_as_number: {} } }) + ), taskPollingEvents$.pipe( startWith({ - polling: { resultFrequency: [] }, + polling: { result_frequency_percent_as_number: [] }, }) ), ]).pipe( @@ -146,7 +148,7 @@ function createTaskRunEventToStat(runningAverageWindowSize: number) { drift: driftQueue(timing!.start - task.runAt.getTime()), execution: { duration: taskRunDurationQueue(task.taskType, timing!.stop - timing!.start), - resultFrequency: resultFrequencyQueue(task.taskType, result), + result_frequency_percent_as_number: resultFrequencyQueue(task.taskType, result), }, }); } @@ -164,15 +166,16 @@ const DEFAULT_POLLING_FREQUENCIES = { }; export function summarizeTaskRunStat({ - polling: { lastSuccessfulPoll, resultFrequency: pollingResultFrequency }, + // eslint-disable-next-line @typescript-eslint/naming-convention + polling: { last_successful_poll, result_frequency_percent_as_number: pollingResultFrequency }, drift, - execution: { duration, resultFrequency: executionResultFrequency }, + execution: { duration, result_frequency_percent_as_number: executionResultFrequency }, }: TaskRunStat): { value: SummarizedTaskRunStat; status: HealthStatus } { return { value: { polling: { - ...(lastSuccessfulPoll ? { lastSuccessfulPoll } : {}), - resultFrequency: { + ...(last_successful_poll ? 
{ last_successful_poll } : {}), + result_frequency_percent_as_number: { ...DEFAULT_POLLING_FREQUENCIES, ...calculateFrequency(pollingResultFrequency as FillPoolResult[]), }, @@ -180,10 +183,13 @@ export function summarizeTaskRunStat({ drift: calculateRunningAverage(drift), execution: { duration: mapValues(duration, (typedDurations) => calculateRunningAverage(typedDurations)), - resultFrequency: mapValues(executionResultFrequency, (typedResultFrequencies) => ({ - ...DEFAULT_TASK_RUN_FREQUENCIES, - ...calculateFrequency(typedResultFrequencies), - })), + result_frequency_percent_as_number: mapValues( + executionResultFrequency, + (typedResultFrequencies) => ({ + ...DEFAULT_TASK_RUN_FREQUENCIES, + ...calculateFrequency(typedResultFrequencies), + }) + ), }, }, status: HealthStatus.OK, diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts index a57ea6b17548..d9af3307e75c 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.test.ts @@ -241,7 +241,7 @@ describe('Workload Statistics Aggregator', () => { expect(result.key).toEqual('workload'); expect(result.value).toMatchObject({ count: 4, - taskTypes: { + task_types: { actions_telemetry: { count: 2, status: { idle: 2 } }, alerting_telemetry: { count: 1, status: { idle: 1 } }, session_cleanup: { count: 1, status: { idle: 1 } }, @@ -294,7 +294,7 @@ describe('Workload Statistics Aggregator', () => { // 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57 // [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 ] // Above you see each bucket and the number of scheduled tasks we expect to have in them - estimatedScheduleDensity: [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + estimated_schedule_density: [0, 0, 0, 0, 0, 0, 0, 2, 2, 5, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], }); 
resolve(); }); @@ -399,7 +399,7 @@ describe('Workload Statistics Aggregator', () => { expect(results[0].key).toEqual('workload'); expect(results[0].value).toMatchObject({ count: 5, - taskTypes: { + task_types: { actions_telemetry: { count: 2, status: { idle: 2 } }, alerting_telemetry: { count: 2, status: { idle: 2 } }, session_cleanup: { count: 1, status: { idle: 1 } }, @@ -408,7 +408,7 @@ describe('Workload Statistics Aggregator', () => { expect(results[1].key).toEqual('workload'); expect(results[1].value).toMatchObject({ count: 5, - taskTypes: { + task_types: { actions_telemetry: { count: 2, status: { idle: 2 } }, alerting_telemetry: { count: 2, status: { idle: 1, failed: 1 } }, session_cleanup: { count: 1, status: { idle: 1 } }, diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 373b4dfab411..37bf65ba90b5 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -29,10 +29,10 @@ interface TaskTypeStat extends JsonObject { export interface WorkloadStat extends JsonObject { count: number; - taskTypes: TaskTypeStat; + task_types: TaskTypeStat; schedule: Array<[string, number]>; overdue: number; - estimatedScheduleDensity: number[]; + estimated_schedule_density: number[]; } export interface WorkloadAggregation { @@ -204,7 +204,7 @@ export function createWorkloadAggregator( const summary: WorkloadStat = { count, - taskTypes: mapValues(keyBy(taskTypes, 'key'), ({ doc_count: docCount, status }) => { + task_types: mapValues(keyBy(taskTypes, 'key'), ({ doc_count: docCount, status }) => { return { count: docCount, status: mapValues(keyBy(status.buckets, 'key'), 'doc_count'), @@ -218,7 +218,11 @@ export function createWorkloadAggregator( ) .map((schedule) => [schedule.key as string, schedule.doc_count]), overdue, - estimatedScheduleDensity: 
padBuckets(scheduleDensityBuckets, pollInterval, scheduleDensity), + estimated_schedule_density: padBuckets( + scheduleDensityBuckets, + pollInterval, + scheduleDensity + ), }; return { key: 'workload', diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 0dbfc8b8f3b2..9de30ef560c1 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -103,7 +103,7 @@ describe('healthRoute', () => { stats$.next( mockHealthStats({ - lastUpdate: new Date(Date.now() - 1500).toISOString(), + last_update: new Date(Date.now() - 1500).toISOString(), }) ); @@ -112,7 +112,7 @@ describe('healthRoute', () => { status: 'error', ...summarizeMonitoringStats( mockHealthStats({ - lastUpdate: expect.any(String), + last_update: expect.any(String), stats: { configuration: { timestamp: expect.any(String), @@ -124,7 +124,7 @@ describe('healthRoute', () => { timestamp: expect.any(String), value: { polling: { - lastSuccessfulPoll: expect.any(String), + last_successful_poll: expect.any(String), }, }, }, @@ -141,7 +141,7 @@ describe('healthRoute', () => { status: 'error', ...summarizeMonitoringStats( mockHealthStats({ - lastUpdate: expect.any(String), + last_update: expect.any(String), stats: { configuration: { timestamp: expect.any(String), @@ -153,7 +153,7 @@ describe('healthRoute', () => { timestamp: expect.any(String), value: { polling: { - lastSuccessfulPoll: expect.any(String), + last_successful_poll: expect.any(String), }, }, }, @@ -195,7 +195,7 @@ describe('healthRoute', () => { status: 'error', ...summarizeMonitoringStats( mockHealthStats({ - lastUpdate: expect.any(String), + last_update: expect.any(String), stats: { configuration: { timestamp: expect.any(String), @@ -207,7 +207,7 @@ describe('healthRoute', () => { timestamp: expect.any(String), value: { polling: { - lastSuccessfulPoll: expect.any(String), + last_successful_poll: 
expect.any(String), }, }, }, @@ -226,14 +226,15 @@ describe('healthRoute', () => { await sleep(0); - const lastSuccessfulPoll = new Date(Date.now() - 2000).toISOString(); + // eslint-disable-next-line @typescript-eslint/naming-convention + const last_successful_poll = new Date(Date.now() - 2000).toISOString(); stats$.next( mockHealthStats({ stats: { runtime: { value: { polling: { - lastSuccessfulPoll, + last_successful_poll, }, }, }, @@ -250,7 +251,7 @@ describe('healthRoute', () => { status: 'error', ...summarizeMonitoringStats( mockHealthStats({ - lastUpdate: expect.any(String), + last_update: expect.any(String), stats: { configuration: { timestamp: expect.any(String), @@ -262,7 +263,7 @@ describe('healthRoute', () => { timestamp: expect.any(String), value: { polling: { - lastSuccessfulPoll, + last_successful_poll, }, }, }, @@ -277,7 +278,7 @@ describe('healthRoute', () => { function mockHealthStats(overrides = {}) { return (merge( { - lastUpdate: new Date().toISOString(), + last_update: new Date().toISOString(), stats: { configuration: { timestamp: new Date().toISOString(), @@ -312,11 +313,15 @@ function mockHealthStats(overrides = {}) { drift: [1000, 60000], execution: { duration: [], - resultFrequency: [], + result_frequency_percent_as_number: [], }, polling: { - lastSuccessfulPoll: new Date().toISOString(), - resultFrequency: ['NoTasksClaimed', 'NoTasksClaimed', 'NoTasksClaimed'], + last_successful_poll: new Date().toISOString(), + result_frequency_percent_as_number: [ + 'NoTasksClaimed', + 'NoTasksClaimed', + 'NoTasksClaimed', + ], }, }, }, diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 59210800ce71..b04b5bbd063d 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -133,8 +133,8 @@ function hasExpiredHotTimestamps( return ( now - getOldestTimestamp( - monitoringStats.lastUpdate, - 
monitoringStats.stats.runtime?.value.polling.lastSuccessfulPoll + monitoringStats.last_update, + monitoringStats.stats.runtime?.value.polling.last_successful_poll ) > requiredFreshness ); diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 270371acdcbf..8b20cdc2fac7 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -12,7 +12,7 @@ import { FtrProviderContext } from '../../ftr_provider_context'; import { ConcreteTaskInstance } from '../../../../plugins/task_manager/server'; interface MonitoringStats { - lastUpdate: string; + last_update: string; status: string; stats: { configuration: { @@ -23,10 +23,10 @@ interface MonitoringStats { timestamp: string; value: { count: number; - taskTypes: Record; + task_types: Record; schedule: Array<[string, number]>; overdue: number; - estimatedScheduleDensity: number[]; + estimated_schedule_density: number[]; }; }; runtime: { @@ -35,11 +35,11 @@ interface MonitoringStats { drift: Record; execution: { duration: Record>; - resultFrequency: Record>; + result_frequency_percent_as_number: Record>; }; polling: { - lastSuccessfulPoll: string; - resultFrequency: Record; + last_successful_poll: string; + result_frequency_percent_as_number: Record; }; }; }; @@ -98,7 +98,7 @@ export default function ({ getService }: FtrProviderContext) { expect(status).to.eql('OK'); const sumSampleTaskInWorkload = - (workload.value.taskTypes as { + (workload.value.task_types as { sampleTask?: { count: number }; }).sampleTask?.count ?? 
0; const scheduledWorkload = (mapValues( @@ -123,7 +123,7 @@ export default function ({ getService }: FtrProviderContext) { const workloadAfterScheduling = (await getHealth()).stats.workload.value; expect( - (workloadAfterScheduling.taskTypes as { sampleTask: { count: number } }).sampleTask.count + (workloadAfterScheduling.task_types as { sampleTask: { count: number } }).sampleTask.count ).to.eql(sumSampleTaskInWorkload + 2); const schedulesWorkloadAfterScheduling = (mapValues( @@ -148,11 +148,11 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof workload.overdue).to.eql('number'); - expect(Array.isArray(workload.estimatedScheduleDensity)).to.eql(true); + expect(Array.isArray(workload.estimated_schedule_density)).to.eql(true); // test run with the default poll_interval of 3s and a monitored_aggregated_stats_refresh_rate of 5s, - // so we expect the estimatedScheduleDensity to span a minute (which means 20 buckets, as 60s / 3s = 20) - expect(workload.estimatedScheduleDensity.length).to.eql(20); + // so we expect the estimated_schedule_density to span a minute (which means 20 buckets, as 60s / 3s = 20) + expect(workload.estimated_schedule_density.length).to.eql(20); }); it('should return the task manager runtime stats', async () => { @@ -167,10 +167,10 @@ export default function ({ getService }: FtrProviderContext) { }, } = (await getHealth()).stats; - expect(isNaN(Date.parse(polling.lastSuccessfulPoll as string))).to.eql(false); - expect(typeof polling.resultFrequency.NoTasksClaimed).to.eql('number'); - expect(typeof polling.resultFrequency.RanOutOfCapacity).to.eql('number'); - expect(typeof polling.resultFrequency.PoolFilled).to.eql('number'); + expect(isNaN(Date.parse(polling.last_successful_poll as string))).to.eql(false); + expect(typeof polling.result_frequency_percent_as_number.NoTasksClaimed).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.RanOutOfCapacity).to.eql('number'); + expect(typeof 
polling.result_frequency_percent_as_number.PoolFilled).to.eql('number'); expect(typeof drift.p50).to.eql('number'); expect(typeof drift.p90).to.eql('number'); @@ -182,9 +182,15 @@ export default function ({ getService }: FtrProviderContext) { expect(typeof execution.duration.sampleTask.p95).to.eql('number'); expect(typeof execution.duration.sampleTask.p99).to.eql('number'); - expect(typeof execution.resultFrequency.sampleTask.Success).to.eql('number'); - expect(typeof execution.resultFrequency.sampleTask.RetryScheduled).to.eql('number'); - expect(typeof execution.resultFrequency.sampleTask.Failed).to.eql('number'); + expect(typeof execution.result_frequency_percent_as_number.sampleTask.Success).to.eql( + 'number' + ); + expect(typeof execution.result_frequency_percent_as_number.sampleTask.RetryScheduled).to.eql( + 'number' + ); + expect(typeof execution.result_frequency_percent_as_number.sampleTask.Failed).to.eql( + 'number' + ); }); }); } From 20d7e66bba0f4992603562b9ef15d4583926f409 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 19 Oct 2020 18:01:52 +0100 Subject: [PATCH 59/67] fixed readme --- .../plugins/task_manager/server/MONITORING.md | 330 +++++++++--------- 1 file changed, 165 insertions(+), 165 deletions(-) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index eaa9013ccad3..f304c1e5c3c7 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -42,7 +42,7 @@ An `Error` status in any section will cause the whole system to display as `Erro A `Warning` status in any section will cause the whole system to display as `Warning`. An `OK` status will only be displayed when all sections are marked as `OK`. -The root `timestamp` is the time in which the summary was exposed (either to the DEBUG logger or the http api) and the `lastUpdate` is the last time any one of the sections was updated. 
+The root `timestamp` is the time in which the summary was exposed (either to the DEBUG logger or the http api) and the `last_update` is the last time any one of the sections was updated. #### The Configuration Section The `configuration` section summarizes Task Manager's current configuration, including dynamic configurations which change over time, such as `poll_interval` and `max_workers` which adjust in reaction to changing load on the system. @@ -74,169 +74,169 @@ These are "Hot" stats which are updated reactively as Tasks are executed and int For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might get these stats: ``` { - /* the time these stats were returned by the api */ - "timestamp": "2020-10-05T18:26:11.346Z", - /* the overall status of the system */ - "status": "OK", - /* last time any stat was updated in this output */ - "lastUpdate": "2020-10-05T17:57:55.411Z", - "stats": { - "configuration": { /* current configuration of TM */ - "timestamp": "2020-10-05T17:56:06.507Z", - "status": "OK", - "value": { - "max_workers": 10, - "poll_interval": 3000, - "request_capacity": 1000, - "max_poll_inactivity_cycles": 10, - "monitored_aggregated_stats_refresh_rate": 60000, - "monitored_stats_running_average_window": 50 - } - }, - "workload": { /* The workload of this deployment */ - "timestamp": "2020-10-05T17:57:06.534Z", - "status": "OK", - "value": { - "count": 6, /* count of tasks in the system */ - "taskTypes": { /* what tasks are there and what status are they in */ - "actions_telemetry": { - "count": 1, - "status": { - "idle": 1 - } - }, - "alerting_telemetry": { - "count": 1, - "status": { - "idle": 1 - } - }, - "apm-telemetry-task": { - "count": 1, - "status": { - "idle": 1 - } - }, - "endpoint:user-artifact-packager": { - "count": 1, - "status": { - "idle": 1 - } - }, - "lens_telemetry": { - "count": 1, - "status": { - "idle": 1 - } - }, - "session_cleanup": { - "count": 1, - "status": { - "idle": 1 - } - } - }, - - /* Frequency of 
recurring tasks schedules */ - "schedule": [ - ["60s", 1], /* 1 task, every 60s */ - ["3600s", 3], /* 3 tasks every hour */ - ["720m", 1] - ], - /* There are no overdue tasks in this system at the moment */ - "overdue": 0, - /* This is the schedule density, it shows a histogram of all the polling intervals in the next minute (or, if - pollInterval is configured unusually high it will show a min of 2 refresh intervals into the future, and a max of 50 buckets). - Here we see that on the 3rd polling interval from *now* (which is ~9 seconds from now, as pollInterval is `3s`) there is one task due to run. - We also see that there are 5 due two intervals later, which is fine as we have a max workers of `10` - */ - "estimatedScheduleDensity": [0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - } - }, - "runtime": { - "timestamp": "2020-10-05T17:57:55.411Z", - "status": "OK", - "value": { - "polling": { - /* When was the last polling cycle? */ - "lastSuccessfulPoll": "2020-10-05T17:57:55.411Z", - /* What is the frequency of polling cycle result? 
- Here we see 94% of "NoTasksClaimed" and 6% "PoolFilled" */ - "resultFrequency": { - "NoTasksClaimed": 94, - "RanOutOfCapacity": 0, /* This is a legacy result, we might want to rename - it tells us when a polling cycle resulted in claiming more tasks than we had workers for, butt he name doesn't make much sense outside of the context of the code */ - "PoolFilled": 6 - } - }, - /* on average, the tasks in this deployment run 1.7s after their scheduled time */ - "drift": { - "mean": 1720, - "median": 2276 - }, - "execution": { - "duration": { - /* on average, the `endpoint:user-artifact-packager` tasks take 15ms to run */ - "endpoint:user-artifact-packager": { - "mean": 15, - "median": 14.5 - }, - "session_cleanup": { - "mean": 28, - "median": 28 - }, - "lens_telemetry": { - "mean": 100, - "median": 100 - }, - "actions_telemetry": { - "mean": 135, - "median": 135 - }, - "alerting_telemetry": { - "mean": 197, - "median": 197 - }, - "apm-telemetry-task": { - "mean": 1347, - "median": 1347 - } - }, - "resultFrequency": { - /* and 100% of `endpoint:user-artifact-packager` have completed in success (within the running average window, so the past 50 runs (by default, configrable by `monitored_stats_running_average_window`) */ - "endpoint:user-artifact-packager": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - }, - "session_cleanup": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - }, - "lens_telemetry": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - }, - "actions_telemetry": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - }, - "alerting_telemetry": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - }, - "apm-telemetry-task": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 - } - } - } - } - } - } + /* the time these stats were returned by the api */ + "timestamp": "2020-10-05T18:26:11.346Z", + /* the overall status of the system */ + "status": "OK", + /* last time any stat was updated in this output */ + 
"last_update": "2020-10-05T17:57:55.411Z", + "stats": { + "configuration": { /* current configuration of TM */ + "timestamp": "2020-10-05T17:56:06.507Z", + "status": "OK", + "value": { + "max_workers": 10, + "poll_interval": 3000, + "request_capacity": 1000, + "max_poll_inactivity_cycles": 10, + "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_running_average_window": 50 + } + }, + "workload": { /* The workload of this deployment */ + "timestamp": "2020-10-05T17:57:06.534Z", + "status": "OK", + "value": { + "count": 6, /* count of tasks in the system */ + "task_types": { /* what tasks are there and what status are they in */ + "actions_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "alerting_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "apm-telemetry-task": { + "count": 1, + "status": { + "idle": 1 + } + }, + "endpoint:user-artifact-packager": { + "count": 1, + "status": { + "idle": 1 + } + }, + "lens_telemetry": { + "count": 1, + "status": { + "idle": 1 + } + }, + "session_cleanup": { + "count": 1, + "status": { + "idle": 1 + } + } + }, + + /* Frequency of recurring tasks schedules */ + "schedule": [ + ["60s", 1], /* 1 task, every 60s */ + ["3600s", 3], /* 3 tasks every hour */ + ["720m", 1] + ], + /* There are no overdue tasks in this system at the moment */ + "overdue": 0, + /* This is the schedule density, it shows a histogram of all the polling intervals in the next minute (or, if + pollInterval is configured unusually high it will show a min of 2 refresh intervals into the future, and a max of 50 buckets). + Here we see that on the 3rd polling interval from *now* (which is ~9 seconds from now, as pollInterval is `3s`) there is one task due to run. 
+ We also see that there are 5 due two intervals later, which is fine as we have a max workers of `10` + */ + "estimated_schedule_density": [0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + } + }, + "runtime": { + "timestamp": "2020-10-05T17:57:55.411Z", + "status": "OK", + "value": { + "polling": { + /* When was the last polling cycle? */ + "last_successful_poll": "2020-10-05T17:57:55.411Z", + /* What is the frequency of polling cycle result? + Here we see 94% of "NoTasksClaimed" and 6% "PoolFilled" */ + "result_frequency_percent_as_number": { + "NoTasksClaimed": 94, + "RanOutOfCapacity": 0, /* This is a legacy result, we might want to rename - it tells us when a polling cycle resulted in claiming more tasks than we had workers for, butt he name doesn't make much sense outside of the context of the code */ + "PoolFilled": 6 + } + }, + /* on average, the tasks in this deployment run 1.7s after their scheduled time */ + "drift": { + "mean": 1720, + "median": 2276 + }, + "execution": { + "duration": { + /* on average, the `endpoint:user-artifact-packager` tasks take 15ms to run */ + "endpoint:user-artifact-packager": { + "mean": 15, + "median": 14.5 + }, + "session_cleanup": { + "mean": 28, + "median": 28 + }, + "lens_telemetry": { + "mean": 100, + "median": 100 + }, + "actions_telemetry": { + "mean": 135, + "median": 135 + }, + "alerting_telemetry": { + "mean": 197, + "median": 197 + }, + "apm-telemetry-task": { + "mean": 1347, + "median": 1347 + } + }, + "result_frequency_percent_as_number": { + /* and 100% of `endpoint:user-artifact-packager` have completed in success (within the running average window, so the past 50 runs (by default, configrable by `monitored_stats_running_average_window`) */ + "endpoint:user-artifact-packager": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "session_cleanup": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "lens_telemetry": 
{ + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "actions_telemetry": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "alerting_telemetry": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + }, + "apm-telemetry-task": { + "Success": 100, + "RetryScheduled": 0, + "Failed": 0 + } + } + } + } + } + } } ``` From 1906572b9ae0332ee0de5641b67cf6af94eddf43 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 20 Oct 2020 12:03:09 +0100 Subject: [PATCH 60/67] made execution thresholds configurable --- .../plugins/task_manager/server/MONITORING.md | 21 ++- .../task_manager/server/config.test.ts | 72 +++++++++++ x-pack/plugins/task_manager/server/config.ts | 32 +++++ .../managed_configuration.test.ts | 7 + .../monitoring/configuration_statistics.ts | 32 +++++ .../monitoring_stats_stream.test.ts | 41 ++++-- .../monitoring/monitoring_stats_stream.ts | 63 ++++----- .../monitoring/task_run_statistics.test.ts | 122 ++++++++++++++++-- .../server/monitoring/task_run_statistics.ts | 66 +++++++--- .../task_manager/server/plugin.test.ts | 14 ++ x-pack/plugins/task_manager/server/plugin.ts | 6 +- .../server/polling_lifecycle.test.ts | 7 + .../task_manager/server/routes/health.test.ts | 79 ++++++++++-- .../task_manager/server/routes/health.ts | 13 +- .../test_suites/task_manager/health_route.ts | 7 + 15 files changed, 474 insertions(+), 108 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index f304c1e5c3c7..93b998056ea3 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -9,11 +9,17 @@ There are three different sections to the stats returned by the `health` api. - `runtime`: Tracks Task Manager's performance. 
### Configuring the Stats -There are three new configurations: +There are four new configurations: - `xpack.task_manager.monitored_stats_required_freshness` - The _required freshness_ of critical "Hot" stats, which means that if key stats (last polling cycle time, for example) haven't been refreshed within the specified duration, the `_health` endpoint and service will report an `Error` status. By default this is inferred from the configured `poll_interval` and is set to `poll_interval` plus a `1s` buffer. - `xpack.task_manager.monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. These metrics require an aggregation against Elasticsearch and add load to the system, hence we want to limit how often we execute these. We also inffer the _required freshness_ of these "Cold" metrics from this configuration, which means that if these stats have not been updated within the required duration then the `_health` endpoint and service will report an `Error` status. This covers the entire `workload` section of the stats. By default this is configured to `60s`, and as a result the _required freshness_ defaults to `61s` (refresh plus a `1s` buffer). - `xpack.task_manager.monitored_stats_running_average_window`- Dictates the size of the window used to calculate the running average of various "Hot" stats, such as the time it takes to run a task, the _drift_ that tasks experience etc. These stats are collected throughout the lifecycle of tasks and this window will dictate how large the queue we keep in memory would be, and how many values we need to calculate the average against. We do not calculate the average on *every* new value, but rather only when the time comes to summarize the stats before logging them or returning them to the API endpoint. 
+- `xpack.task_manager.monitored_task_execution_thresholds`- Configures the threshold of failed task executions at which point the `warn` or `error` health status will be set either at a default level or a custom level for specific task types. This will allow you to mark the health as `error` when any task type fails 90% of the time, but set it to `error` at 50% of the time for task types that you consider critical. This value can be set to any number between 0 and 100, and a threshold is hit when the value *exceeds* this number. This means that you can avoid setting the status to `error` by setting the threshold at 100, or hit `error` the moment any task fails by setting the threshold to 0 (as it will exceed 0 once a single failure occurs). + +For example: +``` + +``` ## Consuming Health Stats Task Manager exposes a `/api/task_manager/_health` api which returns the _latest_ stats. @@ -204,31 +210,38 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g "result_frequency_percent_as_number": { /* and 100% of `endpoint:user-artifact-packager` have completed in success (within the running average window, so the past 50 runs (by default, configrable by `monitored_stats_running_average_window`) */ "endpoint:user-artifact-packager": { + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "session_cleanup": { - "Success": 100, - "RetryScheduled": 0, - "Failed": 0 + /* `error` status as 90% of results are `Failed` */ + "status": "error", + "Success": 5, + "RetryScheduled": 5, + "Failed": 90 }, "lens_telemetry": { + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "actions_telemetry": { + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "alerting_telemetry": { + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "apm-telemetry-task": { + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 diff --git a/x-pack/plugins/task_manager/server/config.test.ts
b/x-pack/plugins/task_manager/server/config.test.ts index cb78bc794529..825ae0404f76 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -18,6 +18,13 @@ describe('config validation', () => { "monitored_aggregated_stats_refresh_rate": 60000, "monitored_stats_required_freshness": 4000, "monitored_stats_running_average_window": 50, + "monitored_task_execution_thresholds": Object { + "custom": Object {}, + "default": Object { + "error_threshold": 90, + "warn_threshold": 80, + }, + }, "poll_interval": 3000, "request_capacity": 1000, } @@ -45,6 +52,7 @@ describe('config validation', () => { `"The specified monitored_stats_required_freshness (100) is invalid, as it is below the poll_interval (3000)"` ); }); + test('the default required freshness of the monitored stats is poll interval with a slight buffer', () => { const config: Record = {}; expect(configSchema.validate(config)).toMatchInlineSnapshot(` @@ -57,9 +65,73 @@ describe('config validation', () => { "monitored_aggregated_stats_refresh_rate": 60000, "monitored_stats_required_freshness": 4000, "monitored_stats_running_average_window": 50, + "monitored_task_execution_thresholds": Object { + "custom": Object {}, + "default": Object { + "error_threshold": 90, + "warn_threshold": 80, + }, + }, "poll_interval": 3000, "request_capacity": 1000, } `); }); + + test('the custom monitored_task_execution_thresholds can be configured at task type', () => { + const config: Record = { + monitored_task_execution_thresholds: { + custom: { + 'alerting:always-fires': { + error_threshold: 50, + warn_threshold: 30, + }, + }, + }, + }; + expect(configSchema.validate(config)).toMatchInlineSnapshot(` + Object { + "enabled": true, + "index": ".kibana_task_manager", + "max_attempts": 3, + "max_poll_inactivity_cycles": 10, + "max_workers": 10, + "monitored_aggregated_stats_refresh_rate": 60000, + "monitored_stats_required_freshness": 4000, + 
"monitored_stats_running_average_window": 50, + "monitored_task_execution_thresholds": Object { + "custom": Object { + "alerting:always-fires": Object { + "error_threshold": 50, + "warn_threshold": 30, + }, + }, + "default": Object { + "error_threshold": 90, + "warn_threshold": 80, + }, + }, + "poll_interval": 3000, + "request_capacity": 1000, + } + `); + }); + + test('the monitored_task_execution_thresholds warn_threshold must be lte error_threshold', () => { + const config: Record = { + monitored_task_execution_thresholds: { + custom: { + 'alerting:always-fires': { + error_threshold: 80, + warn_threshold: 90, + }, + }, + }, + }; + expect(() => { + configSchema.validate(config); + }).toThrowErrorMatchingInlineSnapshot( + `"[monitored_task_execution_thresholds.custom.alerting:always-fires]: warn_threshold must be less than, or equal to, error_threshold"` + ); + }); }); diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index de82a6ffa888..96825ce0cbd7 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -16,6 +16,30 @@ export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; export const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000; export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50; +export const taskExecutionFailureThresholdSchema = schema.object( + { + error_threshold: schema.number({ + defaultValue: 90, + min: 1, + }), + warn_threshold: schema.number({ + defaultValue: 80, + min: 1, + }), + }, + { + validate: (config) => { + if ( + config.error_threshold && + config.warn_threshold && + config.warn_threshold > config.error_threshold + ) { + return `warn_threshold must be less than, or equal to, error_threshold`; + } + }, + } +); + export const configSchema = schema.object( { enabled: schema.boolean({ defaultValue: true }), @@ -73,6 +97,13 @@ export const configSchema = schema.object( max: 100, min: 10, }), + /* Task Execution result warn & 
error thresholds. */ + monitored_task_execution_thresholds: schema.object({ + default: taskExecutionFailureThresholdSchema, + custom: schema.recordOf(schema.string(), taskExecutionFailureThresholdSchema, { + defaultValue: {}, + }), + }), }, { validate: (config) => { @@ -88,3 +119,4 @@ export const configSchema = schema.object( ); export type TaskManagerConfig = TypeOf; +export type TaskExecutionFailureThreshold = TypeOf; diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index 803b01de9504..01326c73bd68 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -34,6 +34,13 @@ describe('managed configuration', () => { monitored_stats_required_freshness: 4000, monitored_stats_running_average_window: 50, request_capacity: 1000, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }); logger = context.logger.get('taskManager'); diff --git a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts new file mode 100644 index 000000000000..46d3e4a35a25 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { of } from 'rxjs'; +import { pick } from 'lodash'; +import { AggregatedStatProvider } from './runtime_statistics_aggregator'; +import { TaskManagerConfig } from '../config'; + +const CONFIG_FIELDS_TO_EXPOSE = [ + 'max_workers', + 'poll_interval', + 'request_capacity', + 'max_poll_inactivity_cycles', + 'monitored_aggregated_stats_refresh_rate', + 'monitored_stats_running_average_window', + 'monitored_task_execution_thresholds', +] as const; + +export type ConfigStat = Pick; + +export function createConfigurationAggregator( + config: TaskManagerConfig +): AggregatedStatProvider { + const picked: ConfigStat = pick(config, ...CONFIG_FIELDS_TO_EXPOSE); + return of({ + key: 'configuration', + value: picked, + }); +} diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts index b8bcf15101d2..8479def5deee 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -26,6 +26,13 @@ describe('createMonitoringStatsStream', () => { request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }; it('returns the initial config used to configure Task Manager', async () => { @@ -33,18 +40,7 @@ describe('createMonitoringStatsStream', () => { createMonitoringStatsStream(of(), configuration) .pipe(take(1)) .subscribe((firstValue) => { - expect(firstValue.stats).toMatchObject({ - configuration: { - value: { - max_workers: 10, - poll_interval: 6000000, - max_poll_inactivity_cycles: 10, - request_capacity: 1000, - monitored_aggregated_stats_refresh_rate: 5000, - monitored_stats_running_average_window: 50, - }, - }, - }); + expect(firstValue.stats).toEqual({}); 
resolve(); }); }); @@ -68,6 +64,13 @@ describe('createMonitoringStatsStream', () => { request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }, }, }, @@ -95,6 +98,13 @@ describe('createMonitoringStatsStream', () => { request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }, }, }, @@ -122,6 +132,13 @@ describe('createMonitoringStatsStream', () => { request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }, }, }, diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 02a4f50c9060..17aaf31e68dd 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -6,7 +6,6 @@ import { merge, of, Observable } from 'rxjs'; import { map, scan } from 'rxjs/operators'; import { set } from '@elastic/safer-lodash-set'; -import { pick } from 'lodash'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { TaskStore } from '../task_store'; @@ -22,26 +21,16 @@ import { TaskRunStat, SummarizedTaskRunStat, } from './task_run_statistics'; +import { ConfigStat, createConfigurationAggregator } from './configuration_statistics'; import { TaskManagerConfig } from '../config'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; export { AggregatedStatProvider, 
AggregatedStat } from './runtime_statistics_aggregator'; -const CONFIG_FIELDS_TO_EXPOSE = [ - 'max_workers', - 'poll_interval', - 'request_capacity', - 'max_poll_inactivity_cycles', - 'monitored_aggregated_stats_refresh_rate', - 'monitored_stats_running_average_window', -] as const; - -type ConfigStat = Pick; - export interface MonitoringStats { last_update: string; stats: { - configuration: MonitoredStat; + configuration?: MonitoredStat; workload?: MonitoredStat; runtime?: MonitoredStat; }; @@ -64,7 +53,7 @@ type RawMonitoredStat = MonitoredStat & { export interface RawMonitoringStats { last_update: string; stats: { - configuration: RawMonitoredStat; + configuration?: RawMonitoredStat; workload?: RawMonitoredStat; runtime?: RawMonitoredStat; }; @@ -77,6 +66,7 @@ export function createAggregators( logger: Logger ): AggregatedStatProvider { return merge( + createConfigurationAggregator(config), createTaskRunAggregator(taskPollingLifecycle, config.monitored_stats_running_average_window), createWorkloadAggregator( taskStore, @@ -91,7 +81,10 @@ export function createMonitoringStatsStream( provider$: AggregatedStatProvider, config: TaskManagerConfig ): Observable { - const initialStats = initializeStats(new Date().toISOString(), config); + const initialStats = { + last_update: new Date().toISOString(), + stats: {}, + }; return merge( // emit the initial stats of(initialStats), @@ -113,23 +106,30 @@ export function createMonitoringStatsStream( ); } -export function summarizeMonitoringStats({ - // eslint-disable-next-line @typescript-eslint/naming-convention - last_update, - stats: { runtime, workload, configuration }, -}: MonitoringStats): RawMonitoringStats { +export function summarizeMonitoringStats( + { + // eslint-disable-next-line @typescript-eslint/naming-convention + last_update, + stats: { runtime, workload, configuration }, + }: MonitoringStats, + config: TaskManagerConfig +): RawMonitoringStats { return { last_update, stats: { - configuration: { - 
...configuration, - status: HealthStatus.OK, - }, + ...(configuration + ? { + configuration: { + ...configuration, + status: HealthStatus.OK, + }, + } + : {}), ...(runtime ? { runtime: { timestamp: runtime.timestamp, - ...summarizeTaskRunStat(runtime.value), + ...summarizeTaskRunStat(runtime.value, config), }, } : {}), @@ -144,16 +144,3 @@ export function summarizeMonitoringStats({ }, }; } - -const initializeStats = ( - initialisationTimestamp: string, - config: TaskManagerConfig -): MonitoringStats => ({ - last_update: initialisationTimestamp, - stats: { - configuration: { - timestamp: initialisationTimestamp, - value: pick(config, ...CONFIG_FIELDS_TO_EXPOSE) as ConfigStat, - }, - }, -}); diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index 64a5e83dd356..a931f0ff7c30 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -24,6 +24,7 @@ import { import { AggregatedStat } from './runtime_statistics_aggregator'; import { FillPoolResult } from '../lib/fill_pool'; import { taskPollingLifecycleMock } from '../polling_lifecycle.mock'; +import { configSchema } from '../config'; describe('Task Run Statistics', () => { let fakeTimer: sinon.SinonFakeTimers; @@ -68,7 +69,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value).value, + value: summarizeTaskRunStat(value, getTaskManagerConfig()).value, })), take(runAtDrift.length), bufferCount(runAtDrift.length) @@ -136,7 +137,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value).value, + value: summarizeTaskRunStat(value, 
getTaskManagerConfig()).value, })), take(runDurations.length * 2), bufferCount(runDurations.length * 2) @@ -232,7 +233,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value).value, + value: summarizeTaskRunStat(value, getTaskManagerConfig()).value, })), take(10), bufferCount(10) @@ -250,25 +251,116 @@ describe('Task Run Statistics', () => { ) ).toEqual([ // Success - { Success: 100, RetryScheduled: 0, Failed: 0 }, + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, // Success, Success, - { Success: 100, RetryScheduled: 0, Failed: 0 }, + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, // Success, Success, Success - { Success: 100, RetryScheduled: 0, Failed: 0 }, + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, // Success, Success, Success, Failed - { Success: 75, RetryScheduled: 0, Failed: 25 }, + { Success: 75, RetryScheduled: 0, Failed: 25, status: 'OK' }, // Success, Success, Success, Failed, Failed - { Success: 60, RetryScheduled: 0, Failed: 40 }, + { Success: 60, RetryScheduled: 0, Failed: 40, status: 'OK' }, // Success, Success, Failed, Failed, Failed - { Success: 40, RetryScheduled: 0, Failed: 60 }, + { Success: 40, RetryScheduled: 0, Failed: 60, status: 'OK' }, // Success, Failed, Failed, Failed, RetryScheduled - { Success: 20, RetryScheduled: 20, Failed: 60 }, + { Success: 20, RetryScheduled: 20, Failed: 60, status: 'OK' }, // Failed, Failed, Failed, RetryScheduled, RetryScheduled - { Success: 0, RetryScheduled: 40, Failed: 60 }, + { Success: 0, RetryScheduled: 40, Failed: 60, status: 'OK' }, // Failed, Failed, RetryScheduled, RetryScheduled, Success - { Success: 20, RetryScheduled: 40, Failed: 40 }, + { Success: 20, RetryScheduled: 40, Failed: 40, status: 'OK' }, // Failed, RetryScheduled, RetryScheduled, Success, Success - { Success: 40, RetryScheduled: 40, Failed: 20 }, + { Success: 
40, RetryScheduled: 40, Failed: 20, status: 'OK' }, + ]); + resolve(); + } catch (e) { + reject(e); + } + }); + + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); + }); + }); + + test('frequency of task run results return an error health status when failure is above a certain threshold', async () => { + const events$ = new Subject(); + + const taskPollingLifecycle = taskPollingLifecycleMock.create({ + events$: events$ as Observable, + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator( + taskPollingLifecycle, + runningAverageWindowSize + ); + + return new Promise((resolve, reject) => { + taskRunAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeTaskRunStat( + value, + getTaskManagerConfig({ + monitored_task_execution_thresholds: { + custom: { + 'alerting:test': { + error_threshold: 59, + warn_threshold: 39, + }, + }, + }, + }) + ).value, + })), + take(10), + bufferCount(10) + ) + .subscribe((taskStats: Array>) => { + try { + 
/** + * At any given time we only keep track of the last X Polling Results + * In the tests this is ocnfiugured to a window size of 5 + */ + expect( + taskStats.map( + (taskStat) => + taskStat.value.execution.result_frequency_percent_as_number['alerting:test'] + ) + ).toEqual([ + // Success + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, + // Success, Success, + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, + // Success, Success, Success + { Success: 100, RetryScheduled: 0, Failed: 0, status: 'OK' }, + // Success, Success, Success, Failed + { Success: 75, RetryScheduled: 0, Failed: 25, status: 'OK' }, + // Success, Success, Success, Failed, Failed + { Success: 60, RetryScheduled: 0, Failed: 40, status: 'warn' }, + // Success, Success, Failed, Failed, Failed + { Success: 40, RetryScheduled: 0, Failed: 60, status: 'error' }, + // Success, Failed, Failed, Failed, RetryScheduled + { Success: 20, RetryScheduled: 20, Failed: 60, status: 'error' }, + // Failed, Failed, Failed, RetryScheduled, RetryScheduled + { Success: 0, RetryScheduled: 40, Failed: 60, status: 'error' }, + // Failed, Failed, RetryScheduled, RetryScheduled, Success + { Success: 20, RetryScheduled: 40, Failed: 40, status: 'warn' }, + // Failed, RetryScheduled, RetryScheduled, Success, Success + { Success: 40, RetryScheduled: 40, Failed: 20, status: 'OK' }, ]); resolve(); } catch (e) { @@ -311,7 +403,7 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat(value).value, + value: summarizeTaskRunStat(value, getTaskManagerConfig()).value, })), tap(() => { expectedTimestamp.push(new Date().toISOString()); @@ -405,3 +497,5 @@ const mockTaskInstance = (overrides: Partial = {}): Concre ownerId: null, ...overrides, }); + +const getTaskManagerConfig = (overrides: unknown = {}) => configSchema.validate(overrides); diff --git 
a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 0156d1000ad7..ddfb26a7c3c0 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -31,6 +31,7 @@ import { } from './task_run_calcultors'; import { HealthStatus } from './monitoring_stats_stream'; import { TaskPollingLifecycle } from '../polling_lifecycle'; +import { TaskExecutionFailureThreshold, TaskManagerConfig } from '../config'; interface FillPoolStat extends JsonObject { last_successful_poll: string; @@ -57,23 +58,27 @@ interface FillPoolRawStat extends JsonObject { }; } +interface ResultFrequency extends JsonObject { + [TaskRunResult.Success]: number; + [TaskRunResult.SuccessRescheduled]: number; + [TaskRunResult.RetryScheduled]: number; + [TaskRunResult.Failed]: number; +} + +type ResultFrequencySummary = ResultFrequency & { + status: HealthStatus; +}; + export interface SummarizedTaskRunStat extends JsonObject { drift: AveragedStat; execution: { duration: Record; - result_frequency_percent_as_number: Record< - string, - { - [TaskRunResult.Success]: number; - [TaskRunResult.SuccessRescheduled]: number; - [TaskRunResult.RetryScheduled]: number; - [TaskRunResult.Failed]: number; - } - >; + result_frequency_percent_as_number: Record; }; polling: FillPoolRawStat | Omit; } +const DEFAULT_EXECUTION_ERROR_THRESHOLD = 90; export function createTaskRunAggregator( taskPollingLifecycle: TaskPollingLifecycle, runningAverageWindowSize: number @@ -165,12 +170,15 @@ const DEFAULT_POLLING_FREQUENCIES = { [FillPoolResult.PoolFilled]: 0, }; -export function summarizeTaskRunStat({ - // eslint-disable-next-line @typescript-eslint/naming-convention - polling: { last_successful_poll, result_frequency_percent_as_number: pollingResultFrequency }, - drift, - execution: { duration, result_frequency_percent_as_number: 
executionResultFrequency }, -}: TaskRunStat): { value: SummarizedTaskRunStat; status: HealthStatus } { +export function summarizeTaskRunStat( + { + // eslint-disable-next-line @typescript-eslint/naming-convention + polling: { last_successful_poll, result_frequency_percent_as_number: pollingResultFrequency }, + drift, + execution: { duration, result_frequency_percent_as_number: executionResultFrequency }, + }: TaskRunStat, + config: TaskManagerConfig +): { value: SummarizedTaskRunStat; status: HealthStatus } { return { value: { polling: { @@ -185,13 +193,33 @@ export function summarizeTaskRunStat({ duration: mapValues(duration, (typedDurations) => calculateRunningAverage(typedDurations)), result_frequency_percent_as_number: mapValues( executionResultFrequency, - (typedResultFrequencies) => ({ - ...DEFAULT_TASK_RUN_FREQUENCIES, - ...calculateFrequency(typedResultFrequencies), - }) + (typedResultFrequencies, taskType) => + summarizeTaskExecutionResultFrequencyStat( + { + ...DEFAULT_TASK_RUN_FREQUENCIES, + ...calculateFrequency(typedResultFrequencies), + }, + config.monitored_task_execution_thresholds.custom[taskType] ?? + config.monitored_task_execution_thresholds.default + ) ), }, }, status: HealthStatus.OK, }; } + +function summarizeTaskExecutionResultFrequencyStat( + resultFrequencySummary: ResultFrequency, + executionErrorThreshold: TaskExecutionFailureThreshold +): ResultFrequencySummary { + return { + ...resultFrequencySummary, + status: + resultFrequencySummary.Failed > executionErrorThreshold.warn_threshold + ? resultFrequencySummary.Failed > executionErrorThreshold.error_threshold + ? 
HealthStatus.Error + : HealthStatus.Warning + : HealthStatus.OK, + }; +} diff --git a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts index a975ad24486e..8388468164a4 100644 --- a/x-pack/plugins/task_manager/server/plugin.test.ts +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -22,6 +22,13 @@ describe('TaskManagerPlugin', () => { monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_required_freshness: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }); pluginInitializerContext.env.instanceUuid = ''; @@ -44,6 +51,13 @@ describe('TaskManagerPlugin', () => { monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_required_freshness: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }); const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 8e30f7be4339..8babecc6b41d 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -71,11 +71,7 @@ export class TaskManagerPlugin monitoringStats$, logger, this.taskManagerId, - // if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default) - // consider the system unhealthy - config.monitored_stats_required_freshness, - // if "cold" health stats are any more stale than the configured refresh, consider the system unhealthy - config.monitored_aggregated_stats_refresh_rate + 1000 + config ); core.getStartServices().then(async () => { diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts 
index ce970f2a157e..2ee48d74f4a7 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts @@ -31,6 +31,13 @@ describe('TaskPollingLifecycle', () => { monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_required_freshness: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }, taskStore: mockTaskStore, logger: taskManagerLogger, diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index 9de30ef560c1..bdd4241ae6ee 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -16,6 +16,7 @@ import { loggingSystemMock } from '../../../../../src/core/server/mocks'; import { Logger } from '../../../../../src/core/server'; import { MonitoringStats, summarizeMonitoringStats } from '../monitoring'; import { ServiceStatusLevels } from 'src/core/server'; +import { configSchema, TaskManagerConfig } from '../config'; describe('healthRoute', () => { beforeEach(() => { @@ -26,7 +27,7 @@ describe('healthRoute', () => { const router = httpServiceMock.createRouter(); const logger = loggingSystemMock.create().get(); - healthRoute(router, of(), logger, uuid.v4(), 1000, 1000); + healthRoute(router, of(), logger, uuid.v4(), getTaskManagerConfig()); const [config] = router.get.mock.calls[0]; @@ -46,7 +47,16 @@ describe('healthRoute', () => { const stats$ = new Subject(); const id = uuid.v4(); - healthRoute(router, stats$, logger, id, 1000, 60000); + healthRoute( + router, + stats$, + logger, + id, + getTaskManagerConfig({ + monitored_stats_required_freshness: 1000, + monitored_aggregated_stats_refresh_rate: 60000, + }) + ); stats$.next(mockStat); await sleep(500); @@ -59,7 +69,7 @@ describe('healthRoute', () => { id, timestamp: 
expect.any(String), status: expect.any(String), - ...summarizeMonitoringStats(mockStat), + ...summarizeMonitoringStats(mockStat, getTaskManagerConfig({})), }); const secondDebug = JSON.parse((logger as jest.Mocked).debug.mock.calls[1][0]); @@ -67,13 +77,13 @@ describe('healthRoute', () => { id, timestamp: expect.any(String), status: expect.any(String), - ...summarizeMonitoringStats(skippedMockStat), + ...summarizeMonitoringStats(skippedMockStat, getTaskManagerConfig({})), }); expect(secondDebug).toMatchObject({ id, timestamp: expect.any(String), status: expect.any(String), - ...summarizeMonitoringStats(nextMockStat), + ...summarizeMonitoringStats(nextMockStat, getTaskManagerConfig({})), }); expect(logger.debug).toHaveBeenCalledTimes(2); @@ -89,8 +99,10 @@ describe('healthRoute', () => { stats$, loggingSystemMock.create().get(), uuid.v4(), - 1000, - 60000 + getTaskManagerConfig({ + monitored_stats_required_freshness: 1000, + monitored_aggregated_stats_refresh_rate: 60000, + }) ); const serviceStatus = getLatest(serviceStatus$); @@ -129,7 +141,8 @@ describe('healthRoute', () => { }, }, }, - }) + }), + getTaskManagerConfig({}) ), }, }); @@ -158,7 +171,8 @@ describe('healthRoute', () => { }, }, }, - }) + }), + getTaskManagerConfig({}) ), }, }); @@ -169,7 +183,16 @@ describe('healthRoute', () => { const stats$ = new Subject(); - healthRoute(router, stats$, loggingSystemMock.create().get(), uuid.v4(), 5000, 60000); + healthRoute( + router, + stats$, + loggingSystemMock.create().get(), + uuid.v4(), + getTaskManagerConfig({ + monitored_stats_required_freshness: 5000, + monitored_aggregated_stats_refresh_rate: 60000, + }) + ); await sleep(0); @@ -212,7 +235,8 @@ describe('healthRoute', () => { }, }, }, - }) + }), + getTaskManagerConfig() ), }, }); @@ -222,7 +246,16 @@ describe('healthRoute', () => { const router = httpServiceMock.createRouter(); const stats$ = new Subject(); - healthRoute(router, stats$, loggingSystemMock.create().get(), uuid.v4(), 1000, 60000); + 
healthRoute( + router, + stats$, + loggingSystemMock.create().get(), + uuid.v4(), + getTaskManagerConfig({ + monitored_stats_required_freshness: 1000, + monitored_aggregated_stats_refresh_rate: 60000, + }) + ); await sleep(0); @@ -268,7 +301,8 @@ describe('healthRoute', () => { }, }, }, - }) + }), + getTaskManagerConfig() ), }, }); @@ -290,6 +324,13 @@ function mockHealthStats(overrides = {}) { request_capacity: 1000, monitored_aggregated_stats_refresh_rate: 5000, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, }, }, }, @@ -334,3 +375,15 @@ function mockHealthStats(overrides = {}) { async function getLatest(stream$: Observable) { return new Promise((resolve) => stream$.pipe(take(1)).subscribe((stats) => resolve(stats))); } + +const getTaskManagerConfig = (overrides: Partial = {}) => + configSchema.validate( + overrides.monitored_stats_required_freshness + ? { + // use `monitored_stats_required_freshness` as poll interval otherwise we might + // fail validation as it must be greather than the poll interval + poll_interval: overrides.monitored_stats_required_freshness, + ...overrides, + } + : overrides + ); diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index b04b5bbd063d..33d684c76c32 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -23,6 +23,7 @@ import { HealthStatus, RawMonitoringStats, } from '../monitoring'; +import { TaskManagerConfig } from '../config'; type MonitoredHealth = RawMonitoringStats & { id: string; status: HealthStatus; timestamp: string }; @@ -37,13 +38,19 @@ export function healthRoute( monitoringStats$: Observable, logger: Logger, taskManagerId: string, - requiredHotStatsFreshness: number, - requiredColdStatsFreshness: number + config: TaskManagerConfig ): Observable { + // if 
"hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default) + // consider the system unhealthy + const requiredHotStatsFreshness: number = config.monitored_stats_required_freshness; + + // if "cold" health stats are any more stale than the configured refresh (+ a buffer), consider the system unhealthy + const requiredColdStatsFreshness: number = config.monitored_aggregated_stats_refresh_rate * 1.5; + function calculateStatus(monitoredStats: MonitoringStats): MonitoredHealth { const now = Date.now(); const timestamp = new Date(now).toISOString(); - const summarizedStats = summarizeMonitoringStats(monitoredStats); + const summarizedStats = summarizeMonitoringStats(monitoredStats, config); /** * If the monitored stats aren't fresh, return a red status diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts index 8b20cdc2fac7..9b02b5857367 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/health_route.ts @@ -83,6 +83,13 @@ export default function ({ getService }: FtrProviderContext) { max_poll_inactivity_cycles: 10, monitored_aggregated_stats_refresh_rate: monitoredAggregatedStatsRefreshRate, monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + custom: {}, + default: { + error_threshold: 90, + warn_threshold: 80, + }, + }, request_capacity: 1000, max_workers: 10, }); From 508a72695e417c3825812dcfeccd17e49a8247a3 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 20 Oct 2020 12:58:56 +0100 Subject: [PATCH 61/67] merge config with dynamic configs in stats --- .../configuration_statistics.test.ts | 98 +++++++++++++++++++ .../monitoring/configuration_statistics.ts | 41 +++++--- .../task_manager/server/monitoring/index.ts | 4 +- .../monitoring_stats_stream.test.ts | 
1 + .../monitoring/monitoring_stats_stream.ts | 4 +- .../server/monitoring/task_run_statistics.ts | 1 - x-pack/plugins/task_manager/server/plugin.ts | 6 +- 7 files changed, 138 insertions(+), 17 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts diff --git a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts new file mode 100644 index 000000000000..f97861901b5b --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { Subject } from 'rxjs'; +import { take, bufferCount } from 'rxjs/operators'; +import { createConfigurationAggregator } from './configuration_statistics'; +import { TaskManagerConfig } from '../config'; + +describe('Configuration Statistics Aggregator', () => { + test('merges the static config with the merged configs', async () => { + const configuration: TaskManagerConfig = { + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 6000000, + monitored_stats_required_freshness: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + }; + + const managedConfig = { + maxWorkersConfiguration$: new Subject(), + pollIntervalConfiguration$: new Subject(), + }; + + return new Promise(async (resolve, reject) => { + createConfigurationAggregator(configuration, managedConfig) + .pipe(take(3), bufferCount(3)) + 
.subscribe(([initial, updatedWorkers, updatedInterval]) => { + expect(initial.value).toEqual({ + max_workers: 10, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + }); + + expect(updatedWorkers.value).toEqual({ + max_workers: 8, + poll_interval: 6000000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + }); + + expect(updatedInterval.value).toEqual({ + max_workers: 8, + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + }); + resolve(); + }, reject); + + managedConfig.maxWorkersConfiguration$.next(8); + + managedConfig.pollIntervalConfiguration$.next(3000); + }); + }); +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts index 46d3e4a35a25..22b08bc5c88d 100644 --- a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.ts @@ -4,14 +4,14 @@ * you may not use this file except in compliance with the Elastic License. 
*/ -import { of } from 'rxjs'; -import { pick } from 'lodash'; +import { combineLatest, of } from 'rxjs'; +import { pick, merge } from 'lodash'; +import { map, startWith } from 'rxjs/operators'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { TaskManagerConfig } from '../config'; +import { ManagedConfiguration } from '../lib/create_managed_configuration'; const CONFIG_FIELDS_TO_EXPOSE = [ - 'max_workers', - 'poll_interval', 'request_capacity', 'max_poll_inactivity_cycles', 'monitored_aggregated_stats_refresh_rate', @@ -19,14 +19,33 @@ const CONFIG_FIELDS_TO_EXPOSE = [ 'monitored_task_execution_thresholds', ] as const; -export type ConfigStat = Pick; +export type ConfigStat = Pick< + TaskManagerConfig, + 'max_workers' | 'poll_interval' | typeof CONFIG_FIELDS_TO_EXPOSE[number] +>; export function createConfigurationAggregator( - config: TaskManagerConfig + config: TaskManagerConfig, + managedConfig: ManagedConfiguration ): AggregatedStatProvider { - const picked: ConfigStat = pick(config, ...CONFIG_FIELDS_TO_EXPOSE); - return of({ - key: 'configuration', - value: picked, - }); + return combineLatest([ + of(pick(config, ...CONFIG_FIELDS_TO_EXPOSE)), + managedConfig.pollIntervalConfiguration$.pipe( + startWith(config.poll_interval), + map>((pollInterval) => ({ + poll_interval: pollInterval, + })) + ), + managedConfig.maxWorkersConfiguration$.pipe( + startWith(config.max_workers), + map>((maxWorkers) => ({ + max_workers: maxWorkers, + })) + ), + ]).pipe( + map((configurations) => ({ + key: 'configuration', + value: merge({}, ...configurations), + })) + ); } diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts index fb6eda69a6ba..8e71ce2519a7 100644 --- a/x-pack/plugins/task_manager/server/monitoring/index.ts +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -14,6 +14,7 @@ import { } from './monitoring_stats_stream'; import { TaskStore } from 
'../task_store'; import { TaskPollingLifecycle } from '../polling_lifecycle'; +import { ManagedConfiguration } from '../lib/create_managed_configuration'; export { MonitoringStats, @@ -28,10 +29,11 @@ export function createMonitoringStats( taskPollingLifecycle: TaskPollingLifecycle, taskStore: TaskStore, config: TaskManagerConfig, + managedConfig: ManagedConfiguration, logger: Logger ): Observable { return createMonitoringStatsStream( - createAggregators(taskPollingLifecycle, taskStore, config, logger), + createAggregators(taskPollingLifecycle, taskStore, config, managedConfig, logger), config ); } diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts index 8479def5deee..f2719d9e21bd 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -9,6 +9,7 @@ import { of, Subject } from 'rxjs'; import { take, bufferCount } from 'rxjs/operators'; import { createMonitoringStatsStream, AggregatedStat } from './monitoring_stats_stream'; import { JsonValue } from 'src/plugins/kibana_utils/common'; +import { ManagedConfiguration } from '../lib/create_managed_configuration'; beforeEach(() => { jest.resetAllMocks(); diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 17aaf31e68dd..374660a257c5 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -24,6 +24,7 @@ import { import { ConfigStat, createConfigurationAggregator } from './configuration_statistics'; import { TaskManagerConfig } from '../config'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; +import { ManagedConfiguration } from 
'../lib/create_managed_configuration'; export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; @@ -63,10 +64,11 @@ export function createAggregators( taskPollingLifecycle: TaskPollingLifecycle, taskStore: TaskStore, config: TaskManagerConfig, + managedConfig: ManagedConfiguration, logger: Logger ): AggregatedStatProvider { return merge( - createConfigurationAggregator(config), + createConfigurationAggregator(config, managedConfig), createTaskRunAggregator(taskPollingLifecycle, config.monitored_stats_running_average_window), createWorkloadAggregator( taskStore, diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index ddfb26a7c3c0..6dd533177a86 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -78,7 +78,6 @@ export interface SummarizedTaskRunStat extends JsonObject { polling: FillPoolRawStat | Omit; } -const DEFAULT_EXECUTION_ERROR_THRESHOLD = 90; export function createTaskRunAggregator( taskPollingLifecycle: TaskPollingLifecycle, runningAverageWindowSize: number diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index 8babecc6b41d..b56b6dcc1360 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -109,7 +109,7 @@ export class TaskManagerPlugin taskManagerId: `kibana:${this.taskManagerId!}`, }); - const { maxWorkersConfiguration$, pollIntervalConfiguration$ } = createManagedConfiguration({ + const managedConfiguration = createManagedConfiguration({ logger: this.logger, errors$: taskStore.errors$, startingMaxWorkers: this.config!.max_workers, @@ -122,8 +122,7 @@ export class TaskManagerPlugin logger: this.logger, taskStore, middleware: this.middleware, - maxWorkersConfiguration$, - pollIntervalConfiguration$, + 
...managedConfiguration, }); this.taskPollingLifecycle = taskPollingLifecycle; @@ -131,6 +130,7 @@ export class TaskManagerPlugin taskPollingLifecycle, taskStore, this.config!, + managedConfiguration, this.logger ).subscribe((stat) => this.monitoringStats$.next(stat)); From 14faf60dbea08844623c9e5ec54afecb3b7d979a Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 20 Oct 2020 15:58:56 +0100 Subject: [PATCH 62/67] expanded test on thresholds --- .../task_manager/server/config.test.ts | 54 ++++++++++++++++++- x-pack/plugins/task_manager/server/config.ts | 10 ++-- .../monitoring_stats_stream.test.ts | 1 - 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index 825ae0404f76..d2d5ac8f22a1 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -117,7 +117,37 @@ describe('config validation', () => { `); }); - test('the monitored_task_execution_thresholds warn_threshold must be lte error_threshold', () => { + test('the monitored_task_execution_thresholds ensures that the default warn_threshold is lt the default error_threshold', () => { + const config: Record = { + monitored_task_execution_thresholds: { + default: { + warn_threshold: 80, + error_threshold: 70, + }, + }, + }; + expect(() => { + configSchema.validate(config); + }).toThrowErrorMatchingInlineSnapshot( + `"[monitored_task_execution_thresholds.default]: warn_threshold (80) must be less than, or equal to, error_threshold (70)"` + ); + }); + + test('the monitored_task_execution_thresholds allows the default warn_threshold to equal the default error_threshold', () => { + const config: Record = { + monitored_task_execution_thresholds: { + default: { + warn_threshold: 70, + error_threshold: 70, + }, + }, + }; + expect(() => { + configSchema.validate(config); + }).not.toThrowError(); + }); + + test('the 
monitored_task_execution_thresholds ensures that the warn_threshold is lte error_threshold on custom thresholds', () => { const config: Record = { monitored_task_execution_thresholds: { custom: { @@ -131,7 +161,27 @@ describe('config validation', () => { expect(() => { configSchema.validate(config); }).toThrowErrorMatchingInlineSnapshot( - `"[monitored_task_execution_thresholds.custom.alerting:always-fires]: warn_threshold must be less than, or equal to, error_threshold"` + `"[monitored_task_execution_thresholds.custom.alerting:always-fires]: warn_threshold (90) must be less than, or equal to, error_threshold (80)"` ); }); + + test('the monitored_task_execution_thresholds allows a custom error_threshold which is lower than the default warn_threshold', () => { + const config: Record = { + monitored_task_execution_thresholds: { + default: { + warn_threshold: 80, + error_threshold: 90, + }, + custom: { + 'alerting:always-fires': { + error_threshold: 60, + warn_threshold: 50, + }, + }, + }, + }; + expect(() => { + configSchema.validate(config); + }).not.toThrowError(); + }); }); diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index 96825ce0cbd7..11753e90cd9b 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -28,13 +28,9 @@ export const taskExecutionFailureThresholdSchema = schema.object( }), }, { - validate: (config) => { - if ( - config.error_threshold && - config.warn_threshold && - config.warn_threshold > config.error_threshold - ) { - return `warn_threshold must be less than, or equal to, error_threshold`; + validate(config) { + if (config.error_threshold < config.warn_threshold) { + return `warn_threshold (${config.warn_threshold}) must be less than, or equal to, error_threshold (${config.error_threshold})`; } }, } diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts 
b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts index f2719d9e21bd..8479def5deee 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -9,7 +9,6 @@ import { of, Subject } from 'rxjs'; import { take, bufferCount } from 'rxjs/operators'; import { createMonitoringStatsStream, AggregatedStat } from './monitoring_stats_stream'; import { JsonValue } from 'src/plugins/kibana_utils/common'; -import { ManagedConfiguration } from '../lib/create_managed_configuration'; beforeEach(() => { jest.resetAllMocks(); From c8b487945e39b16f1f65dce4e0328415e34249cb Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 20 Oct 2020 16:54:49 +0100 Subject: [PATCH 63/67] added config examples to readme --- .../plugins/task_manager/server/MONITORING.md | 28 +++++++++++++------ x-pack/plugins/task_manager/server/config.ts | 6 ++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/x-pack/plugins/task_manager/server/MONITORING.md b/x-pack/plugins/task_manager/server/MONITORING.md index 93b998056ea3..4960086411e9 100644 --- a/x-pack/plugins/task_manager/server/MONITORING.md +++ b/x-pack/plugins/task_manager/server/MONITORING.md @@ -16,9 +16,19 @@ There are four new configurations: - `xpack.task_manager.monitored_stats_running_average_window`- Dictates the size of the window used to calculate the running average of various "Hot" stats, such as the time it takes to run a task, the _drift_ that tasks experience etc. These stats are collected throughout the lifecycle of tasks and this window will dictate how large the queue we keep in memory would be, and how many values we need to calculate the average against. We do not calculate the average on *every* new value, but rather only when the time comes to summarize the stats before logging them or returning them to the API endpoint. 
- `xpack.task_manager.monitored_task_execution_thresholds`- Configures the threshold of failed task executions at which point the `warn` or `error` health status will be set either at a default level or a custom level for specific task types. This will allow you to mark the health as `error` when any task type failes 90% of the time, but set it to `error` at 50% of the time for task types that you consider critical. This value can be set to any number between 0 to 100, and a threshold is hit when the value *exceeds* this number. This means that you can avoid setting the status to `error` by setting the threshold at 100, or hit `error` the moment any task failes by setting the threshold to 0 (as it will exceed 0 once a single failer occurs). -For example: +For example, in your `Kibana.yml`: ``` - +xpack.task_manager.monitored_stats_required_freshness: 5000 +xpack.task_manager.monitored_aggregated_stats_refresh_rate: 60000 +xpack.task_manager.monitored_stats_running_average_window: 50 +xpack.task_manager.monitored_task_execution_thresholds: + default: + error_threshold: 70 + warn_threshold: 50 + custom: + "alerting:always-firing": + error_threshold: 50 + warn_threshold: 0 ``` ## Consuming Health Stats @@ -210,38 +220,38 @@ For example, if you _curl_ the `/api/task_manager/_health` endpoint, you might g "result_frequency_percent_as_number": { /* and 100% of `endpoint:user-artifact-packager` have completed in success (within the running average window, so the past 50 runs (by default, configrable by `monitored_stats_running_average_window`) */ "endpoint:user-artifact-packager": { - "status": "OK", + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "session_cleanup": { - /* `error` status as 90% of results are `Failed` */ - "status": "error", + /* `error` status as 90% of results are `Failed` */ + "status": "error", "Success": 5, "RetryScheduled": 5, "Failed": 90 }, "lens_telemetry": { - "status": "OK", + "status": "OK", "Success": 100, 
"RetryScheduled": 0, "Failed": 0 }, "actions_telemetry": { - "status": "OK", + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "alerting_telemetry": { - "status": "OK", + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 }, "apm-telemetry-task": { - "status": "OK", + "status": "OK", "Success": 100, "RetryScheduled": 0, "Failed": 0 diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index 11753e90cd9b..157f01281836 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -20,11 +20,11 @@ export const taskExecutionFailureThresholdSchema = schema.object( { error_threshold: schema.number({ defaultValue: 90, - min: 1, + min: 0, }), warn_threshold: schema.number({ defaultValue: 80, - min: 1, + min: 0, }), }, { @@ -75,7 +75,7 @@ export const configSchema = schema.object( // disable the task manager rather than trying to specify it with 0 workers min: 1, }), - /* The rate at emit fresh monitored stats. By default we'll use the poll_interval (+ a slight buffer) */ + /* The rate at which we emit fresh monitored stats. By default we'll use the poll_interval (+ a slight buffer) */ monitored_stats_required_freshness: schema.number({ defaultValue: (config?: unknown) => ((config as { poll_interval: number })?.poll_interval ?? 
DEFAULT_POLL_INTERVAL) + 1000, From 1a5a9074f501a4a1f2804660130458c8446fda28 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Thu, 22 Oct 2020 10:33:10 +0100 Subject: [PATCH 64/67] refactored createRunningAveragedStat --- .../server/monitoring/task_run_calcultors.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts index 524615058427..67b77a29b1c7 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_calcultors.ts @@ -35,21 +35,20 @@ export function calculateFrequency(values: T[]): JsonObject { } /** - * Utility to keep track of a limited queue of values which changes over time + * Utility to keep track of a bounded array of values which changes over time * dropping older values as they slide out of the window we wish to track */ export function createRunningAveragedStat(runningAverageWindowSize: number) { - const queue = new Array(); + const list = new Array(); return (value?: T) => { - if (isUndefined(value)) { - return queue; - } else { - if (queue.length === runningAverageWindowSize) { - queue.shift(); + if (!isUndefined(value)) { + if (list.length === runningAverageWindowSize) { + list.shift(); } - queue.push(value); - return [...queue]; + list.push(value); } + // clone list to ensure it isn't mutated externally + return [...list]; }; } From 5ea830c5accef240ea330525cbe4548062ca4d2f Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 26 Oct 2020 12:03:05 +0000 Subject: [PATCH 65/67] use inferred type in aggregation --- .../server/monitoring/workload_statistics.ts | 21 +++++-------------- .../plugins/task_manager/server/task_store.ts | 2 +- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts
b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts index 37bf65ba90b5..fe70f24684ad 100644 --- a/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/workload_statistics.ts @@ -9,9 +9,7 @@ import { mergeMap, map, catchError } from 'rxjs/operators'; import { Logger } from 'src/core/server'; import { JsonObject } from 'src/plugins/kibana_utils/common'; import { keyBy, mapValues } from 'lodash'; -import { ESSearchResponse } from '../../../apm/typings/elasticsearch'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; -import { ConcreteTaskInstance } from '../task'; import { parseIntervalAsSecond, asInterval, parseIntervalAsMillisecond } from '../lib/intervals'; import { AggregationResultOf } from '../../../apm/typings/elasticsearch/aggregations'; import { HealthStatus } from './monitoring_stats_stream'; @@ -109,7 +107,7 @@ export function createWorkloadAggregator( return timer(0, refreshInterval).pipe( mergeMap(() => - taskStore.aggregate({ + taskStore.aggregate({ aggs: { taskType: { terms: { field: 'task.taskType' }, @@ -166,7 +164,7 @@ export function createWorkloadAggregator( }, }) ), - map((result: ESSearchResponse) => { + map((result) => { const { aggregations, hits: { @@ -185,22 +183,13 @@ export function createWorkloadAggregator( throw new Error(`Invalid workload: ${JSON.stringify(result)}`); } - const taskTypes = (aggregations.taskType as AggregationResultOf< - WorkloadAggregation['aggs']['taskType'], - {} - >).buckets; - const schedules = (aggregations.schedule as AggregationResultOf< - WorkloadAggregation['aggs']['schedule'], - {} - >).buckets; + const taskTypes = aggregations.taskType.buckets; + const schedules = aggregations.schedule.buckets; const { overdue: { doc_count: overdue }, scheduleDensity: { buckets: [scheduleDensity] = [] } = {}, - } = aggregations.idleTasks as AggregationResultOf< - WorkloadAggregation['aggs']['idleTasks'], - {} - >; + 
} = aggregations.idleTasks; const summary: WorkloadStat = { count, diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 7f231731db01..4f60a01c14da 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -510,7 +510,7 @@ export class TaskStore { aggs, query, size = 0, - }: AggregationOpts) { + }: TSearchRequest): Promise> { const { body } = await this.esClient.search< ESSearchResponse >({ From 9b485390ecf0eebc6b867a3244db055424a305de Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 27 Oct 2020 10:09:31 +0000 Subject: [PATCH 66/67] added missing configs to docs --- x-pack/plugins/task_manager/server/README.md | 2 ++ x-pack/plugins/task_manager/server/routes/health.ts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/x-pack/plugins/task_manager/server/README.md b/x-pack/plugins/task_manager/server/README.md index 4eb8a78cb4d9..a0b35ad09453 100644 --- a/x-pack/plugins/task_manager/server/README.md +++ b/x-pack/plugins/task_manager/server/README.md @@ -50,6 +50,8 @@ The task_manager can be configured via `taskManager` config options (e.g. `taskM - This allows sysadmins to tweak the operational performance of Kibana, allowing more or fewer tasks of a specific type to run simultaneously - `monitored_aggregated_stats_refresh_rate` - Dictates how often we refresh the "Cold" metrics. Learn More: [./MONITORING](./MONITORING.MD) - `monitored_stats_running_average_window`- Dictates the size of the window used to calculate the running average of various "Hot" stats. Learn More: [./MONITORING](./MONITORING.MD) +- `monitored_stats_required_freshness` - Dictates the _required freshness_ of critical "Hot" stats. Learn More: [./MONITORING](./MONITORING.MD) +- `monitored_task_execution_thresholds`- Dictates the threshold of failed task executions. 
Learn More: [./MONITORING](./MONITORING.MD) ## Task definitions diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index 33d684c76c32..8ddd728063d2 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -84,7 +84,7 @@ export function healthRoute( ) .subscribe(([monitoredHealth, serviceStatus]) => { serviceStatus$.next(serviceStatus); - logger.debug(JSON.stringify(monitoredHealth)); + logger.debug(`Latest Monitored Stats: ${JSON.stringify(monitoredHealth)}`); }); router.get( From e59ebc0ce51e241d1ced1da4f181147592bc1016 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Tue, 27 Oct 2020 11:35:33 +0000 Subject: [PATCH 67/67] removed text in JSON parse --- x-pack/plugins/task_manager/server/routes/health.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index bdd4241ae6ee..5a0cef8eda94 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -64,7 +64,9 @@ describe('healthRoute', () => { await sleep(600); stats$.next(nextMockStat); - const firstDebug = JSON.parse((logger as jest.Mocked).debug.mock.calls[0][0]); + const firstDebug = JSON.parse( + (logger as jest.Mocked).debug.mock.calls[0][0].replace('Latest Monitored Stats: ', '') + ); expect(firstDebug).toMatchObject({ id, timestamp: expect.any(String), @@ -72,7 +74,9 @@ describe('healthRoute', () => { ...summarizeMonitoringStats(mockStat, getTaskManagerConfig({})), }); - const secondDebug = JSON.parse((logger as jest.Mocked).debug.mock.calls[1][0]); + const secondDebug = JSON.parse( + (logger as jest.Mocked).debug.mock.calls[1][0].replace('Latest Monitored Stats: ', '') + ); expect(secondDebug).not.toMatchObject({ id, timestamp: expect.any(String),