From f016398f8ba657a6cdb3392fddd1c673766f6321 Mon Sep 17 00:00:00 2001 From: Patrick Mueller <patrick.mueller@elastic.co> Date: Thu, 13 Jun 2024 16:04:23 -0400 Subject: [PATCH] [ResponseOps] implement task claiming strategy mget (#180485) resolves: https://github.com/elastic/kibana/issues/181325 ## Summary Adds a new task claiming strategy `unsafe_mget`, which can be used instead of the default one `default`. Add the following to your `kibana.yml` to enable it: xpack.task_manager.claim_strategy: 'unsafe_mget' --- .buildkite/ftr_configs.yml | 1 + .github/CODEOWNERS | 2 + package.json | 1 + tsconfig.base.json | 2 + .../task_manager/server/config.test.ts | 9 +- x-pack/plugins/task_manager/server/config.ts | 7 +- .../manual_tests/get_rule_run_event_logs.js | 232 ++++ .../server/monitoring/task_run_statistics.ts | 3 +- .../server/polling/task_poller.ts | 12 +- .../task_manager/server/polling_lifecycle.ts | 21 +- .../mark_available_tasks_as_claimed.test.ts | 53 + .../mark_available_tasks_as_claimed.ts | 56 + .../server/queries/task_claiming.test.ts | 30 +- .../server/queries/task_claiming.ts | 5 +- x-pack/plugins/task_manager/server/task.ts | 11 + .../server/task_claimers/README.md | 14 + .../server/task_claimers/index.test.ts | 23 +- .../server/task_claimers/index.ts | 30 +- .../task_claimers/strategy_default.test.ts | 22 +- .../server/task_claimers/strategy_default.ts | 56 +- .../task_claimers/strategy_mget.test.ts | 463 +++++++ .../server/task_claimers/strategy_mget.ts | 326 +++++ .../task_manager/server/task_store.mock.ts | 2 + .../task_manager/server/task_store.test.ts | 197 +++ .../plugins/task_manager/server/task_store.ts | 84 +- .../server/task_type_dictionary.ts | 4 + .../test/task_manager_claimer_mget/config.ts | 40 + .../ftr_provider_context.d.ts | 12 + .../sample_task_plugin_mget/kibana.jsonc | 13 + .../sample_task_plugin_mget/package.json | 14 + .../sample_task_plugin_mget/server/index.ts | 13 + .../server/init_routes.ts | 404 ++++++ .../sample_task_plugin_mget/server/plugin.ts | 409 ++++++ .../sample_task_plugin_mget/tsconfig.json | 18 + .../task_manager_claimer_mget/services.ts | 8 + .../background_task_utilization_route.ts | 103 ++ .../test_suites/task_manager/health_route.ts | 339 +++++ .../test_suites/task_manager/index.ts | 22 + .../test_suites/task_manager/metrics_route.ts | 328 +++++ .../test_suites/task_manager/migrations.ts | 276 ++++ .../task_manager/task_management.ts | 1170 +++++++++++++++++ .../task_management_removed_types.ts | 108 ++ .../task_management_scheduled_at.ts | 59 + .../test_suites/task_manager/task_priority.ts | 216 +++ yarn.lock | 4 + 45 files changed, 5106 insertions(+), 116 deletions(-) create mode 100644 x-pack/plugins/task_manager/server/manual_tests/get_rule_run_event_logs.js create mode 100644 x-pack/plugins/task_manager/server/task_claimers/strategy_mget.test.ts create mode 100644 x-pack/plugins/task_manager/server/task_claimers/strategy_mget.ts create mode 100644 x-pack/test/task_manager_claimer_mget/config.ts create mode 100644 x-pack/test/task_manager_claimer_mget/ftr_provider_context.d.ts create mode 100644 x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/kibana.jsonc create mode 100644 x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/package.json create mode 100644 x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/index.ts create mode 100644 x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/init_routes.ts create mode 100644 
x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/plugin.ts create mode 100644 x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/tsconfig.json create mode 100644 x-pack/test/task_manager_claimer_mget/services.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/background_task_utilization_route.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/health_route.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/index.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/metrics_route.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/migrations.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_removed_types.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_scheduled_at.ts create mode 100644 x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_priority.ts diff --git a/.buildkite/ftr_configs.yml b/.buildkite/ftr_configs.yml index 76a0bd83613f9..433ab8b78b7a9 100644 --- a/.buildkite/ftr_configs.yml +++ b/.buildkite/ftr_configs.yml @@ -405,6 +405,7 @@ enabled: - x-pack/test/spaces_api_integration/security_and_spaces/config_trial.ts - x-pack/test/spaces_api_integration/security_and_spaces/copy_to_space_config_trial.ts - x-pack/test/spaces_api_integration/spaces_only/config.ts + - x-pack/test/task_manager_claimer_mget/config.ts - x-pack/test/ui_capabilities/security_and_spaces/config.ts - x-pack/test/ui_capabilities/spaces_only/config.ts - x-pack/test/upgrade_assistant_integration/config.js diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4d7cc07659dde..e8e33cfec9a8f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -695,6 +695,7 @@ x-pack/plugins/runtime_fields @elastic/kibana-management packages/kbn-safer-lodash-set @elastic/kibana-security x-pack/test/security_api_integration/plugins/saml_provider @elastic/kibana-security x-pack/test/plugin_api_integration/plugins/sample_task_plugin @elastic/response-ops +x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget @elastic/response-ops test/plugin_functional/plugins/saved_object_export_transforms @elastic/kibana-core test/plugin_functional/plugins/saved_object_import_warnings @elastic/kibana-core x-pack/test/saved_object_api_integration/common/plugins/saved_object_test_plugin @elastic/kibana-security @@ -1326,6 +1327,7 @@ x-pack/plugins/cloud_integrations/cloud_full_story/server/config.ts @elastic/kib /x-pack/test/alerting_api_integration/observability @elastic/obs-ux-management-team /x-pack/test/plugin_api_integration/test_suites/task_manager/ @elastic/response-ops /x-pack/test/functional_with_es_ssl/apps/triggers_actions_ui/ @elastic/response-ops +/x-pack/test/task_manager_claimer_mget/ @elastic/response-ops /docs/user/alerting/ @elastic/response-ops /docs/management/connectors/ @elastic/response-ops /x-pack/test/cases_api_integration/ @elastic/response-ops diff --git a/package.json b/package.json index b142765c25ed8..17312cf729911 100644 --- a/package.json +++ b/package.json @@ -706,6 +706,7 @@ "@kbn/safer-lodash-set": "link:packages/kbn-safer-lodash-set", "@kbn/saml-provider-plugin": "link:x-pack/test/security_api_integration/plugins/saml_provider", "@kbn/sample-task-plugin": 
"link:x-pack/test/plugin_api_integration/plugins/sample_task_plugin", + "@kbn/sample-task-plugin-mget": "link:x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget", "@kbn/saved-object-export-transforms-plugin": "link:test/plugin_functional/plugins/saved_object_export_transforms", "@kbn/saved-object-import-warnings-plugin": "link:test/plugin_functional/plugins/saved_object_import_warnings", "@kbn/saved-object-test-plugin": "link:x-pack/test/saved_object_api_integration/common/plugins/saved_object_test_plugin", diff --git a/tsconfig.base.json b/tsconfig.base.json index da0daf44cd411..69f94542c779a 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -1384,6 +1384,8 @@ "@kbn/saml-provider-plugin/*": ["x-pack/test/security_api_integration/plugins/saml_provider/*"], "@kbn/sample-task-plugin": ["x-pack/test/plugin_api_integration/plugins/sample_task_plugin"], "@kbn/sample-task-plugin/*": ["x-pack/test/plugin_api_integration/plugins/sample_task_plugin/*"], + "@kbn/sample-task-plugin-mget": ["x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget"], + "@kbn/sample-task-plugin-mget/*": ["x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/*"], "@kbn/saved-object-export-transforms-plugin": ["test/plugin_functional/plugins/saved_object_export_transforms"], "@kbn/saved-object-export-transforms-plugin/*": ["test/plugin_functional/plugins/saved_object_export_transforms/*"], "@kbn/saved-object-import-warnings-plugin": ["test/plugin_functional/plugins/saved_object_import_warnings"], diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index e30fb170910f9..bb59a73a305d6 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -242,12 +242,7 @@ describe('config validation', () => { }).not.toThrowError(); }); - test('the claim strategy is validated', () => { - const config = { claim_strategy: 'invalid-strategy' }; - expect(() => { - configSchema.validate(config); - }).toThrowErrorMatchingInlineSnapshot( - `"The claim strategy is invalid: Unknown task claiming strategy (invalid-strategy)"` - ); + test('any claim strategy is valid', () => { + configSchema.validate({ claim_strategy: 'anything!' 
}); }); }); diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index 54783018eed1f..eec63c5be489c 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -6,7 +6,6 @@ */ import { schema, TypeOf } from '@kbn/config-schema'; -import { getTaskClaimer } from './task_claimers'; export const MAX_WORKERS_LIMIT = 100; export const DEFAULT_MAX_WORKERS = 10; @@ -27,6 +26,7 @@ export const DEFAULT_METRICS_RESET_INTERVAL = 30 * 1000; // 30 seconds export const DEFAULT_WORKER_UTILIZATION_RUNNING_AVERAGE_WINDOW = 5; export const CLAIM_STRATEGY_DEFAULT = 'default'; +export const CLAIM_STRATEGY_MGET = 'unsafe_mget'; export const taskExecutionFailureThresholdSchema = schema.object( { @@ -165,11 +165,6 @@ export const configSchema = schema.object( ) { return `The specified monitored_stats_required_freshness (${config.monitored_stats_required_freshness}) is invalid, as it is below the poll_interval (${config.poll_interval})`; } - try { - getTaskClaimer(config.claim_strategy); - } catch (err) { - return `The claim strategy is invalid: ${err.message}`; - } }, } ); diff --git a/x-pack/plugins/task_manager/server/manual_tests/get_rule_run_event_logs.js b/x-pack/plugins/task_manager/server/manual_tests/get_rule_run_event_logs.js new file mode 100644 index 0000000000000..5b1875b208cc4 --- /dev/null +++ b/x-pack/plugins/task_manager/server/manual_tests/get_rule_run_event_logs.js @@ -0,0 +1,232 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +const DOCS_TO_FETCH = 10000; + +// Get the event logs from multiple clusters, focusing on rule runs +// as they test recurring activity easily, and augmenting with other +// bits, producing a single .ndjson file for all clusters. 
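+// Usage (see help() at the bottom of this file): pass one or more Elasticsearch
+// URLs, optionally with credentials, and redirect stdout to a file, e.g.
+//   node get_rule_run_event_logs.js https://user:pass@es-host:9243 > rule-runs.ndjson
+// (host and output file name above are illustrative; docs are written to stdout as ndjson)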
+main(); + +async function main() { + // get urls and their host names + const urls = process.argv.slice(2); + const urlNoCreds = urls.map((url) => new URL(url)).map((url) => url?.origin || 'unknown'); + const urlHosts = urls + .map((url) => new URL(url)) + .map((url) => url?.host || 'unknown') + .map((url) => url.split('.')[0]); + + if (urls.length === 0) return help(); + + // get the event logs + const docPromises = urls.map(getRuleRunEventDocs); + const docResults = await Promise.allSettled(docPromises); + + /** @type { any[][] } */ + const serverDocs = []; + + // log errors, and add urls to event logs + for (let i = 0; i < urls.length; i++) { + const url = urls[i]; + const docResult = docResults[i]; + if (docResult.status === 'rejected') { + console.error(`Failed to get docs from ${url}: ${docResult.reason}`); + } else { + for (const doc of docResult.value) { + if (!doc.kibana) doc.kibana = {}; + // add/remove some bits - remove to save space + doc.kibana.url = urlNoCreds[i]; + doc.kibana.host = urlHosts[i]; + delete doc.kibana.saved_objects; + delete doc.kibana.space_ids; + + if (!doc.event) doc.event = {}; + if (doc.event.start) doc.event.startMs = new Date(doc.event.start).valueOf(); + if (doc.event.end) doc.event.endMs = new Date(doc.event.end).valueOf(); + if (doc.event.endMs && doc.event.startMs) + doc.event.durationMs = doc.event.endMs - doc.event.startMs; + } + serverDocs.push(docResult.value); + } + } + + // for each server's docs, apply a worker id + for (const docs of serverDocs) { + // sort ascending by timestamp + docs.sort((a, b) => a.event.startMs - b.event.startMs); + + assignWorkerIds(docs); + + for (const doc of docs) { + console.log(JSON.stringify(doc)); + } + } +} + +class Worker { + /** @param { string } id */ + constructor(id) { + this.id = id; + /** @type { number | undefined } */ + this.nextEnd = undefined; + /** @type { number | undefined } */ + this.lastEnd = undefined; + } + + /** @type { (currentDate: number) => void } */ + update(currentDate) { + if (currentDate >= this.nextEnd) { + this.lastEnd = this.nextEnd; + this.nextEnd = undefined; + } + } + + /** @type { () => boolean } */ + isAvailable() { + return this.nextEnd === undefined; + } + + /** @type { (end: number) => void } */ + claimTill(end) { + this.nextEnd = end; + } +} + +class Workers { + constructor() { + /** @type { Map<string, Worker[]> } */ + this.workersByServer = new Map(); + + /** @type { Map<string, string> } */ + this.serverMap = new Map(); + } + + /** @type { (doc: any) => string } */ + getServerId(doc) { + const { server_uuid: serverUuid } = doc?.kibana || {}; + return this.serverMap.get(serverUuid) || 'unknown'; + } + + /** @type { (doc: any) => Worker } */ + getAvailableWorker(doc) { + const { startMs, endMs } = doc?.event || {}; + const { server_uuid: serverUuid } = doc?.kibana || {}; + if (!this.serverMap.has(serverUuid)) { + this.serverMap.set(serverUuid, `${this.serverMap.size + 1}`); + } + + const workers = this.getWorkersForServer(serverUuid); + + for (const worker of workers) { + worker.update(startMs); + if (worker.isAvailable()) { + worker.claimTill(endMs); + return worker; + } + } + const worker = new Worker(workers.length + 1); + worker.claimTill(endMs); + workers.push(worker); + + return worker; + } + + /** @type { (serverUuid) => Worker[] } */ + getWorkersForServer(serverUuid) { + let workers = this.workersByServer.get(serverUuid); + if (workers !== undefined) return workers; + + workers = []; + this.workersByServer.set(serverUuid, workers); + return workers; + } +} + 
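+// Simulate a per-server pool of workers: docs are walked in start-time order,
+// each run is handed to the first free worker for its Kibana server (creating a
+// new worker when none is free), and the idle time before the run is recorded
+// as event.preIdleMs.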
+/** @type { (docs: any[]) => void } */ +function assignWorkerIds(docs) { + const workers = new Workers(); + for (const doc of docs) { + const worker = workers.getAvailableWorker(doc); + const serverId = workers.getServerId(doc).padStart(3, '0'); + const workerId = `${worker.id}`.padStart(3, '0'); + doc.kibana.worker = `${serverId}-${workerId}`; + doc.event.preIdleMs = worker.lastEnd ? doc.event.startMs - worker.lastEnd : 0; + } +} + +/** @type { (url: string) => Promise<any[]>} */ +async function getRuleRunEventDocs(url) { + const parsedUrl = new URL(url); + const indices = `.kibana-event-log,.kibana-event-log-ds`; + const options = `expand_wildcards=all&ignore_unavailable=true`; + const searchUrl = `${parsedUrl.origin}/${indices}/_search?${options}`; + const query = getQuery(); + const authHeader = getAuthHeader(parsedUrl.username, parsedUrl.password); + const headers = { + 'Content-Type': 'application/json', + ...(authHeader ? { Authorization: authHeader } : {}), + }; + const fetchResult = await fetch(searchUrl, { + method: 'POST', + headers, + body: JSON.stringify(query), + }); + + if (!fetchResult.ok) { + const text = await fetchResult.text(); + throw new Error(`Failed to fetch from ${searchUrl}: ${fetchResult.statusText}\n${text}`); + } + + const result = await fetchResult.json(); + const sources = result.hits.hits.map((hit) => hit._source); + + return sources; +} + +/** @type { (username: string, password: string) => string | undefined } */ +function getAuthHeader(username, password) { + if (!username || !password) return undefined; + if (username.toUpperCase() === 'APIKEY') return `ApiKey ${password}`; + const encoded = Buffer.from(`${username}:${password}`).toString('base64'); + return `Basic ${encoded}`; +} + +/** @type { (size: number) => any} */ +function getQuery() { + return { + size: DOCS_TO_FETCH, + query: { + bool: { + filter: [ + { term: { 'event.provider': 'alerting' } }, + { term: { 'event.action': 'execute' } }, + ], + }, + }, + sort: [{ '@timestamp': { order: 'desc' } }], + }; +} + +function help() { + console.error(` +usage: [this-command] <es-url1> <es-url2> ... <es-urlN> + +Will fetch rule execution event logs from each url, and augment them: +- adds event.startMs - event.start as an epoch number +- adds event.endMs - event.end as an epoch number +- adds event.durationMs - event.end as an epoch number +- adds event.preIdleMs - time worker was idle before this +- adds kibana.url - the URL passed in (which is actually ES) +- adds kibana.host - just the host name from that URL +- adds kibana.worker - worker in form of nodeId-workerId (unique only by url) +- deletes kibana.saved_objects - not needed and confusing +- deletes kibana.space_ids - not needed + +The output is a single .ndjson file with all the docs. 
+`); +} diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index 27305a93f5de3..cd75436e1c33a 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -190,7 +190,8 @@ export function createTaskRunAggregator( (taskEvent: TaskLifecycleEvent) => isTaskManagerStatEvent(taskEvent) && taskEvent.id === 'pollingDelay' ), - map(() => new Date().toISOString()) + map(() => new Date().toISOString()), + startWith(new Date().toISOString()) ), // get the average ratio of polled tasks by their persistency taskPollingLifecycle.events.pipe( diff --git a/x-pack/plugins/task_manager/server/polling/task_poller.ts b/x-pack/plugins/task_manager/server/polling/task_poller.ts index 570ecc8686693..64d17fad2f81a 100644 --- a/x-pack/plugins/task_manager/server/polling/task_poller.ts +++ b/x-pack/plugins/task_manager/server/polling/task_poller.ts @@ -22,7 +22,7 @@ interface Opts<H> { logger: Logger; initialPollInterval: number; pollInterval$: Observable<number>; - pollIntervalDelay$: Observable<number>; + pollIntervalDelay$?: Observable<number>; getCapacity: () => number; work: WorkFn<H>; } @@ -99,10 +99,12 @@ export function createTaskPoller<T, H>({ pollInterval = interval; logger.debug(`Task poller now using interval of ${interval}ms`); }); - pollIntervalDelay$.subscribe((delay) => { - pollIntervalDelay = delay; - logger.debug(`Task poller now delaying emission by ${delay}ms`); - }); + if (pollIntervalDelay$) { + pollIntervalDelay$.subscribe((delay) => { + pollIntervalDelay = delay; + logger.debug(`Task poller now delaying emission by ${delay}ms`); + }); + } hasSubscribed = true; } diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.ts index e21cfaa4f7cec..35fc48423f710 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.ts @@ -14,7 +14,7 @@ import type { Logger, ExecutionContextStart } from '@kbn/core/server'; import { Result, asErr, mapErr, asOk, map, mapOk } from './lib/result_type'; import { ManagedConfiguration } from './lib/create_managed_configuration'; -import { TaskManagerConfig } from './config'; +import { TaskManagerConfig, CLAIM_STRATEGY_DEFAULT } from './config'; import { TaskMarkRunning, @@ -154,15 +154,18 @@ export class TaskPollingLifecycle implements ITaskEventEmitter<TaskLifecycleEven // pipe taskClaiming events into the lifecycle event stream this.taskClaiming.events.subscribe(emitEvent); - const { poll_interval: pollInterval } = config; + const { poll_interval: pollInterval, claim_strategy: claimStrategy } = config; - const pollIntervalDelay$ = delayOnClaimConflicts( - maxWorkersConfiguration$, - pollIntervalConfiguration$, - this.events$, - config.version_conflict_threshold, - config.monitored_stats_running_average_window - ).pipe(tap((delay) => emitEvent(asTaskManagerStatEvent('pollingDelay', asOk(delay))))); + let pollIntervalDelay$: Observable<number> | undefined; + if (claimStrategy === CLAIM_STRATEGY_DEFAULT) { + pollIntervalDelay$ = delayOnClaimConflicts( + maxWorkersConfiguration$, + pollIntervalConfiguration$, + this.events$, + config.version_conflict_threshold, + config.monitored_stats_running_average_window + ).pipe(tap((delay) => emitEvent(asTaskManagerStatEvent('pollingDelay', asOk(delay))))); + } const poller = createTaskPoller<string, 
TimedFillPoolResult>({ logger, diff --git a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts index b3a45e0fcc636..e884683926b2b 100644 --- a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts +++ b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.test.ts @@ -14,6 +14,8 @@ import { RunningOrClaimingTaskWithExpiredRetryAt, SortByRunAtAndRetryAt, EnabledTask, + InactiveTasks, + RecognizedTask, OneOfTaskTypes, } from './mark_available_tasks_as_claimed'; @@ -171,6 +173,57 @@ if (doc['task.runAt'].size()!=0) { }); }); + test('generates InactiveTasks clause as expected', () => { + expect(InactiveTasks).toMatchInlineSnapshot(` + Object { + "bool": Object { + "must_not": Array [ + Object { + "bool": Object { + "minimum_should_match": 1, + "must": Object { + "range": Object { + "task.retryAt": Object { + "gt": "now", + }, + }, + }, + "should": Array [ + Object { + "term": Object { + "task.status": "running", + }, + }, + Object { + "term": Object { + "task.status": "claiming", + }, + }, + ], + }, + }, + ], + }, + } + `); + }); + + test('generates RecognizedTask clause as expected', () => { + expect(RecognizedTask).toMatchInlineSnapshot(` + Object { + "bool": Object { + "must_not": Array [ + Object { + "term": Object { + "task.status": "unrecognized", + }, + }, + ], + }, + } + `); + }); + describe(`script`, () => { test('it marks the update as a noop if the type is skipped', async () => { const taskManagerId = '3478fg6-82374f6-83467gf5-384g6f'; diff --git a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.ts b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.ts index f76c033bd9a14..0c241aeef14b8 100644 --- a/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.ts +++ b/x-pack/plugins/task_manager/server/queries/mark_available_tasks_as_claimed.ts @@ -5,6 +5,8 @@ * 2.0. 
*/ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import { TaskTypeDictionary } from '../task_type_dictionary'; +import { TaskStatus, TaskPriority } from '../task'; import { ScriptBasedSortClause, ScriptClause, @@ -65,6 +67,8 @@ export const InactiveTasks: MustNotCondition = { { bool: { should: [{ term: { 'task.status': 'running' } }, { term: { 'task.status': 'claiming' } }], + // needed since default value is 0 when there is a `must` in the `bool` + minimum_should_match: 1, must: { range: { 'task.retryAt': { gt: 'now' } } }, }, }, @@ -84,6 +88,18 @@ export const EnabledTask: MustCondition = { }, }; +export const RecognizedTask: MustNotCondition = { + bool: { + must_not: [ + { + term: { + 'task.status': TaskStatus.Unrecognized, + }, + }, + ], + }, +}; + export const RunningOrClaimingTaskWithExpiredRetryAt: MustCondition = { bool: { must: [ @@ -116,6 +132,46 @@ if (doc['task.runAt'].size()!=0) { }; export const SortByRunAtAndRetryAt = SortByRunAtAndRetryAtScript as estypes.SortCombinations; +function getSortByPriority(definitions: TaskTypeDictionary): estypes.SortCombinations | undefined { + if (definitions.size() === 0) return; + + return { + _script: { + type: 'number', + order: 'desc', + script: { + lang: 'painless', + // Use priority if explicitly specified in task definition, otherwise default to 50 (Normal) + // TODO: we could do this locally as well, but they may starve + source: ` + String taskType = doc['task.taskType'].value; + if (params.priority_map.containsKey(taskType)) { + return params.priority_map[taskType]; + } else { + return ${TaskPriority.Normal}; + } + `, + params: { + priority_map: definitions + .getAllDefinitions() + .reduce<Record<string, TaskPriority>>((acc, taskDefinition) => { + if (taskDefinition.priority) { + acc[taskDefinition.type] = taskDefinition.priority; + } + return acc; + }, {}), + }, + }, + }, + }; +} + +export function getClaimSort(definitions: TaskTypeDictionary): estypes.SortCombinations[] { + const sortByPriority = getSortByPriority(definitions); + if (!sortByPriority) return [SortByRunAtAndRetryAt]; + return [sortByPriority, SortByRunAtAndRetryAt]; +} + export interface UpdateFieldsAndMarkAsFailedOpts { fieldUpdates: { [field: string]: string | number | Date; diff --git a/x-pack/plugins/task_manager/server/queries/task_claiming.test.ts b/x-pack/plugins/task_manager/server/queries/task_claiming.test.ts index 6c3b2dbc3d694..33e5a0074319d 100644 --- a/x-pack/plugins/task_manager/server/queries/task_claiming.test.ts +++ b/x-pack/plugins/task_manager/server/queries/task_claiming.test.ts @@ -66,21 +66,23 @@ describe('TaskClaiming', () => { .mockImplementation(() => mockApmTrans as any); }); - test(`should throw an error when invalid strategy specified`, () => { + test(`should log a warning when invalid strategy specified`, () => { const definitions = new TaskTypeDictionary(mockLogger()); - expect(() => { - new TaskClaiming({ - logger: taskManagerLogger, - strategy: 'non-default', - definitions, - excludedTaskTypes: [], - unusedTypes: [], - taskStore: taskStoreMock.create({ taskManagerId: '' }), - maxAttempts: 2, - getCapacity: () => 10, - }); - }).toThrowErrorMatchingInlineSnapshot(`"Unknown task claiming strategy (non-default)"`); + new TaskClaiming({ + logger: taskManagerLogger, + strategy: 'non-default', + definitions, + excludedTaskTypes: [], + unusedTypes: [], + taskStore: taskStoreMock.create({ taskManagerId: '' }), + maxAttempts: 2, + getCapacity: () => 10, + }); + + 
expect(taskManagerLogger.warn).toHaveBeenCalledWith( + 'Unknown task claiming strategy "non-default", falling back to default' + ); }); test(`should log when a certain task type is skipped due to having a zero concurency configuration`, () => { @@ -127,7 +129,7 @@ describe('TaskClaiming', () => { getCapacity: () => 10, }); - expect(taskManagerLogger.info).toHaveBeenCalledTimes(1); + expect(taskManagerLogger.info).toHaveBeenCalledTimes(2); expect(taskManagerLogger.info.mock.calls[0][0]).toMatchInlineSnapshot( `"Task Manager will never claim tasks of the following types as their \\"maxConcurrency\\" is set to 0: limitedToZero, anotherLimitedToZero"` ); diff --git a/x-pack/plugins/task_manager/server/queries/task_claiming.ts b/x-pack/plugins/task_manager/server/queries/task_claiming.ts index 8216e7c8d8dfb..ffd053656d72d 100644 --- a/x-pack/plugins/task_manager/server/queries/task_claiming.ts +++ b/x-pack/plugins/task_manager/server/queries/task_claiming.ts @@ -109,8 +109,10 @@ export class TaskClaiming { this.taskMaxAttempts = Object.fromEntries(this.normalizeMaxAttempts(this.definitions)); this.excludedTaskTypes = opts.excludedTaskTypes; this.unusedTypes = opts.unusedTypes; - this.taskClaimer = getTaskClaimer(opts.strategy); + this.taskClaimer = getTaskClaimer(this.logger, opts.strategy); this.events$ = new Subject<TaskClaim>(); + + this.logger.info(`using task claiming strategy: ${opts.strategy}`); } private partitionIntoClaimingBatches(definitions: TaskTypeDictionary): TaskClaimingBatches { @@ -175,6 +177,7 @@ export class TaskClaiming { definitions: this.definitions, taskMaxAttempts: this.taskMaxAttempts, excludedTaskTypes: this.excludedTaskTypes, + logger: this.logger, }; return this.taskClaimer(opts).pipe(map((claimResult) => asOk(claimResult))); } diff --git a/x-pack/plugins/task_manager/server/task.ts b/x-pack/plugins/task_manager/server/task.ts index 89c950dabf687..054b8f4686388 100644 --- a/x-pack/plugins/task_manager/server/task.ts +++ b/x-pack/plugins/task_manager/server/task.ts @@ -428,6 +428,17 @@ export interface ConcreteTaskInstance extends TaskInstance { ownerId: string | null; } +export interface ConcreteTaskInstanceVersion { + /** The _id of the the document (not the SO id) */ + esId: string; + /** The _seq_no of the document when using seq_no_primary_term on fetch */ + seqNo?: number; + /** The _primary_term of the document when using seq_no_primary_term on fetch */ + primaryTerm?: number; + /** The error found if trying to resolve the version info for this esId */ + error?: string; +} + /** * A task instance that has an id and is ready for storage. */ diff --git a/x-pack/plugins/task_manager/server/task_claimers/README.md b/x-pack/plugins/task_manager/server/task_claimers/README.md index 0c92f02031d2e..210e08b021af0 100644 --- a/x-pack/plugins/task_manager/server/task_claimers/README.md +++ b/x-pack/plugins/task_manager/server/task_claimers/README.md @@ -18,3 +18,17 @@ idea: - A search is then run on the documents updated from the update by query. + +`mget` task claiming strategy +------------------------------------------------------------------------ + +see: https://github.com/elastic/kibana/issues/155770 + +The idea is to get more tasks than we have workers for with a search, +and then validate that they are still valid (not been claimed) with an +mget, since they may be stale. 
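+
+A rough sketch of that flow, condensed from `strategy_mget.ts` (the `capacity`,
+`query` and `sort` bindings stand in for values the claimer computes; the real
+code also sets `ownerId`, `retryAt` and `status: 'claiming'` on each doc before
+the bulk update):
+
+```ts
+// 1. over-fetch candidates (the real code asks for capacity * SIZE_MULTIPLIER_FOR_TASK_FETCH)
+const { docs, versionMap } = await taskStore.fetch({
+  query,
+  sort,
+  size: capacity * 4,
+  seq_no_primary_term: true,
+});
+
+// 2. mget the latest seq_no / primary_term for each candidate
+const latest = await taskStore.getDocVersions(docs.map((doc) => `task:${doc.id}`));
+
+// 3. drop candidates whose version changed since the search (another node claimed them)
+const current = docs.filter((doc) => {
+  const seen = versionMap.get(doc.id);
+  const now = latest.get(`task:${doc.id}`);
+  return seen && now && seen.seqNo === now.seqNo && seen.primaryTerm === now.primaryTerm;
+});
+
+// 4. claim as many of the survivors as we have capacity for
+await taskStore.bulkUpdate(current.slice(0, capacity), { validate: false });
+```
+
+Stale candidates are dropped up front by comparing the `seq_no`/`primary_term`
+seen by the search with the mget result; any remaining races surface as conflict
+errors on the bulk update.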
+ +There are lots of interesting potential things we can do here, like maybe +skipping polling completely for a round (think single Kibana, and the earlier +poll got 2 * workers tasks out). But we'll probably start with the bare +minimum to get it working. diff --git a/x-pack/plugins/task_manager/server/task_claimers/index.test.ts b/x-pack/plugins/task_manager/server/task_claimers/index.test.ts index be26f0d2f9efb..406a719baf5b1 100644 --- a/x-pack/plugins/task_manager/server/task_claimers/index.test.ts +++ b/x-pack/plugins/task_manager/server/task_claimers/index.test.ts @@ -6,18 +6,33 @@ */ import { getTaskClaimer } from '.'; +import { mockLogger } from '../test_utils'; import { claimAvailableTasksDefault } from './strategy_default'; +import { claimAvailableTasksMget } from './strategy_mget'; + +const logger = mockLogger(); describe('task_claimers/index', () => { + beforeEach(() => jest.resetAllMocks()); + describe('getTaskClaimer()', () => { test('returns expected result for default', () => { - const taskClaimer = getTaskClaimer('default'); + const taskClaimer = getTaskClaimer(logger, 'default'); expect(taskClaimer).toBe(claimAvailableTasksDefault); + expect(logger.warn).not.toHaveBeenCalled(); }); - test('throws error for unsupported parameter', () => { - expect(() => getTaskClaimer('not-supported')).toThrowErrorMatchingInlineSnapshot( - `"Unknown task claiming strategy (not-supported)"` + test('returns expected result for mget', () => { + const taskClaimer = getTaskClaimer(logger, 'unsafe_mget'); + expect(taskClaimer).toBe(claimAvailableTasksMget); + expect(logger.warn).not.toHaveBeenCalled(); + }); + + test('logs a warning for unsupported parameter', () => { + const taskClaimer = getTaskClaimer(logger, 'not-supported'); + expect(taskClaimer).toBe(claimAvailableTasksDefault); + expect(logger.warn).toHaveBeenCalledWith( + 'Unknown task claiming strategy "not-supported", falling back to default' ); }); }); diff --git a/x-pack/plugins/task_manager/server/task_claimers/index.ts b/x-pack/plugins/task_manager/server/task_claimers/index.ts index 8074197a147b6..927d4c762f625 100644 --- a/x-pack/plugins/task_manager/server/task_claimers/index.ts +++ b/x-pack/plugins/task_manager/server/task_claimers/index.ts @@ -6,6 +6,7 @@ */ import { Subject, Observable } from 'rxjs'; +import { Logger } from '@kbn/core/server'; import { TaskStore } from '../task_store'; import { TaskClaim, TaskTiming } from '../task_events'; @@ -13,7 +14,8 @@ import { TaskTypeDictionary } from '../task_type_dictionary'; import { TaskClaimingBatches } from '../queries/task_claiming'; import { ConcreteTaskInstance } from '../task'; import { claimAvailableTasksDefault } from './strategy_default'; -import { CLAIM_STRATEGY_DEFAULT } from '../config'; +import { claimAvailableTasksMget } from './strategy_mget'; +import { CLAIM_STRATEGY_DEFAULT, CLAIM_STRATEGY_MGET } from '../config'; export interface TaskClaimerOpts { getCapacity: (taskType?: string | undefined) => number; @@ -25,6 +27,7 @@ export interface TaskClaimerOpts { unusedTypes: string[]; excludedTaskTypes: string[]; taskMaxAttempts: Record<string, number>; + logger: Logger; } export interface ClaimOwnershipResult { @@ -39,10 +42,31 @@ export interface ClaimOwnershipResult { export type TaskClaimerFn = (opts: TaskClaimerOpts) => Observable<ClaimOwnershipResult>; -export function getTaskClaimer(strategy: string): TaskClaimerFn { +let WarnedOnInvalidClaimer = false; + +export function getTaskClaimer(logger: Logger, strategy: string): TaskClaimerFn { switch (strategy) { case 
CLAIM_STRATEGY_DEFAULT: return claimAvailableTasksDefault; + case CLAIM_STRATEGY_MGET: + return claimAvailableTasksMget; + } + + if (!WarnedOnInvalidClaimer) { + WarnedOnInvalidClaimer = true; + logger.warn(`Unknown task claiming strategy "${strategy}", falling back to default`); } - throw new Error(`Unknown task claiming strategy (${strategy})`); + return claimAvailableTasksDefault; +} + +export function getEmptyClaimOwnershipResult() { + return { + stats: { + tasksUpdated: 0, + tasksConflicted: 0, + tasksClaimed: 0, + tasksRejected: 0, + }, + docs: [], + }; } diff --git a/x-pack/plugins/task_manager/server/task_claimers/strategy_default.test.ts b/x-pack/plugins/task_manager/server/task_claimers/strategy_default.test.ts index bb511cdc0958f..e07038960d371 100644 --- a/x-pack/plugins/task_manager/server/task_claimers/strategy_default.test.ts +++ b/x-pack/plugins/task_manager/server/task_claimers/strategy_default.test.ts @@ -105,7 +105,7 @@ describe('TaskClaiming', () => { store.convertToSavedObjectIds.mockImplementation((ids) => ids.map((id) => `task:${id}`)); if (hits.length === 1) { - store.fetch.mockResolvedValue({ docs: hits[0] }); + store.fetch.mockResolvedValue({ docs: hits[0], versionMap: new Map() }); store.updateByQuery.mockResolvedValue({ updated: hits[0].length, version_conflicts: versionConflicts, @@ -113,7 +113,7 @@ describe('TaskClaiming', () => { }); } else { for (const docs of hits) { - store.fetch.mockResolvedValueOnce({ docs }); + store.fetch.mockResolvedValueOnce({ docs, versionMap: new Map() }); store.updateByQuery.mockResolvedValueOnce({ updated: docs.length, version_conflicts: versionConflicts, @@ -364,13 +364,13 @@ describe('TaskClaiming', () => { }, }, source: ` - String taskType = doc['task.taskType'].value; - if (params.priority_map.containsKey(taskType)) { - return params.priority_map[taskType]; - } else { - return 50; - } - `, + String taskType = doc['task.taskType'].value; + if (params.priority_map.containsKey(taskType)) { + return params.priority_map[taskType]; + } else { + return 50; + } + `, }, }, }, @@ -1227,7 +1227,7 @@ if (doc['task.runAt'].size()!=0) { const taskStore = taskStoreMock.create({ taskManagerId }); taskStore.convertToSavedObjectIds.mockImplementation((ids) => ids.map((id) => `task:${id}`)); for (const docs of taskCycles) { - taskStore.fetch.mockResolvedValueOnce({ docs }); + taskStore.fetch.mockResolvedValueOnce({ docs, versionMap: new Map() }); taskStore.updateByQuery.mockResolvedValueOnce({ updated: docs.length, version_conflicts: 0, @@ -1235,7 +1235,7 @@ if (doc['task.runAt'].size()!=0) { }); } - taskStore.fetch.mockResolvedValue({ docs: [] }); + taskStore.fetch.mockResolvedValue({ docs: [], versionMap: new Map() }); taskStore.updateByQuery.mockResolvedValue({ updated: 0, version_conflicts: 0, diff --git a/x-pack/plugins/task_manager/server/task_claimers/strategy_default.ts b/x-pack/plugins/task_manager/server/task_claimers/strategy_default.ts index 765be571eeb5d..6482c7e861dea 100644 --- a/x-pack/plugins/task_manager/server/task_claimers/strategy_default.ts +++ b/x-pack/plugins/task_manager/server/task_claimers/strategy_default.ts @@ -8,7 +8,6 @@ /* * This module contains helpers for managing the task manager storage layer. 
*/ -import type { estypes } from '@elastic/elasticsearch'; import apm from 'elastic-apm-node'; import minimatch from 'minimatch'; import { Subject, Observable, from, of } from 'rxjs'; @@ -17,8 +16,8 @@ import { groupBy, pick } from 'lodash'; import { asOk } from '../lib/result_type'; import { TaskTypeDictionary } from '../task_type_dictionary'; -import { TaskClaimerOpts, ClaimOwnershipResult } from '.'; -import { ConcreteTaskInstance, TaskPriority } from '../task'; +import { TaskClaimerOpts, ClaimOwnershipResult, getEmptyClaimOwnershipResult } from '.'; +import { ConcreteTaskInstance } from '../task'; import { TASK_MANAGER_TRANSACTION_TYPE } from '../task_running'; import { isLimited, TASK_MANAGER_MARK_AS_CLAIMED } from '../queries/task_claiming'; import { TaskClaim, asTaskClaimEvent, startTaskTimer } from '../task_events'; @@ -29,7 +28,7 @@ import { IdleTaskWithExpiredRunAt, InactiveTasks, RunningOrClaimingTaskWithExpiredRetryAt, - SortByRunAtAndRetryAt, + getClaimSort, tasksClaimedByOwner, tasksOfType, EnabledTask, @@ -225,20 +224,8 @@ async function sweepForClaimedTasks( return docs; } -function emptyClaimOwnershipResult() { - return { - stats: { - tasksUpdated: 0, - tasksConflicted: 0, - tasksClaimed: 0, - tasksRejected: 0, - }, - docs: [], - }; -} - function accumulateClaimOwnershipResults( - prev: ClaimOwnershipResult = emptyClaimOwnershipResult(), + prev: ClaimOwnershipResult = getEmptyClaimOwnershipResult(), next?: ClaimOwnershipResult ) { if (next) { @@ -256,38 +243,3 @@ function accumulateClaimOwnershipResults( } return prev; } - -function getClaimSort(definitions: TaskTypeDictionary): estypes.SortCombinations[] { - // Sort by descending priority, then by ascending runAt/retryAt time - return [ - { - _script: { - type: 'number', - order: 'desc', - script: { - lang: 'painless', - // Use priority if explicitly specified in task definition, otherwise default to 50 (Normal) - source: ` - String taskType = doc['task.taskType'].value; - if (params.priority_map.containsKey(taskType)) { - return params.priority_map[taskType]; - } else { - return ${TaskPriority.Normal}; - } - `, - params: { - priority_map: definitions - .getAllDefinitions() - .reduce<Record<string, TaskPriority>>((acc, taskDefinition) => { - if (taskDefinition.priority) { - acc[taskDefinition.type] = taskDefinition.priority; - } - return acc; - }, {}), - }, - }, - }, - }, - SortByRunAtAndRetryAt, - ]; -} diff --git a/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.test.ts b/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.test.ts new file mode 100644 index 0000000000000..0306f9dda3da8 --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.test.ts @@ -0,0 +1,463 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import _ from 'lodash'; +import { v4 as uuidv4 } from 'uuid'; +import { filter, take, toArray } from 'rxjs'; +import { CLAIM_STRATEGY_MGET } from '../config'; + +import { + TaskStatus, + ConcreteTaskInstance, + ConcreteTaskInstanceVersion, + TaskPriority, +} from '../task'; +import { StoreOpts } from '../task_store'; +import { asTaskClaimEvent, TaskEvent } from '../task_events'; +import { asOk, isOk, unwrap } from '../lib/result_type'; +import { TaskTypeDictionary } from '../task_type_dictionary'; +import { mockLogger } from '../test_utils'; +import { + TaskClaiming, + OwnershipClaimingOpts, + TaskClaimingOpts, + TASK_MANAGER_MARK_AS_CLAIMED, +} from '../queries/task_claiming'; +import { Observable } from 'rxjs'; +import { taskStoreMock } from '../task_store.mock'; +import apm from 'elastic-apm-node'; +import { TASK_MANAGER_TRANSACTION_TYPE } from '../task_running'; +import { ClaimOwnershipResult } from '.'; +import { FillPoolResult } from '../lib/fill_pool'; + +jest.mock('../constants', () => ({ + CONCURRENCY_ALLOW_LIST_BY_TASK_TYPE: [ + 'limitedToZero', + 'limitedToOne', + 'anotherLimitedToZero', + 'anotherLimitedToOne', + 'limitedToTwo', + 'limitedToFive', + ], +})); + +const taskManagerLogger = mockLogger(); + +beforeEach(() => jest.clearAllMocks()); + +const mockedDate = new Date('2019-02-12T21:01:22.479Z'); +// eslint-disable-next-line @typescript-eslint/no-explicit-any +(global as any).Date = class Date { + constructor() { + return mockedDate; + } + static now() { + return mockedDate.getTime(); + } +}; + +const taskDefinitions = new TaskTypeDictionary(taskManagerLogger); +taskDefinitions.registerTaskDefinitions({ + report: { + title: 'report', + createTaskRunner: jest.fn(), + }, + dernstraight: { + title: 'dernstraight', + createTaskRunner: jest.fn(), + }, + yawn: { + title: 'yawn', + createTaskRunner: jest.fn(), + }, +}); + +const mockApmTrans = { + end: jest.fn(), +}; + +// needs more tests in the similar to the `strategy_default.test.ts` test suite +describe('TaskClaiming', () => { + beforeEach(() => { + jest.clearAllMocks(); + jest + .spyOn(apm, 'startTransaction') + // eslint-disable-next-line @typescript-eslint/no-explicit-any + .mockImplementation(() => mockApmTrans as any); + }); + + describe('claimAvailableTasks', () => { + function initialiseTestClaiming({ + storeOpts = {}, + taskClaimingOpts = {}, + hits, + versionMaps, + excludedTaskTypes = [], + unusedTaskTypes = [], + }: { + storeOpts: Partial<StoreOpts>; + taskClaimingOpts: Partial<TaskClaimingOpts>; + hits?: ConcreteTaskInstance[][]; + versionMaps?: Array<Map<string, ConcreteTaskInstanceVersion>>; + excludedTaskTypes?: string[]; + unusedTaskTypes?: string[]; + }) { + const definitions = storeOpts.definitions ?? 
taskDefinitions; + const store = taskStoreMock.create({ taskManagerId: storeOpts.taskManagerId }); + store.convertToSavedObjectIds.mockImplementation((ids) => ids.map((id) => `task:${id}`)); + + if (hits == null) hits = [generateFakeTasks(1)]; + if (versionMaps == null) { + versionMaps = [new Map<string, ConcreteTaskInstanceVersion>()]; + for (const oneHit of hits) { + const map = new Map<string, ConcreteTaskInstanceVersion>(); + versionMaps.push(map); + for (const task of oneHit) { + map.set(task.id, { esId: task.id, seqNo: 32, primaryTerm: 32 }); + } + } + } + + for (let i = 0; i < hits.length; i++) { + store.fetch.mockResolvedValueOnce({ docs: hits[i], versionMap: versionMaps[i] }); + store.getDocVersions.mockResolvedValueOnce(versionMaps[i]); + const oneBulkResult = hits[i].map((hit) => asOk(hit)); + store.bulkUpdate.mockResolvedValueOnce(oneBulkResult); + } + + const taskClaiming = new TaskClaiming({ + logger: taskManagerLogger, + strategy: CLAIM_STRATEGY_MGET, + definitions, + taskStore: store, + excludedTaskTypes, + unusedTypes: unusedTaskTypes, + maxAttempts: taskClaimingOpts.maxAttempts ?? 2, + getCapacity: taskClaimingOpts.getCapacity ?? (() => 10), + ...taskClaimingOpts, + }); + + return { taskClaiming, store }; + } + + async function testClaimAvailableTasks({ + storeOpts = {}, + taskClaimingOpts = {}, + claimingOpts, + hits = [generateFakeTasks(1)], + excludedTaskTypes = [], + unusedTaskTypes = [], + }: { + storeOpts: Partial<StoreOpts>; + taskClaimingOpts: Partial<TaskClaimingOpts>; + claimingOpts: Omit<OwnershipClaimingOpts, 'size' | 'taskTypes'>; + hits?: ConcreteTaskInstance[][]; + excludedTaskTypes?: string[]; + unusedTaskTypes?: string[]; + }) { + const { taskClaiming, store } = initialiseTestClaiming({ + storeOpts, + taskClaimingOpts, + excludedTaskTypes, + unusedTaskTypes, + hits, + }); + + const resultsOrErr = await getAllAsPromise( + taskClaiming.claimAvailableTasksIfCapacityIsAvailable(claimingOpts) + ); + for (const resultOrErr of resultsOrErr) { + if (!isOk<ClaimOwnershipResult, FillPoolResult>(resultOrErr)) { + expect(resultOrErr).toBe(undefined); + } + } + + const results = resultsOrErr.map((resultOrErr) => { + if (!isOk<ClaimOwnershipResult, FillPoolResult>(resultOrErr)) { + expect(resultOrErr).toBe(undefined); + } + return unwrap(resultOrErr) as ClaimOwnershipResult; + }); + + expect(apm.startTransaction).toHaveBeenCalledWith( + TASK_MANAGER_MARK_AS_CLAIMED, + TASK_MANAGER_TRANSACTION_TYPE + ); + expect(mockApmTrans.end).toHaveBeenCalledWith('success'); + + expect(store.fetch.mock.calls).toMatchObject({}); + expect(store.getDocVersions.mock.calls).toMatchObject({}); + return results.map((result, index) => ({ + result, + args: {}, + })); + } + + test('makes calls to APM as expected when markAvailableTasksAsClaimed throws error', async () => { + const maxAttempts = _.random(2, 43); + const customMaxAttempts = _.random(44, 100); + + const definitions = new TaskTypeDictionary(mockLogger()); + definitions.registerTaskDefinitions({ + foo: { + title: 'foo', + createTaskRunner: jest.fn(), + }, + bar: { + title: 'bar', + maxAttempts: customMaxAttempts, + createTaskRunner: jest.fn(), + }, + }); + + const { taskClaiming, store } = initialiseTestClaiming({ + storeOpts: { + definitions, + }, + taskClaimingOpts: { + maxAttempts, + }, + }); + + store.fetch.mockReset(); + store.fetch.mockRejectedValue(new Error('Oh no')); + + await expect( + getAllAsPromise( + taskClaiming.claimAvailableTasksIfCapacityIsAvailable({ + claimOwnershipUntil: new Date(), + }) + ) + 
).rejects.toMatchInlineSnapshot(`[Error: Oh no]`); + + expect(apm.startTransaction).toHaveBeenCalledWith( + TASK_MANAGER_MARK_AS_CLAIMED, + TASK_MANAGER_TRANSACTION_TYPE + ); + expect(mockApmTrans.end).toHaveBeenCalledWith('failure'); + }); + + test('it filters claimed tasks down by supported types, maxAttempts, status, and runAt', async () => { + const maxAttempts = _.random(2, 43); + const customMaxAttempts = _.random(44, 100); + + const definitions = new TaskTypeDictionary(mockLogger()); + definitions.registerTaskDefinitions({ + foo: { + title: 'foo', + priority: TaskPriority.Low, + createTaskRunner: jest.fn(), + }, + bar: { + title: 'bar', + maxAttempts: customMaxAttempts, + createTaskRunner: jest.fn(), + }, + foobar: { + title: 'foobar', + maxAttempts: customMaxAttempts, + createTaskRunner: jest.fn(), + }, + }); + + const result = await testClaimAvailableTasks({ + storeOpts: { definitions }, + taskClaimingOpts: { maxAttempts }, + claimingOpts: { claimOwnershipUntil: new Date() }, + excludedTaskTypes: ['foobar'], + }); + expect(result).toMatchObject({}); + }); + }); + + describe('task events', () => { + function generateTasks(taskManagerId: string) { + const runAt = new Date(); + const tasks = [ + { + id: 'claimed-by-id', + runAt, + taskType: 'foo', + schedule: undefined, + attempts: 0, + status: TaskStatus.Claiming, + params: { hello: 'world' }, + state: { baby: 'Henhen' }, + user: 'jimbo', + scope: ['reporting'], + ownerId: taskManagerId, + startedAt: null, + retryAt: null, + scheduledAt: new Date(), + traceparent: 'parent', + }, + { + id: 'claimed-by-schedule', + runAt, + taskType: 'bar', + schedule: { interval: '5m' }, + attempts: 2, + status: TaskStatus.Claiming, + params: { shazm: 1 }, + state: { henry: 'The 8th' }, + user: 'dabo', + scope: ['reporting', 'ceo'], + ownerId: taskManagerId, + startedAt: null, + retryAt: null, + scheduledAt: new Date(), + traceparent: 'newParent', + }, + { + id: 'already-running', + runAt, + taskType: 'bar', + schedule: { interval: '5m' }, + attempts: 2, + status: TaskStatus.Running, + params: { shazm: 1 }, + state: { henry: 'The 8th' }, + user: 'dabo', + scope: ['reporting', 'ceo'], + ownerId: taskManagerId, + startedAt: null, + retryAt: null, + scheduledAt: new Date(), + traceparent: '', + }, + ]; + + return { taskManagerId, runAt, tasks }; + } + + function instantiateStoreWithMockedApiResponses({ + taskManagerId = uuidv4(), + definitions = taskDefinitions, + getCapacity = () => 10, + tasksClaimed, + }: Partial<Pick<TaskClaimingOpts, 'definitions' | 'getCapacity'>> & { + taskManagerId?: string; + tasksClaimed?: ConcreteTaskInstance[][]; + } = {}) { + const { runAt, tasks: generatedTasks } = generateTasks(taskManagerId); + const taskCycles = tasksClaimed ?? 
[generatedTasks]; + + const taskStore = taskStoreMock.create({ taskManagerId }); + taskStore.convertToSavedObjectIds.mockImplementation((ids) => ids.map((id) => `task:${id}`)); + for (const docs of taskCycles) { + taskStore.fetch.mockResolvedValueOnce({ docs, versionMap: new Map() }); + taskStore.updateByQuery.mockResolvedValueOnce({ + updated: docs.length, + version_conflicts: 0, + total: docs.length, + }); + } + + taskStore.fetch.mockResolvedValue({ docs: [], versionMap: new Map() }); + taskStore.updateByQuery.mockResolvedValue({ + updated: 0, + version_conflicts: 0, + total: 0, + }); + + const taskClaiming = new TaskClaiming({ + logger: taskManagerLogger, + strategy: 'default', + definitions, + excludedTaskTypes: [], + unusedTypes: [], + taskStore, + maxAttempts: 2, + getCapacity, + }); + + return { taskManagerId, runAt, taskClaiming }; + } + + test('emits an event when a task is succesfully by scheduling', async () => { + const { taskManagerId, runAt, taskClaiming } = instantiateStoreWithMockedApiResponses(); + + const promise = taskClaiming.events + .pipe( + filter( + (event: TaskEvent<ConcreteTaskInstance, Error>) => event.id === 'claimed-by-schedule' + ), + take(1) + ) + .toPromise(); + + await getFirstAsPromise( + taskClaiming.claimAvailableTasksIfCapacityIsAvailable({ + claimOwnershipUntil: new Date(), + }) + ); + + const event = await promise; + expect(event).toMatchObject( + asTaskClaimEvent( + 'claimed-by-schedule', + asOk({ + id: 'claimed-by-schedule', + runAt, + taskType: 'bar', + schedule: { interval: '5m' }, + attempts: 2, + status: 'claiming' as TaskStatus, + params: { shazm: 1 }, + state: { henry: 'The 8th' }, + user: 'dabo', + scope: ['reporting', 'ceo'], + ownerId: taskManagerId, + startedAt: null, + retryAt: null, + scheduledAt: new Date(), + traceparent: 'newParent', + }) + ) + ); + }); + }); +}); + +function generateFakeTasks(count: number = 1) { + return _.times(count, (index) => mockInstance({ id: `task:id-${index}` })); +} + +function mockInstance(instance: Partial<ConcreteTaskInstance> = {}) { + return Object.assign( + { + id: uuidv4(), + taskType: 'bar', + sequenceNumber: 32, + primaryTerm: 32, + runAt: new Date(), + scheduledAt: new Date(), + startedAt: null, + retryAt: null, + attempts: 0, + params: {}, + scope: ['reporting'], + state: {}, + status: 'idle', + user: 'example', + ownerId: null, + traceparent: '', + }, + instance + ); +} + +function getFirstAsPromise<T>(obs$: Observable<T>): Promise<T> { + return new Promise((resolve, reject) => { + obs$.subscribe(resolve, reject); + }); +} +function getAllAsPromise<T>(obs$: Observable<T>): Promise<T[]> { + return new Promise((resolve, reject) => { + obs$.pipe(toArray()).subscribe(resolve, reject); + }); +} diff --git a/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.ts b/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.ts new file mode 100644 index 0000000000000..07d18a39a1dbc --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_claimers/strategy_mget.ts @@ -0,0 +1,326 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +// Basic operation of this task claimer: +// - search for candidate tasks to run, more than we actually can run +// - for each task found, do an mget to get the current seq_no and primary_term +// - if the mget result doesn't match the search result, the task is stale +// - from the non-stale search results, return as many as we can run + +import { SavedObjectsErrorHelpers } from '@kbn/core/server'; + +import apm from 'elastic-apm-node'; +import { Subject, Observable } from 'rxjs'; + +import { TaskTypeDictionary } from '../task_type_dictionary'; +import { TaskClaimerOpts, ClaimOwnershipResult, getEmptyClaimOwnershipResult } from '.'; +import { ConcreteTaskInstance, TaskStatus, ConcreteTaskInstanceVersion } from '../task'; +import { TASK_MANAGER_TRANSACTION_TYPE } from '../task_running'; +import { + isLimited, + TASK_MANAGER_MARK_AS_CLAIMED, + TaskClaimingBatches, +} from '../queries/task_claiming'; +import { TaskClaim, asTaskClaimEvent, startTaskTimer } from '../task_events'; +import { shouldBeOneOf, mustBeAllOf, filterDownBy, matchesClauses } from '../queries/query_clauses'; + +import { + IdleTaskWithExpiredRunAt, + InactiveTasks, + RunningOrClaimingTaskWithExpiredRetryAt, + getClaimSort, + EnabledTask, + OneOfTaskTypes, + RecognizedTask, +} from '../queries/mark_available_tasks_as_claimed'; + +import { TaskStore, SearchOpts } from '../task_store'; +import { isOk, asOk } from '../lib/result_type'; + +interface OwnershipClaimingOpts { + claimOwnershipUntil: Date; + size: number; + taskTypes: Set<string>; + removedTypes: Set<string>; + excludedTypes: Set<string>; + taskStore: TaskStore; + events$: Subject<TaskClaim>; + definitions: TaskTypeDictionary; + taskMaxAttempts: Record<string, number>; +} + +const SIZE_MULTIPLIER_FOR_TASK_FETCH = 4; + +export function claimAvailableTasksMget(opts: TaskClaimerOpts): Observable<ClaimOwnershipResult> { + const taskClaimOwnership$ = new Subject<ClaimOwnershipResult>(); + + claimAvailableTasksApm(opts) + .then((result) => { + taskClaimOwnership$.next(result); + }) + .catch((err) => { + taskClaimOwnership$.error(err); + }) + .finally(() => { + taskClaimOwnership$.complete(); + }); + + return taskClaimOwnership$; +} + +async function claimAvailableTasksApm(opts: TaskClaimerOpts): Promise<ClaimOwnershipResult> { + const apmTrans = apm.startTransaction( + TASK_MANAGER_MARK_AS_CLAIMED, + TASK_MANAGER_TRANSACTION_TYPE + ); + + try { + const result = await claimAvailableTasks(opts); + apmTrans.end('success'); + return result; + } catch (err) { + apmTrans.end('failure'); + throw err; + } +} + +async function claimAvailableTasks(opts: TaskClaimerOpts): Promise<ClaimOwnershipResult> { + const { getCapacity, claimOwnershipUntil, batches, events$, taskStore } = opts; + const { definitions, unusedTypes, excludedTaskTypes, taskMaxAttempts } = opts; + const { logger } = opts; + const loggerTag = claimAvailableTasksMget.name; + const logMeta = { tags: [loggerTag] }; + const initialCapacity = getCapacity(); + const stopTaskTimer = startTaskTimer(); + + const removedTypes = new Set(unusedTypes); // REMOVED_TYPES + const excludedTypes = new Set(excludedTaskTypes); // excluded via config + + // get a list of candidate tasks to claim, with their version info + const { docs, versionMap } = await searchAvailableTasks({ + definitions, + taskTypes: new Set(definitions.getAllTypes()), + excludedTypes, + removedTypes, + taskStore, + events$, + claimOwnershipUntil, + size: initialCapacity * SIZE_MULTIPLIER_FOR_TASK_FETCH, + taskMaxAttempts, + }); + + if (docs.length === 0) 
+ return { + ...getEmptyClaimOwnershipResult(), + timing: stopTaskTimer(), + }; + + // use mget to get the latest version of each task + const docLatestVersions = await taskStore.getDocVersions(docs.map((doc) => `task:${doc.id}`)); + + // filter out stale, missing and removed tasks + const currentTasks: ConcreteTaskInstance[] = []; + const staleTasks: ConcreteTaskInstance[] = []; + const missingTasks: ConcreteTaskInstance[] = []; + const removedTasks: ConcreteTaskInstance[] = []; + + for (const searchDoc of docs) { + if (removedTypes.has(searchDoc.taskType)) { + removedTasks.push(searchDoc); + continue; + } + + const searchVersion = versionMap.get(searchDoc.id); + const latestVersion = docLatestVersions.get(`task:${searchDoc.id}`); + if (!searchVersion || !latestVersion) { + missingTasks.push(searchDoc); + continue; + } + + if ( + searchVersion.seqNo === latestVersion.seqNo && + searchVersion.primaryTerm === latestVersion.primaryTerm + ) { + currentTasks.push(searchDoc); + continue; + } else { + staleTasks.push(searchDoc); + continue; + } + } + // apply limited concurrency limits (TODO: can currently starve other tasks) + const candidateTasks = applyLimitedConcurrency(currentTasks, batches); + + // build the updated task objects we'll claim + const taskUpdates: ConcreteTaskInstance[] = Array.from(candidateTasks) + .slice(0, initialCapacity) + .map((task) => { + if (task.retryAt != null && new Date(task.retryAt).getTime() < Date.now()) { + task.scheduledAt = task.retryAt; + } else { + task.scheduledAt = task.runAt; + } + task.retryAt = claimOwnershipUntil; + task.ownerId = taskStore.taskManagerId; + task.status = TaskStatus.Claiming; + + return task; + }); + + // perform the task object updates, deal with errors + const finalResults: ConcreteTaskInstance[] = []; + let conflicts = staleTasks.length; + let bulkErrors = 0; + + try { + const updateResults = await taskStore.bulkUpdate(taskUpdates, { validate: false }); + for (const updateResult of updateResults) { + if (isOk(updateResult)) { + finalResults.push(updateResult.value); + } else { + const { id, type, error } = updateResult.error; + + // this check is needed so error will be typed correctly for isConflictError + if (SavedObjectsErrorHelpers.isSavedObjectsClientError(error)) { + if (SavedObjectsErrorHelpers.isConflictError(error)) { + conflicts++; + } else { + logger.warn( + `Saved Object error updating task ${id}:${type} during claim: ${error.error}`, + logMeta + ); + bulkErrors++; + } + } else { + logger.warn(`Error updating task ${id}:${type} during claim: ${error.message}`, logMeta); + bulkErrors++; + } + } + } + } catch (err) { + logger.warn(`Error updating tasks during claim: ${err}`, logMeta); + } + + // separate update for removed tasks; shouldn't happen often, so unlikely + // a performance concern, and keeps the rest of the logic simpler + let removedCount = 0; + if (removedTasks.length > 0) { + const tasksToRemove = Array.from(removedTasks); + for (const task of tasksToRemove) { + task.status = TaskStatus.Unrecognized; + } + + // don't worry too much about errors, we'll get them next time + try { + const removeResults = await taskStore.bulkUpdate(tasksToRemove, { validate: false }); + for (const removeResult of removeResults) { + if (isOk(removeResult)) { + removedCount++; + } else { + const { id, type, error } = removeResult.error; + logger.warn( + `Error updating task ${id}:${type} to mark as unrecognized during claim: ${error.message}`, + logMeta + ); + } + } + } catch (err) { + logger.warn(`Error updating tasks to mark 
as unrecognized during claim: ${err}`, logMeta); + } + } + + // TODO: need a better way to generate stats + const message = `task claimer claimed: ${finalResults.length}; stale: ${staleTasks.length}; conflicts: ${conflicts}; missing: ${missingTasks.length}; updateErrors: ${bulkErrors}; removed: ${removedCount};`; + logger.debug(message, logMeta); + + // build results + const finalResult = { + stats: { + tasksUpdated: finalResults.length, + tasksConflicted: conflicts, + tasksClaimed: finalResults.length, + }, + docs: finalResults, + timing: stopTaskTimer(), + }; + + for (const doc of finalResults) { + events$.next(asTaskClaimEvent(doc.id, asOk(doc), finalResult.timing)); + } + + return finalResult; +} + +interface SearchAvailableTasksResponse { + docs: ConcreteTaskInstance[]; + versionMap: Map<string, ConcreteTaskInstanceVersion>; +} + +async function searchAvailableTasks({ + definitions, + taskTypes, + removedTypes, + excludedTypes, + taskStore, + size, + taskMaxAttempts, +}: OwnershipClaimingOpts): Promise<SearchAvailableTasksResponse> { + const searchedTypes = Array.from(taskTypes) + .concat(Array.from(removedTypes)) + .filter((type) => !excludedTypes.has(type)); + const queryForScheduledTasks = mustBeAllOf( + // Task must be enabled + EnabledTask, + // a task type that's not excluded (may be removed or not) + OneOfTaskTypes('task.taskType', searchedTypes), + // Either a task with idle status and runAt <= now or + // status running or claiming with a retryAt <= now. + shouldBeOneOf(IdleTaskWithExpiredRunAt, RunningOrClaimingTaskWithExpiredRetryAt), + // must have a status that isn't 'unrecognized' + RecognizedTask + ); + + const sort: NonNullable<SearchOpts['sort']> = getClaimSort(definitions); + const query = matchesClauses(queryForScheduledTasks, filterDownBy(InactiveTasks)); + + return await taskStore.fetch({ + query, + sort, + size, + seq_no_primary_term: true, + }); +} + +function applyLimitedConcurrency( + tasks: ConcreteTaskInstance[], + batches: TaskClaimingBatches +): ConcreteTaskInstance[] { + // create a map of task type - concurrency + const limitedBatches = batches.filter(isLimited); + const limitedMap = new Map<string, number>(); + for (const limitedBatch of limitedBatches) { + const { tasksTypes, concurrency } = limitedBatch; + limitedMap.set(tasksTypes, concurrency); + } + + // apply the limited concurrency + const result: ConcreteTaskInstance[] = []; + for (const task of tasks) { + const concurrency = limitedMap.get(task.taskType); + if (concurrency == null) { + result.push(task); + continue; + } + + if (concurrency > 0) { + result.push(task); + limitedMap.set(task.taskType, concurrency - 1); + } + } + + return result; +} diff --git a/x-pack/plugins/task_manager/server/task_store.mock.ts b/x-pack/plugins/task_manager/server/task_store.mock.ts index 861f7d60bd221..c15518eaed510 100644 --- a/x-pack/plugins/task_manager/server/task_store.mock.ts +++ b/x-pack/plugins/task_manager/server/task_store.mock.ts @@ -31,6 +31,8 @@ export const taskStoreMock = { aggregate: jest.fn(), updateByQuery: jest.fn(), bulkGet: jest.fn(), + bulkGetVersions: jest.fn(), + getDocVersions: jest.fn(), index, taskManagerId, } as unknown as jest.Mocked<TaskStore>; diff --git a/x-pack/plugins/task_manager/server/task_store.test.ts b/x-pack/plugins/task_manager/server/task_store.test.ts index 68cc11b7da28e..925dc4d1a4c69 100644 --- a/x-pack/plugins/task_manager/server/task_store.test.ts +++ b/x-pack/plugins/task_manager/server/task_store.test.ts @@ -1269,4 +1269,201 @@ describe('TaskStore', () => { }); 
}); }); + + describe('bulkGetVersions', () => { + let store: TaskStore; + let esClient: ReturnType<typeof elasticsearchServiceMock.createClusterClient>['asInternalUser']; + let childEsClient: ReturnType< + typeof elasticsearchServiceMock.createClusterClient + >['asInternalUser']; + + beforeAll(() => { + esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + childEsClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + esClient.child.mockReturnValue(childEsClient as unknown as Client); + store = new TaskStore({ + logger: mockLogger(), + index: 'tasky', + taskManagerId: '', + serializer, + esClient, + definitions: taskDefinitions, + savedObjectsRepository: savedObjectsClient, + adHocTaskCounter, + allowReadingInvalidState: false, + requestTimeouts: { + update_by_query: 1000, + }, + }); + }); + + test('should return the version of the tasks when found', async () => { + childEsClient.mget.mockResponse({ + docs: [ + { + _index: 'ignored-1', + _id: 'task:some-task-a', + _version: 424242, + _seq_no: 123, + _primary_term: 1, + found: true, + }, + { + _index: 'ignored-2', + _id: 'task:some-task-b', + _version: 31415, + _seq_no: 456, + _primary_term: 2, + found: true, + }, + ], + }); + + const result = await store.bulkGetVersions(['task:some-task-a', 'task:some-task-b']); + expect(result).toMatchInlineSnapshot(` + Array [ + Object { + "esId": "task:some-task-a", + "primaryTerm": 1, + "seqNo": 123, + }, + Object { + "esId": "task:some-task-b", + "primaryTerm": 2, + "seqNo": 456, + }, + ] + `); + }); + + test('should handle errors and missing tasks', async () => { + childEsClient.mget.mockResponse({ + docs: [ + { + _index: 'ignored-1', + _id: 'task:some-task-a', + _version: 424242, + _seq_no: 123, + _primary_term: 1, + found: true, + }, + { + _index: 'ignored-2', + _id: 'task:some-task-b', + found: false, + }, + { + _index: 'ignored-3', + _id: 'task:some-task-c', + error: { + type: 'index_not_found_exception', + reason: 'no such index "ignored-4"', + }, + }, + ], + }); + + const result = await store.bulkGetVersions([ + 'task:some-task-a', + 'task:some-task-b', + 'task:some-task-c', + ]); + expect(result).toMatchInlineSnapshot(` + Array [ + Object { + "esId": "task:some-task-a", + "primaryTerm": 1, + "seqNo": 123, + }, + Object { + "error": "task \\"task:some-task-b\\" not found", + "esId": "task:some-task-b", + }, + Object { + "error": "error getting version for task:some-task-c: index_not_found_exception: no such index \\"ignored-4\\"", + "esId": "task:some-task-c", + }, + ] + `); + }); + }); + + describe('getDocVersions', () => { + let store: TaskStore; + let esClient: ReturnType<typeof elasticsearchServiceMock.createClusterClient>['asInternalUser']; + let childEsClient: ReturnType< + typeof elasticsearchServiceMock.createClusterClient + >['asInternalUser']; + + beforeAll(() => { + esClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + childEsClient = elasticsearchServiceMock.createClusterClient().asInternalUser; + esClient.child.mockReturnValue(childEsClient as unknown as Client); + store = new TaskStore({ + logger: mockLogger(), + index: 'tasky', + taskManagerId: '', + serializer, + esClient, + definitions: taskDefinitions, + savedObjectsRepository: savedObjectsClient, + adHocTaskCounter, + allowReadingInvalidState: false, + requestTimeouts: { + update_by_query: 1000, + }, + }); + }); + + test('should return the version as expected, with errors included', async () => { + childEsClient.mget.mockResponse({ + docs: [ + { + _index: 
'ignored-1', + _id: 'task:some-task-a', + _version: 424242, + _seq_no: 123, + _primary_term: 1, + found: true, + }, + { + _index: 'ignored-2', + _id: 'task:some-task-b', + found: false, + }, + { + _index: 'ignored-3', + _id: 'task:some-task-c', + error: { + type: 'index_not_found_exception', + reason: 'no such index "ignored-4"', + }, + }, + ], + }); + + const result = await store.getDocVersions([ + 'task:some-task-a', + 'task:some-task-b', + 'task:some-task-c', + ]); + expect(result).toMatchInlineSnapshot(` + Map { + "task:some-task-a" => Object { + "esId": "task:some-task-a", + "primaryTerm": 1, + "seqNo": 123, + }, + "task:some-task-b" => Object { + "error": "task \\"task:some-task-b\\" not found", + "esId": "task:some-task-b", + }, + "task:some-task-c" => Object { + "error": "error getting version for task:some-task-c: index_not_found_exception: no such index \\"ignored-4\\"", + "esId": "task:some-task-c", + }, + } + `); + }); + }); }); diff --git a/x-pack/plugins/task_manager/server/task_store.ts b/x-pack/plugins/task_manager/server/task_store.ts index 45bcb6589ab26..3cc50a05259a5 100644 --- a/x-pack/plugins/task_manager/server/task_store.ts +++ b/x-pack/plugins/task_manager/server/task_store.ts @@ -29,6 +29,7 @@ import { asOk, asErr, Result } from './lib/result_type'; import { ConcreteTaskInstance, + ConcreteTaskInstanceVersion, TaskInstance, TaskLifecycle, TaskLifecycleResult, @@ -77,6 +78,7 @@ export interface UpdateByQueryOpts extends SearchOpts { export interface FetchResult { docs: ConcreteTaskInstance[]; + versionMap: Map<string, ConcreteTaskInstanceVersion>; } export type BulkUpdateResult = Result< @@ -415,6 +417,55 @@ export class TaskStore { }); } + /** + * Gets task version info by ids + * + * @param {Array<string>} esIds + * @returns {Promise<ConcreteTaskInstance[]>} + */ + public async bulkGetVersions(ids: string[]): Promise<ConcreteTaskInstanceVersion[]> { + let taskVersions: estypes.MgetResponse<never>; + try { + taskVersions = await this.esClientWithoutRetries.mget<never>({ + index: this.index, + _source: false, + body: { + ids, + }, + }); + } catch (e) { + this.errors$.next(e); + throw e; + } + + const result = taskVersions.docs.map((taskVersion) => { + if (isMGetSuccess(taskVersion)) { + if (!taskVersion.found) { + return { + esId: taskVersion._id, + error: `task "${taskVersion._id}" not found`, + }; + } else { + return { + esId: taskVersion._id, + seqNo: taskVersion._seq_no, + primaryTerm: taskVersion._primary_term, + }; + } + } + + const type = taskVersion.error?.type || 'unknown type of error'; + const reason = taskVersion.error?.reason || 'unknown reason'; + const error = `error getting version for ${taskVersion._id}: ${type}: ${reason}`; + return { + esId: taskVersion._id, + error, + }; + }); + + return result; + } + /** * Gets task lifecycle step by id * @@ -437,9 +488,7 @@ export class TaskStore { const { query } = ensureQueryOnlyReturnsTaskObjects(opts); try { - const { - hits: { hits: tasks }, - } = await this.esClientWithoutRetries.search<SavedObjectsRawDoc['_source']>({ + const result = await this.esClientWithoutRetries.search<SavedObjectsRawDoc['_source']>({ index: this.index, ignore_unavailable: true, body: { @@ -447,6 +496,21 @@ export class TaskStore { query, }, }); + const { + hits: { hits: tasks }, + } = result; + + const versionMap = new Map<string, ConcreteTaskInstanceVersion>(); + for (const task of tasks) { + if (task._seq_no == null || task._primary_term == null) continue; + + const esId = task._id.startsWith('task:') ? 
task._id.slice(5) : task._id; + versionMap.set(esId, { + esId: task._id, + seqNo: task._seq_no, + primaryTerm: task._primary_term, + }); + } return { docs: tasks @@ -457,6 +521,7 @@ export class TaskStore { .map((doc) => omit(doc, 'namespace') as SavedObject<SerializedConcreteTaskInstance>) .map((doc) => savedObjectToConcreteTaskInstance(doc)) .filter((doc): doc is ConcreteTaskInstance => !!doc), + versionMap, }; } catch (e) { this.errors$.next(e); @@ -527,6 +592,15 @@ export class TaskStore { throw e; } } + + public async getDocVersions(esIds: string[]): Promise<Map<string, ConcreteTaskInstanceVersion>> { + const versions = await this.bulkGetVersions(esIds); + const result = new Map<string, ConcreteTaskInstanceVersion>(); + for (const version of versions) { + result.set(version.esId, version); + } + return result; + } } /** @@ -614,3 +688,7 @@ function ensureAggregationOnlyReturnsEnabledTaskObjects(opts: AggregationOpts): query, }; } + +function isMGetSuccess(doc: estypes.MgetResponseItem<unknown>): doc is estypes.GetGetResult { + return (doc as estypes.GetGetResult).found !== undefined; +} diff --git a/x-pack/plugins/task_manager/server/task_type_dictionary.ts b/x-pack/plugins/task_manager/server/task_type_dictionary.ts index ae46978562223..f45cbad172d5a 100644 --- a/x-pack/plugins/task_manager/server/task_type_dictionary.ts +++ b/x-pack/plugins/task_manager/server/task_type_dictionary.ts @@ -113,6 +113,10 @@ export class TaskTypeDictionary { return this.definitions.has(type); } + public size() { + return this.definitions.size; + } + public get(type: string): TaskDefinition { this.ensureHas(type); return this.definitions.get(type)!; diff --git a/x-pack/test/task_manager_claimer_mget/config.ts b/x-pack/test/task_manager_claimer_mget/config.ts new file mode 100644 index 0000000000000..ac2a6a6b98f67 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/config.ts @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import path from 'path'; +import { FtrConfigProviderContext, findTestPluginPaths } from '@kbn/test'; +import { services } from './services'; + +export default async function ({ readConfigFile }: FtrConfigProviderContext) { + const integrationConfig = await readConfigFile(require.resolve('../api_integration/config')); + + return { + testFiles: [require.resolve('./test_suites/task_manager')], + services, + servers: integrationConfig.get('servers'), + esTestCluster: integrationConfig.get('esTestCluster'), + apps: integrationConfig.get('apps'), + screenshots: integrationConfig.get('screenshots'), + junit: { + reportName: 'Task Manager MGet Claimer Functional Tests', + }, + kbnTestServer: { + ...integrationConfig.get('kbnTestServer'), + serverArgs: [ + ...integrationConfig.get('kbnTestServer.serverArgs'), + '--xpack.eventLog.logEntries=true', + '--xpack.eventLog.indexEntries=true', + '--xpack.task_manager.claim_strategy="unsafe_mget"', + '--xpack.task_manager.monitored_aggregated_stats_refresh_rate=5000', + '--xpack.task_manager.ephemeral_tasks.enabled=false', + '--xpack.task_manager.ephemeral_tasks.request_capacity=100', + `--xpack.stack_connectors.enableExperimental=${JSON.stringify(['crowdstrikeConnectorOn'])}`, + ...findTestPluginPaths(path.resolve(__dirname, 'plugins')), + ], + }, + }; +} diff --git a/x-pack/test/task_manager_claimer_mget/ftr_provider_context.d.ts b/x-pack/test/task_manager_claimer_mget/ftr_provider_context.d.ts new file mode 100644 index 0000000000000..aa56557c09df8 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/ftr_provider_context.d.ts @@ -0,0 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { GenericFtrProviderContext } from '@kbn/test'; + +import { services } from './services'; + +export type FtrProviderContext = GenericFtrProviderContext<typeof services, {}>; diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/kibana.jsonc b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/kibana.jsonc new file mode 100644 index 0000000000000..f030d59f9cde3 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/kibana.jsonc @@ -0,0 +1,13 @@ +{ + "type": "plugin", + "id": "@kbn/sample-task-plugin-mget", + "owner": "@elastic/response-ops", + "plugin": { + "id": "sampleTaskPluginMget", + "server": true, + "browser": false, + "requiredPlugins": [ + "taskManager" + ] + } +} diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/package.json b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/package.json new file mode 100644 index 0000000000000..201d6a964ea1f --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/package.json @@ -0,0 +1,14 @@ +{ + "name": "@kbn/sample-task-plugin-mget", + "version": "1.0.0", + "kibana": { + "version": "kibana", + "templateVersion": "1.0.0" + }, + "main": "target/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget", + "scripts": { + "kbn": "node ../../../../../scripts/kbn.js", + "build": "rm -rf './target' && ../../../../../node_modules/.bin/tsc" + }, + "license": "Elastic License 2.0" +} \ No newline at end of file diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/index.ts b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/index.ts new file mode 100644 index 0000000000000..d1354da22390b --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/index.ts @@ -0,0 +1,13 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { PluginInitializerContext } from '@kbn/core/server'; +import { SampleTaskManagerFixturePlugin } from './plugin'; + +export const plugin = async (initContext: PluginInitializerContext) => { + return new SampleTaskManagerFixturePlugin(initContext); +}; diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/init_routes.ts b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/init_routes.ts new file mode 100644 index 0000000000000..3273fe855ad31 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/init_routes.ts @@ -0,0 +1,404 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { schema } from '@kbn/config-schema'; +import { + RequestHandlerContext, + KibanaRequest, + KibanaResponseFactory, + IKibanaResponse, + IRouter, + IScopedClusterClient, + Logger, +} from '@kbn/core/server'; +import { EventEmitter } from 'events'; +import { TaskManagerStartContract } from '@kbn/task-manager-plugin/server'; + +const scope = 'testing'; +const taskManagerQuery = { + bool: { + filter: { + bool: { + must: [ + { + term: { + 'task.scope': scope, + }, + }, + ], + }, + }, + }, +}; + +export function initRoutes( + logger: Logger, + router: IRouter, + taskManagerStart: Promise<TaskManagerStartContract>, + taskTestingEvents: EventEmitter +) { + async function ensureIndexIsRefreshed(client: IScopedClusterClient) { + return await client.asInternalUser.indices.refresh({ + index: '.kibana_task_manager', + }); + } + + logger.info('Initializing task manager testing routes'); + router.post( + { + path: `/api/sample_tasks/schedule`, + validate: { + body: schema.object({ + task: schema.object({ + enabled: schema.boolean({ defaultValue: true }), + taskType: schema.string(), + schedule: schema.maybe( + schema.object({ + interval: schema.string(), + }) + ), + interval: schema.maybe(schema.string()), + params: schema.recordOf(schema.string(), schema.any(), { defaultValue: {} }), + state: schema.recordOf(schema.string(), schema.any(), { defaultValue: {} }), + id: schema.maybe(schema.string()), + timeoutOverride: schema.maybe(schema.string()), + }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + const taskManager = await taskManagerStart; + const { task: taskFields } = req.body; + const task = { + ...taskFields, + scope: [scope], + }; + + const taskResult = await taskManager.schedule(task, { req }); + + return res.ok({ body: taskResult }); + } + ); + + router.post( + { + path: `/api/sample_tasks/run_soon`, + validate: { + body: schema.object({ + task: schema.object({ + id: schema.string({}), + }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + const { + task: { id }, + } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.runSoon(id) }); + } catch (err) { + return res.ok({ body: { id, error: `${err}` } }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/bulk_enable`, + validate: { + body: schema.object({ + taskIds: schema.arrayOf(schema.string()), + runSoon: schema.boolean({ defaultValue: true }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ) { + const { taskIds, runSoon } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.bulkEnable(taskIds, runSoon) }); + } catch (err) { + return res.ok({ body: { taskIds, error: `${err}` } }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/bulk_disable`, + validate: { + body: schema.object({ + taskIds: schema.arrayOf(schema.string()), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ) { + const { taskIds } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.bulkDisable(taskIds) }); + } catch (err) { + return 
res.ok({ body: { taskIds, error: `${err}` } }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/bulk_update_schedules`, + validate: { + body: schema.object({ + taskIds: schema.arrayOf(schema.string()), + schedule: schema.object({ interval: schema.string() }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ) { + const { taskIds, schedule } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.bulkUpdateSchedules(taskIds, schedule) }); + } catch (err) { + return res.ok({ body: { taskIds, error: `${err}` } }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/ephemeral_run_now`, + validate: { + body: schema.object({ + task: schema.object({ + taskType: schema.string(), + state: schema.recordOf(schema.string(), schema.any()), + params: schema.recordOf(schema.string(), schema.any()), + }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest< + any, + any, + { + task: { + taskType: string; + params: Record<string, any>; + state: Record<string, any>; + }; + }, + any + >, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + const { task } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.ephemeralRunNow(task) }); + } catch (err) { + return res.ok({ body: { task, error: `${err}` } }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/ensure_scheduled`, + validate: { + body: schema.object({ + task: schema.object({ + taskType: schema.string(), + params: schema.object({}), + state: schema.maybe(schema.object({})), + id: schema.maybe(schema.string()), + }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + try { + const { task: taskFields } = req.body; + const task = { + ...taskFields, + scope: [scope], + }; + + const taskManager = await taskManagerStart; + const taskResult = await taskManager.ensureScheduled(task, { req }); + + return res.ok({ body: taskResult }); + } catch (err) { + return res.ok({ body: err }); + } + } + ); + + router.post( + { + path: `/api/sample_tasks/event`, + validate: { + body: schema.object({ + event: schema.string(), + data: schema.recordOf(schema.string(), schema.any(), { defaultValue: {} }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + try { + const { event, data } = req.body; + taskTestingEvents.emit(event, data); + return res.ok({ body: event }); + } catch (err) { + return res.ok({ body: err }); + } + } + ); + + router.get( + { + path: `/api/sample_tasks`, + validate: {}, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + try { + const taskManager = await taskManagerStart; + return res.ok({ + body: await taskManager.fetch({ + size: 20, + query: taskManagerQuery, + }), + }); + } catch (err) { + return res.ok({ body: err }); + } + } + ); + + router.get( + { + path: `/api/sample_tasks/task/{taskId}`, + validate: { + params: schema.object({ + taskId: schema.string(), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): 
Promise<IKibanaResponse<any>> { + try { + await ensureIndexIsRefreshed((await context.core).elasticsearch.client); + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.get(req.params.taskId) }); + } catch ({ isBoom, output, message }) { + return res.ok({ body: isBoom ? output.payload : { message } }); + } + } + ); + + router.get( + { + path: `/api/ensure_tasks_index_refreshed`, + validate: {}, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + await ensureIndexIsRefreshed((await context.core).elasticsearch.client); + return res.ok({ body: {} }); + } + ); + + router.delete( + { + path: `/api/sample_tasks`, + validate: {}, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> { + try { + await ensureIndexIsRefreshed((await context.core).elasticsearch.client); + let tasksFound = 0; + const taskManager = await taskManagerStart; + do { + const { docs: tasks } = await taskManager.fetch({ + query: taskManagerQuery, + }); + tasksFound = tasks.length; + await Promise.all(tasks.map((task) => taskManager.remove(task.id))); + } while (tasksFound > 0); + return res.ok({ body: 'OK' }); + } catch ({ isBoom, output, message }) { + return res.ok({ body: isBoom ? output.payload : { message } }); + } + } + ); + + router.get( + { + path: '/api/registered_tasks', + validate: {}, + }, + async ( + context: RequestHandlerContext, + req: KibanaRequest<any, any, any, any>, + res: KibanaResponseFactory + ): Promise<IKibanaResponse<any>> => { + try { + const tm = await taskManagerStart; + return res.ok({ + body: tm.getRegisteredTypes(), + }); + } catch (err) { + return res.badRequest({ body: err }); + } + } + ); +} diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/plugin.ts b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/plugin.ts new file mode 100644 index 0000000000000..9f6944e4cff3a --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/server/plugin.ts @@ -0,0 +1,409 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { random } from 'lodash'; +import { schema } from '@kbn/config-schema'; +import { Plugin, CoreSetup, CoreStart, Logger, PluginInitializerContext } from '@kbn/core/server'; +import { throwRetryableError } from '@kbn/task-manager-plugin/server/task_running'; +import { EventEmitter } from 'events'; +import { firstValueFrom, Subject } from 'rxjs'; +import { + TaskManagerSetupContract, + TaskManagerStartContract, + ConcreteTaskInstance, + EphemeralTask, +} from '@kbn/task-manager-plugin/server'; +import { DEFAULT_MAX_WORKERS } from '@kbn/task-manager-plugin/server/config'; +import { TaskPriority } from '@kbn/task-manager-plugin/server/task'; +import { initRoutes } from './init_routes'; + +// this plugin's dependendencies +export interface SampleTaskManagerFixtureSetupDeps { + taskManager: TaskManagerSetupContract; +} +export interface SampleTaskManagerFixtureStartDeps { + taskManager: TaskManagerStartContract; +} + +export class SampleTaskManagerFixturePlugin + implements + Plugin<void, void, SampleTaskManagerFixtureSetupDeps, SampleTaskManagerFixtureStartDeps> +{ + taskManagerStart$: Subject<TaskManagerStartContract> = new Subject<TaskManagerStartContract>(); + taskManagerStart: Promise<TaskManagerStartContract> = firstValueFrom(this.taskManagerStart$); + logger: Logger; + + constructor(initContext: PluginInitializerContext) { + this.logger = initContext.logger.get(); + } + + public setup(core: CoreSetup, { taskManager }: SampleTaskManagerFixtureSetupDeps) { + const taskTestingEvents = new EventEmitter(); + taskTestingEvents.setMaxListeners(DEFAULT_MAX_WORKERS * 2); + + const tmStart = this.taskManagerStart; + + const defaultSampleTaskConfig = { + timeout: '1m', + // This task allows tests to specify its behavior (whether it reschedules itself, whether it errors, etc) + // taskInstance.params has the following optional fields: + // nextRunMilliseconds: number - If specified, the run method will return a runAt that is now + nextRunMilliseconds + // failWith: string - If specified, the task will throw an error with the specified message + // failOn: number - If specified, the task will only throw the `failWith` error when `count` equals to the failOn value + // waitForParams : boolean - should the task stall ands wait to receive params asynchronously before using the default params + // waitForEvent : string - if provided, the task will stall (after completing the run) and wait for an asyn event before completing + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const { params, state, id } = taskInstance; + const prevState = state || { count: 0 }; + + const count = (prevState.count || 0) + 1; + + const runParams = { + ...params, + // if this task requires custom params provided async - wait for them + ...(params.waitForParams ? 
await once(taskTestingEvents, id) : {}), + }; + + if (runParams.failWith) { + if (!runParams.failOn || (runParams.failOn && count === runParams.failOn)) { + throw new Error(runParams.failWith); + } + } + + const [{ elasticsearch }] = await core.getStartServices(); + await elasticsearch.client.asInternalUser.index({ + index: '.kibana_task_manager_test_result', + body: { + type: 'task', + taskId: taskInstance.id, + params: JSON.stringify(runParams), + state: JSON.stringify(state), + ranAt: new Date(), + }, + refresh: true, + }); + + // Stall task run until a certain event is triggered + if (runParams.waitForEvent) { + await once(taskTestingEvents, runParams.waitForEvent); + } + + return { + state: { count }, + runAt: millisecondsFromNow(runParams.nextRunMilliseconds), + }; + }, + }), + }; + + taskManager.registerTaskDefinitions({ + sampleTask: { + ...defaultSampleTaskConfig, + title: 'Sample Task', + description: 'A sample task for testing the task_manager.', + stateSchemaByVersion: { + 1: { + up: (state: Record<string, unknown>) => ({ count: state.count }), + schema: schema.object({ + count: schema.maybe(schema.number()), + }), + }, + }, + }, + singleAttemptSampleTask: { + ...defaultSampleTaskConfig, + title: 'Failing Sample Task', + description: + 'A sample task for testing the task_manager that fails on the first attempt to run.', + // fail after the first failed run + maxAttempts: 1, + stateSchemaByVersion: { + 1: { + up: (state: Record<string, unknown>) => ({ count: state.count }), + schema: schema.object({ + count: schema.maybe(schema.number()), + }), + }, + }, + }, + sampleTaskWithSingleConcurrency: { + ...defaultSampleTaskConfig, + title: 'Sample Task With Single Concurrency', + maxConcurrency: 1, + timeout: '60s', + description: 'A sample task that can only have one concurrent instance.', + stateSchemaByVersion: { + 1: { + up: (state: Record<string, unknown>) => ({ count: state.count }), + schema: schema.object({ + count: schema.maybe(schema.number()), + }), + }, + }, + }, + sampleTaskWithLimitedConcurrency: { + ...defaultSampleTaskConfig, + title: 'Sample Task With Max Concurrency of 2', + maxConcurrency: 2, + timeout: '60s', + description: 'A sample task that can only have two concurrent instance.', + stateSchemaByVersion: { + 1: { + up: (state: Record<string, unknown>) => ({ count: state.count }), + schema: schema.object({ + count: schema.maybe(schema.number()), + }), + }, + }, + }, + sampleRecurringTaskTimingOut: { + title: 'Sample Recurring Task that Times Out', + description: 'A sample task that times out each run.', + maxAttempts: 3, + timeout: '1s', + createTaskRunner: () => ({ + async run() { + return await new Promise((resolve) => {}); + }, + }), + }, + sampleAdHocTaskTimingOut: { + title: 'Sample Ad-Hoc Task that Times Out', + description: 'A sample task that times out.', + maxAttempts: 3, + timeout: '1s', + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => { + let isCancelled: boolean = false; + return { + async run() { + // wait for 15 seconds + await new Promise((r) => setTimeout(r, 15000)); + + if (!isCancelled) { + const [{ elasticsearch }] = await core.getStartServices(); + await elasticsearch.client.asInternalUser.index({ + index: '.kibana_task_manager_test_result', + body: { + type: 'task', + taskType: 'sampleAdHocTaskTimingOut', + taskId: taskInstance.id, + }, + refresh: true, + }); + } + }, + async cancel() { + isCancelled = true; + }, + }; + }, + }, + sampleRecurringTaskWhichHangs: { + title: 'Sample Recurring Task that Hangs for a 
minute', + description: 'A sample task that Hangs for a minute on each run.', + maxAttempts: 3, + timeout: '60s', + createTaskRunner: () => ({ + async run() { + return await new Promise((resolve) => {}); + }, + }), + }, + sampleOneTimeTaskThrowingError: { + title: 'Sample One-Time Task that throws an error', + description: 'A sample task that throws an error each run.', + maxAttempts: 3, + createTaskRunner: () => ({ + async run() { + throwRetryableError(new Error('Error'), new Date(Date.now() + random(2, 5) * 1000)); + }, + }), + }, + taskToDisable: { + title: 'Task used for testing it being disabled', + description: '', + maxAttempts: 1, + paramsSchema: schema.object({}), + createTaskRunner: () => ({ + async run() {}, + }), + }, + lowPriorityTask: { + title: 'Task used for testing priority claiming', + priority: TaskPriority.Low, + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const { state, schedule } = taskInstance; + const prevState = state || { count: 0 }; + + const count = (prevState.count || 0) + 1; + + const [{ elasticsearch }] = await core.getStartServices(); + await elasticsearch.client.asInternalUser.index({ + index: '.kibana_task_manager_test_result', + body: { + type: 'task', + taskType: 'lowPriorityTask', + taskId: taskInstance.id, + state: JSON.stringify(state), + ranAt: new Date(), + }, + refresh: true, + }); + + return { + state: { count }, + schedule, + }; + }, + }), + }, + }); + + const taskWithTiming = { + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const stopTiming = startTaskTimer(); + + const { + params: { delay = 0 }, + state: { timings = [] }, + } = taskInstance; + + if (delay) { + await new Promise((resolve) => { + setTimeout(resolve, delay); + }); + } + + return { + state: { timings: [...timings, stopTiming()] }, + }; + }, + }), + }; + + taskManager.registerTaskDefinitions({ + timedTask: { + title: 'Task With Tracked Timings', + timeout: '60s', + description: 'A task that tracks its execution timing.', + ...taskWithTiming, + }, + timedTaskWithSingleConcurrency: { + title: 'Task With Tracked Timings and Single Concurrency', + maxConcurrency: 1, + timeout: '60s', + description: + 'A task that can only have one concurrent instance and tracks its execution timing.', + ...taskWithTiming, + }, + timedTaskWithLimitedConcurrency: { + title: 'Task With Tracked Timings and Limited Concurrency', + maxConcurrency: 2, + timeout: '60s', + description: + 'A task that can only have two concurrent instance and tracks its execution timing.', + ...taskWithTiming, + }, + taskWhichExecutesOtherTasksEphemerally: { + title: 'Task Which Executes Other Tasks Ephemerally', + description: 'A sample task used to validate how ephemeral tasks are executed.', + maxAttempts: 1, + timeout: '60s', + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const { + params: { tasks = [] }, + } = taskInstance; + + const tm = await tmStart; + const executions = await Promise.all( + (tasks as EphemeralTask[]).map(async (task) => { + return tm + .ephemeralRunNow(task) + .then((result) => ({ + result, + })) + .catch((error) => ({ + error, + })); + }) + ); + + return { + state: { executions }, + }; + }, + }), + }, + }); + + taskManager.addMiddleware({ + async beforeSave({ taskInstance, ...opts }) { + const modifiedInstance = { + ...taskInstance, + params: { + originalParams: taskInstance.params, + superFly: 'My middleware param!', + }, + }; + + 
return { + ...opts, + taskInstance: modifiedInstance, + }; + }, + + async beforeRun({ taskInstance, ...opts }) { + return { + ...opts, + taskInstance: { + ...taskInstance, + params: taskInstance.params.originalParams, + }, + }; + }, + + async beforeMarkRunning(context) { + if (context.taskInstance?.params?.originalParams?.throwOnMarkAsRunning) { + throw new Error(`Sample task ${context.taskInstance.id} threw on MarkAsRunning`); + } + return context; + }, + }); + initRoutes(this.logger, core.http.createRouter(), this.taskManagerStart, taskTestingEvents); + } + + public start(core: CoreStart, { taskManager }: SampleTaskManagerFixtureStartDeps) { + this.taskManagerStart$.next(taskManager); + this.taskManagerStart$.complete(); + } + public stop() {} +} + +function millisecondsFromNow(ms: number) { + if (!ms) { + return; + } + + const dt = new Date(); + dt.setTime(dt.getTime() + ms); + return dt; +} + +const once = function (emitter: EventEmitter, event: string): Promise<Record<string, unknown>> { + return new Promise((resolve) => { + emitter.once(event, (data) => resolve(data || {})); + }); +}; + +function startTaskTimer(): () => { start: number; stop: number } { + const start = Date.now(); + return () => ({ start, stop: Date.now() }); +} diff --git a/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/tsconfig.json b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/tsconfig.json new file mode 100644 index 0000000000000..6038cbe20ae83 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../../../../tsconfig.base.json", + "compilerOptions": { + "outDir": "target/types" + }, + "include": [ + "**/*.ts", + "**/*.tsx", + ], + "exclude": [ + "target/**/*", + ], + "kbn_references": [ + "@kbn/core", + "@kbn/task-manager-plugin", + "@kbn/config-schema", + ] +} diff --git a/x-pack/test/task_manager_claimer_mget/services.ts b/x-pack/test/task_manager_claimer_mget/services.ts new file mode 100644 index 0000000000000..59a0aff5ec00e --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/services.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { services } from '../api_integration/services'; diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/background_task_utilization_route.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/background_task_utilization_route.ts new file mode 100644 index 0000000000000..9c9dcbbe15126 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/background_task_utilization_route.ts @@ -0,0 +1,103 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import expect from '@kbn/expect'; +import url from 'url'; +import supertest from 'supertest'; +import { MonitoredUtilization } from '@kbn/task-manager-plugin/server/routes/background_task_utilization'; +import { MonitoredStat } from '@kbn/task-manager-plugin/server/monitoring/monitoring_stats_stream'; +import { BackgroundTaskUtilizationStat } from '@kbn/task-manager-plugin/server/monitoring/background_task_utilization_statistics'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +export default function ({ getService }: FtrProviderContext) { + const config = getService('config'); + const retry = getService('retry'); + const request = supertest(url.format(config.get('servers.kibana'))); + + const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + function getUtilizationRequest(isInternal: boolean = true) { + return request + .get(`/${isInternal ? 'internal' : 'api'}/task_manager/_background_task_utilization`) + .set('kbn-xsrf', 'foo'); + } + + function getUtilization(isInternal: boolean = true): Promise<MonitoredUtilization> { + return getUtilizationRequest(isInternal) + .expect(200) + .then((response) => response.body); + } + + function getBackgroundTaskUtilization(isInternal: boolean = true): Promise<MonitoredUtilization> { + return retry.try(async () => { + const utilization = await getUtilization(isInternal); + + if (utilization.stats) { + return utilization; + } + + await delay(500); + throw new Error('Stats have not run yet'); + }); + } + + describe('background task utilization', () => { + it('should return the task manager background task utilization for recurring stats', async () => { + const { + value: { + recurring: { ran }, + }, + } = (await getBackgroundTaskUtilization(true)) + .stats as MonitoredStat<BackgroundTaskUtilizationStat>; + const serviceTime = ran.service_time; + expect(typeof serviceTime.actual).to.eql('number'); + expect(typeof serviceTime.adjusted).to.eql('number'); + expect(typeof serviceTime.task_counter).to.eql('number'); + }); + + it('should return the task manager background task utilization for adhoc stats', async () => { + const { + value: { + adhoc: { created, ran }, + }, + } = (await getBackgroundTaskUtilization(true)) + .stats as MonitoredStat<BackgroundTaskUtilizationStat>; + const serviceTime = ran.service_time; + expect(typeof created.counter).to.eql('number'); + + expect(typeof serviceTime.actual).to.eql('number'); + expect(typeof serviceTime.adjusted).to.eql('number'); + expect(typeof serviceTime.task_counter).to.eql('number'); + }); + + it('should include load stat', async () => { + const { + value: { load }, + } = (await getBackgroundTaskUtilization(true)) + .stats as MonitoredStat<BackgroundTaskUtilizationStat>; + expect(typeof load).to.eql('number'); + }); + + it('should return expected fields for internal route', async () => { + const monitoredStat = (await getBackgroundTaskUtilization(true)).stats; + expect(monitoredStat?.timestamp).not.to.be(undefined); + expect(monitoredStat?.value).not.to.be(undefined); + expect(monitoredStat?.value?.adhoc).not.to.be(undefined); + expect(monitoredStat?.value?.recurring).not.to.be(undefined); + expect(monitoredStat?.value?.load).not.to.be(undefined); + }); + + it('should return expected fields for public route', async () => { + const monitoredStat = (await getBackgroundTaskUtilization(false)).stats; + expect(monitoredStat?.timestamp).not.to.be(undefined); + expect(monitoredStat?.value).not.to.be(undefined); + 
expect(monitoredStat?.value?.adhoc).to.be(undefined); + expect(monitoredStat?.value?.recurring).to.be(undefined); + expect(monitoredStat?.value?.load).not.to.be(undefined); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/health_route.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/health_route.ts new file mode 100644 index 0000000000000..066a004df3814 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/health_route.ts @@ -0,0 +1,339 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import url from 'url'; +import { keyBy, mapValues } from 'lodash'; +import supertest from 'supertest'; +import { ConcreteTaskInstance } from '@kbn/task-manager-plugin/server'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +interface MonitoringStats { + last_update: string; + status: string; + stats: { + configuration: { + timestamp: string; + value: Record<string, object>; + }; + workload: { + timestamp: string; + value: { + count: number; + task_types: Record<string, object>; + schedule: Array<[string, number]>; + overdue: number; + non_recurring: number; + owner_ids: number; + estimated_schedule_density: number[]; + capacity_requirements: { + per_minute: number; + per_hour: number; + per_day: number; + }; + }; + }; + runtime: { + timestamp: string; + value: { + drift: Record<string, object>; + drift_by_type: Record<string, Record<string, object>>; + load: Record<string, object>; + execution: { + duration: Record<string, Record<string, object>>; + persistence: Record<string, number>; + result_frequency_percent_as_number: Record<string, Record<string, object>>; + }; + polling: { + last_successful_poll: string; + last_polling_delay: string; + duration: Record<string, object>; + claim_duration: Record<string, object>; + result_frequency_percent_as_number: Record<string, number>; + }; + }; + }; + capacity_estimation: { + timestamp: string; + value: { + observed: { + observed_kibana_instances: number; + max_throughput_per_minute: number; + max_throughput_per_minute_per_kibana: number; + minutes_to_drain_overdue: number; + avg_required_throughput_per_minute: number; + avg_required_throughput_per_minute_per_kibana: number; + avg_recurring_required_throughput_per_minute: number; + avg_recurring_required_throughput_per_minute_per_kibana: number; + }; + proposed: { + min_required_kibana: number; + avg_recurring_required_throughput_per_minute_per_kibana: number; + avg_required_throughput_per_minute: number; + avg_required_throughput_per_minute_per_kibana: number; + }; + }; + }; + }; +} + +export default function ({ getService }: FtrProviderContext) { + const config = getService('config'); + const retry = getService('retry'); + const request = supertest(url.format(config.get('servers.kibana'))); + + const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + function getHealthRequest() { + return request.get('/api/task_manager/_health').set('kbn-xsrf', 'foo'); + } + + function getHealth(): Promise<MonitoringStats> { + return getHealthRequest() + .expect(200) + .then((response) => response.body); + } + + function getHealthForSampleTask(): Promise<MonitoringStats> { + return retry.try(async () => { + const health = await getHealth(); + + // 
only return health stats once they contain sampleTask, if requested + if (health.stats.runtime.value.drift_by_type.sampleTask) { + return health; + } + + // if sampleTask is not in the metrics, wait a bit and retry + await delay(500); + throw new Error('sampleTask has not yet run'); + }); + } + + function scheduleTask(task: Partial<ConcreteTaskInstance>): Promise<ConcreteTaskInstance> { + return request + .post('/api/sample_tasks/schedule') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: ConcreteTaskInstance }) => response.body); + } + + const monitoredAggregatedStatsRefreshRate = 5000; + + describe('health', () => { + it('should return basic configuration of task manager', async () => { + const health = await getHealth(); + expect(health.status).to.eql('OK'); + expect(health.stats.configuration.value).to.eql({ + poll_interval: 3000, + monitored_aggregated_stats_refresh_rate: monitoredAggregatedStatsRefreshRate, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + custom: {}, + default: { + error_threshold: 90, + warn_threshold: 80, + }, + }, + request_capacity: 1000, + max_workers: 10, + }); + }); + + it('should return the task manager workload', async () => { + const health = await getHealth(); + const { + status, + stats: { workload }, + } = health; + + expect(status).to.eql('OK'); + + const sumSampleTaskInWorkload = + ( + workload.value.task_types as { + sampleTask?: { count: number }; + } + ).sampleTask?.count ?? 0; + const scheduledWorkload = mapValues( + keyBy(workload.value.schedule as Array<[string, number]>, ([interval, count]) => interval), + ([, count]) => count + ) as unknown as { '37m': number | undefined; '37s': number | undefined }; + + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '37s' }, + }); + + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '37m' }, + }); + + await retry.try(async () => { + // workload is configured to refresh every 5s in FTs + await delay(monitoredAggregatedStatsRefreshRate); + + const workloadAfterScheduling = (await getHealthForSampleTask()).stats.workload.value; + + expect( + (workloadAfterScheduling.task_types as { sampleTask: { count: number } }).sampleTask.count + ).to.eql(sumSampleTaskInWorkload + 2); + + const schedulesWorkloadAfterScheduling = mapValues( + keyBy( + workloadAfterScheduling.schedule as Array<[string, number]>, + ([interval]) => interval + ), + ([, count]) => count + ) as unknown as { + '37m': number; + '37s': number; + }; + expect(schedulesWorkloadAfterScheduling['37s']).to.eql(1 + (scheduledWorkload['37s'] ?? 0)); + expect(schedulesWorkloadAfterScheduling['37m']).to.eql(1 + (scheduledWorkload['37m'] ?? 
0)); + }); + }); + + it('should return a breakdown of idleTasks in the task manager workload', async () => { + const { + capacity_estimation: { + value: { observed, proposed }, + }, + } = (await getHealth()).stats; + + expect(typeof observed.observed_kibana_instances).to.eql('number'); + expect(typeof observed.max_throughput_per_minute).to.eql('number'); + expect(typeof observed.max_throughput_per_minute_per_kibana).to.eql('number'); + expect(typeof observed.minutes_to_drain_overdue).to.eql('number'); + expect(typeof observed.avg_required_throughput_per_minute).to.eql('number'); + expect(typeof observed.avg_required_throughput_per_minute_per_kibana).to.eql('number'); + expect(typeof observed.avg_recurring_required_throughput_per_minute).to.eql('number'); + expect(typeof observed.avg_recurring_required_throughput_per_minute_per_kibana).to.eql( + 'number' + ); + + expect(typeof proposed.min_required_kibana).to.eql('number'); + expect(typeof proposed.avg_recurring_required_throughput_per_minute_per_kibana).to.eql( + 'number' + ); + expect(typeof proposed.avg_required_throughput_per_minute_per_kibana).to.eql('number'); + }); + + it('should return an estimation of task manager capacity as an array', async () => { + const { + workload: { value: workload }, + } = (await getHealth()).stats; + + expect(typeof workload.overdue).to.eql('number'); + + expect(typeof workload.non_recurring).to.eql('number'); + expect(typeof workload.owner_ids).to.eql('number'); + + expect(typeof workload.capacity_requirements.per_minute).to.eql('number'); + expect(typeof workload.capacity_requirements.per_hour).to.eql('number'); + expect(typeof workload.capacity_requirements.per_day).to.eql('number'); + + expect(Array.isArray(workload.estimated_schedule_density)).to.eql(true); + }); + + it('should return the task manager runtime stats', async () => { + await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '5s' }, + }); + + const { + runtime: { + // eslint-disable-next-line @typescript-eslint/naming-convention + value: { drift, drift_by_type, load, polling, execution }, + }, + } = (await getHealthForSampleTask()).stats; + + expect(isNaN(Date.parse(polling.last_successful_poll as string))).to.eql(false); + expect(isNaN(Date.parse(polling.last_polling_delay as string))).to.eql(false); + expect(typeof polling.result_frequency_percent_as_number.NoTasksClaimed).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.RanOutOfCapacity).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.PoolFilled).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.NoAvailableWorkers).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.RunningAtCapacity).to.eql('number'); + expect(typeof polling.result_frequency_percent_as_number.Failed).to.eql('number'); + + expect(typeof polling.duration.p50).to.eql('number'); + expect(typeof polling.duration.p90).to.eql('number'); + expect(typeof polling.duration.p95).to.eql('number'); + expect(typeof polling.duration.p99).to.eql('number'); + + expect(typeof polling.claim_duration.p50).to.eql('number'); + expect(typeof polling.claim_duration.p90).to.eql('number'); + expect(typeof polling.claim_duration.p95).to.eql('number'); + expect(typeof polling.claim_duration.p99).to.eql('number'); + + expect(typeof drift.p50).to.eql('number'); + expect(typeof drift.p90).to.eql('number'); + expect(typeof drift.p95).to.eql('number'); + expect(typeof drift.p99).to.eql('number'); + + expect(typeof 
drift_by_type.sampleTask.p50).to.eql('number'); + expect(typeof drift_by_type.sampleTask.p90).to.eql('number'); + expect(typeof drift_by_type.sampleTask.p95).to.eql('number'); + expect(typeof drift_by_type.sampleTask.p99).to.eql('number'); + + expect(typeof load.p50).to.eql('number'); + expect(typeof load.p90).to.eql('number'); + expect(typeof load.p95).to.eql('number'); + expect(typeof load.p99).to.eql('number'); + + expect(typeof execution.duration.sampleTask.p50).to.eql('number'); + expect(typeof execution.duration.sampleTask.p90).to.eql('number'); + expect(typeof execution.duration.sampleTask.p95).to.eql('number'); + expect(typeof execution.duration.sampleTask.p99).to.eql('number'); + + expect(typeof execution.persistence.ephemeral).to.eql('number'); + expect(typeof execution.persistence.non_recurring).to.eql('number'); + expect(typeof execution.persistence.recurring).to.eql('number'); + + expect(typeof execution.result_frequency_percent_as_number.sampleTask.Success).to.eql( + 'number' + ); + expect(typeof execution.result_frequency_percent_as_number.sampleTask.RetryScheduled).to.eql( + 'number' + ); + expect(typeof execution.result_frequency_percent_as_number.sampleTask.Failed).to.eql( + 'number' + ); + }); + + it('should exclude disabled tasks', async () => { + const interval = '9s'; + await scheduleTask({ + enabled: false, + taskType: 'taskToDisable', + schedule: { interval }, + }); + + const timestamp = new Date(); + + const health = await retry.try(async () => { + const result = await getHealth(); + expect(new Date(result.stats.runtime.timestamp).getTime()).to.be.greaterThan( + timestamp.getTime() + ); + expect(new Date(result.stats.workload.timestamp).getTime()).to.be.greaterThan( + timestamp.getTime() + ); + return result; + }); + + expect(health.stats.runtime.value.execution.duration.taskToDisable).to.eql(undefined); + expect(health.stats.workload.value.task_types.taskToDisable).to.eql(undefined); + expect(health.stats.workload.value.schedule.find(([val]) => val === interval)).to.eql( + undefined + ); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/index.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/index.ts new file mode 100644 index 0000000000000..66ba9a0108afc --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/index.ts @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { FtrProviderContext } from '../../ftr_provider_context'; + +export default function ({ loadTestFile }: FtrProviderContext) { + describe('task_manager with mget task claimer', function taskManagerSuite() { + loadTestFile(require.resolve('./task_priority')); + loadTestFile(require.resolve('./background_task_utilization_route')); + loadTestFile(require.resolve('./metrics_route')); + loadTestFile(require.resolve('./health_route')); + loadTestFile(require.resolve('./task_management')); + loadTestFile(require.resolve('./task_management_scheduled_at')); + loadTestFile(require.resolve('./task_management_removed_types')); + + loadTestFile(require.resolve('./migrations')); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/metrics_route.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/metrics_route.ts new file mode 100644 index 0000000000000..4fad194abf368 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/metrics_route.ts @@ -0,0 +1,328 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import url from 'url'; +import supertest from 'supertest'; +import { NodeMetrics } from '@kbn/task-manager-plugin/server/routes/metrics'; +import { ALERTING_CASES_SAVED_OBJECT_INDEX } from '@kbn/core-saved-objects-server'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +export default function ({ getService }: FtrProviderContext) { + const config = getService('config'); + const retry = getService('retry'); + const request = supertest(url.format(config.get('servers.kibana'))); + const es = getService('es'); + + const delay = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + + function getMetricsRequest(reset: boolean = false) { + return request + .get(`/api/task_manager/metrics${reset ? 
'' : '?reset=false'}`) + .set('kbn-xsrf', 'foo') + .expect(200) + .then((response) => response.body); + } + + function getMetrics( + reset: boolean = false, + callback?: (metrics: NodeMetrics) => boolean + ): Promise<NodeMetrics> { + return retry.try(async () => { + const metrics = await getMetricsRequest(reset); + + if (metrics.metrics) { + if ((callback && callback(metrics)) || !callback) { + return metrics; + } + } + + await delay(500); + throw new Error('Expected metrics not received'); + }); + } + + describe('task manager metrics', () => { + describe('task claim', () => { + it('should increment task claim success/total counters', async () => { + // counters are reset every 30 seconds, so wait until the start of a + // fresh counter cycle to make sure values are incrementing + const initialMetrics = ( + await getMetrics(false, (metrics) => metrics?.metrics?.task_claim?.value.total === 1) + ).metrics; + expect(initialMetrics).not.to.be(null); + expect(initialMetrics?.task_claim).not.to.be(null); + expect(initialMetrics?.task_claim?.value).not.to.be(null); + + let previousTaskClaimSuccess = initialMetrics?.task_claim?.value.total!; + let previousTaskClaimTotal = initialMetrics?.task_claim?.value.success!; + let previousTaskClaimTimestamp: string = initialMetrics?.task_claim?.timestamp!; + + for (let i = 0; i < 5; ++i) { + const metrics = ( + await getMetrics( + false, + (m: NodeMetrics) => m.metrics?.task_claim?.timestamp !== previousTaskClaimTimestamp + ) + ).metrics; + expect(metrics).not.to.be(null); + expect(metrics?.task_claim).not.to.be(null); + expect(metrics?.task_claim?.value).not.to.be(null); + + expect(metrics?.task_claim?.value.success).to.be.greaterThan(previousTaskClaimSuccess); + expect(metrics?.task_claim?.value.total).to.be.greaterThan(previousTaskClaimTotal); + + previousTaskClaimTimestamp = metrics?.task_claim?.timestamp!; + previousTaskClaimSuccess = metrics?.task_claim?.value.success!; + previousTaskClaimTotal = metrics?.task_claim?.value.total!; + + // check that duration histogram exists + expect(metrics?.task_claim?.value.duration).not.to.be(null); + expect(Array.isArray(metrics?.task_claim?.value.duration.counts)).to.be(true); + expect(Array.isArray(metrics?.task_claim?.value.duration.values)).to.be(true); + } + }); + + it('should reset task claim success/total counters at an interval', async () => { + const initialCounterValue = 7; + const initialMetrics = ( + await getMetrics( + false, + (metrics) => metrics?.metrics?.task_claim?.value.total === initialCounterValue + ) + ).metrics; + expect(initialMetrics).not.to.be(null); + expect(initialMetrics?.task_claim).not.to.be(null); + expect(initialMetrics?.task_claim?.value).not.to.be(null); + + // retry until counter value resets + const resetMetrics = ( + await getMetrics(false, (m: NodeMetrics) => m?.metrics?.task_claim?.value.total === 1) + ).metrics; + expect(resetMetrics).not.to.be(null); + expect(resetMetrics?.task_claim).not.to.be(null); + expect(resetMetrics?.task_claim?.value).not.to.be(null); + }); + + it('should increment task run framework_error counter', async () => { + const initialCounterValue = 1; + const initialMetrics = ( + await getMetrics( + false, + (metrics) => metrics?.metrics?.task_claim?.value.total === initialCounterValue + ) + ).metrics; + expect(initialMetrics).not.to.be(null); + expect(initialMetrics?.task_claim).not.to.be(null); + expect(initialMetrics?.task_claim?.value).not.to.be(null); + + let previousTaskClaimTimestamp: string = initialMetrics?.task_claim?.timestamp!; + + for (let i 
= 0; i < 5; ++i) { + const metrics = ( + await getMetrics( + true, + (m: NodeMetrics) => m.metrics?.task_claim?.timestamp !== previousTaskClaimTimestamp + ) + ).metrics; + expect(metrics).not.to.be(null); + expect(metrics?.task_claim).not.to.be(null); + expect(metrics?.task_claim?.value).not.to.be(null); + + expect(metrics?.task_claim?.value.success).to.equal(1); + expect(metrics?.task_claim?.value.total).to.equal(1); + + previousTaskClaimTimestamp = metrics?.task_claim?.timestamp!; + + // check that duration histogram exists + expect(metrics?.task_claim?.value.duration).not.to.be(null); + expect(Array.isArray(metrics?.task_claim?.value.duration.counts)).to.be(true); + expect(Array.isArray(metrics?.task_claim?.value.duration.values)).to.be(true); + } + }); + }); + + describe('task run', () => { + let ruleId: string | null = null; + before(async () => { + // create a rule that fires actions + const rule = await request + .post(`/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .send({ + enabled: true, + name: 'test rule', + tags: [], + rule_type_id: '.es-query', + consumer: 'alerts', + // set schedule long so we can control when it runs + schedule: { interval: '1d' }, + actions: [], + params: { + aggType: 'count', + esQuery: '{\n "query":{\n "match_all" : {}\n }\n}', + excludeHitsFromPreviousRun: false, + groupBy: 'all', + index: ['.kibana-event-log*'], + searchType: 'esQuery', + size: 100, + termSize: 5, + threshold: [0], + thresholdComparator: '>', + timeField: '@timestamp', + timeWindowSize: 5, + timeWindowUnit: 'm', + }, + }) + .expect(200) + .then((response) => response.body); + + ruleId = rule.id; + }); + + after(async () => { + // delete rule + await request.delete(`/api/alerting/rule/${ruleId}`).set('kbn-xsrf', 'foo').expect(204); + }); + + it('should increment task run success/not_timed_out/total counters', async () => { + const initialMetrics = ( + await getMetrics( + false, + (metrics) => + metrics?.metrics?.task_run?.value.by_type.alerting?.total === 1 && + metrics?.metrics?.task_run?.value.by_type.alerting?.not_timed_out === 1 && + metrics?.metrics?.task_run?.value.by_type.alerting?.success === 1 && + metrics?.metrics?.task_run?.value.by_type.alerting?.user_errors === 0 && + metrics?.metrics?.task_run?.value.by_type.alerting?.framework_errors === 0 + ) + ).metrics; + expect(initialMetrics).not.to.be(null); + expect(initialMetrics?.task_claim).not.to.be(null); + expect(initialMetrics?.task_claim?.value).not.to.be(null); + + for (let i = 0; i < 1; ++i) { + // run the rule and expect counters to increment + await request + .post('/api/sample_tasks/run_soon') + .set('kbn-xsrf', 'xxx') + .send({ task: { id: ruleId } }) + .expect(200); + + const metrics = ( + await getMetrics( + false, + (m) => + m?.metrics?.task_run?.value.by_type.alerting?.total === i + 2 && + m?.metrics?.task_run?.value.by_type.alerting?.not_timed_out === i + 2 && + m?.metrics?.task_run?.value.by_type.alerting?.success === i + 2 && + m?.metrics?.task_run?.value.by_type.alerting?.user_errors === 0 && + m?.metrics?.task_run?.value.by_type.alerting?.framework_errors === 0 + ) + ).metrics; + + // check that delay histogram exists + expect(metrics?.task_run?.value?.overall?.delay).not.to.be(null); + expect(Array.isArray(metrics?.task_run?.value?.overall?.delay.counts)).to.be(true); + expect(Array.isArray(metrics?.task_run?.value?.overall?.delay.values)).to.be(true); + } + + // counter should reset on its own + await getMetrics( + false, + (metrics) => + metrics?.metrics?.task_run?.value.by_type.alerting?.total === 0 && 
+ metrics?.metrics?.task_run?.value.by_type.alerting?.not_timed_out === 0 && + metrics?.metrics?.task_run?.value.by_type.alerting?.success === 0 + ); + }); + + it('should increment task run framework_error counter', async () => { + // modify the rule to get it fire a decryption error + await es.updateByQuery({ + index: ALERTING_CASES_SAVED_OBJECT_INDEX, + body: { + script: { + lang: 'painless', + source: 'ctx._source.alert.params.foo = "bar"', + }, + query: { ids: { values: [`alert:${ruleId}`] } }, + }, + refresh: true, + conflicts: 'proceed', + }); + + // run the rule and expect counters to increment + await request + .post('/api/sample_tasks/run_soon') + .set('kbn-xsrf', 'xxx') + .send({ task: { id: ruleId } }) + .expect(200); + + const metrics = ( + await getMetrics(true, (m) => m?.metrics?.task_run?.value.overall.framework_errors! === 1) + ).metrics; + + const total = metrics?.task_run?.value.overall.total || 0; + const success = metrics?.task_run?.value.overall.success || 0; + + expect(total - success).to.be(1); + }); + + it('should increment task run user_errors counter', async () => { + // modify the rule to get it fire a validation error + await es.updateByQuery({ + index: ALERTING_CASES_SAVED_OBJECT_INDEX, + body: { + script: { + lang: 'painless', + source: 'ctx._source.alert.params.foo = "bar"', + }, + query: { ids: { values: [`alert:${ruleId}`] } }, + }, + refresh: true, + conflicts: 'proceed', + }); + + // update apiKey to fix decryption error + await request + .post(`/api/alerts/alert/${ruleId}/_update_api_key`) + .set('kbn-xsrf', 'xxx') + .expect(204); + + // run the rule and expect counters to increment + await request + .post('/api/sample_tasks/run_soon') + .set('kbn-xsrf', 'xxx') + .send({ task: { id: ruleId } }) + .expect(200); + + const metrics = ( + await getMetrics(true, (m) => m?.metrics?.task_run?.value.overall.user_errors! === 1) + ).metrics; + + const total = metrics?.task_run?.value.overall.total || 0; + const success = metrics?.task_run?.value.overall.success || 0; + + expect(total - success).to.be(1); + }); + }); + + describe('task overdue', () => { + it('histograms should exist', async () => { + const metrics = (await getMetrics(false)).metrics; + expect(metrics).not.to.be(null); + expect(metrics?.task_overdue).not.to.be(null); + expect(metrics?.task_overdue?.value).not.to.be(null); + expect(metrics?.task_overdue?.value.overall).not.to.be(null); + expect(metrics?.task_overdue?.value.overall.overdue_by).not.to.be(null); + expect(Array.isArray(metrics?.task_overdue?.value.overall.overdue_by.counts)).to.be(true); + expect(Array.isArray(metrics?.task_overdue?.value.overall.overdue_by.values)).to.be(true); + }); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/migrations.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/migrations.ts new file mode 100644 index 0000000000000..8497f8bdc9678 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/migrations.ts @@ -0,0 +1,276 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import expect from '@kbn/expect'; +import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import type { TransportResult } from '@elastic/elasticsearch'; +import { + ConcreteTaskInstance, + SerializedConcreteTaskInstance, + TaskInstanceWithDeprecatedFields, + TaskStatus, +} from '@kbn/task-manager-plugin/server/task'; +import { SavedObjectsUtils } from '@kbn/core/server'; +import type { RuleTaskState, WrappedLifecycleRuleState } from '@kbn/alerting-state-types'; +import { FtrProviderContext } from '../../../common/ftr_provider_context'; + +export default function createGetTests({ getService }: FtrProviderContext) { + const es = getService('es'); + const esArchiver = getService('esArchiver'); + const ALERT_ID = '0359d7fcc04da9878ee9aadbda38ba55'; + const ACTION_TASK_PARAMS_ID = '6e96ac5e648f57523879661ea72525b7'; + + describe('migrations', () => { + before(async () => { + await esArchiver.load('x-pack/test/functional/es_archives/task_manager_tasks'); + }); + + after(async () => { + await esArchiver.unload('x-pack/test/functional/es_archives/task_manager_tasks'); + }); + + it('8.0.0 migrates actions tasks with legacy id to saved object ids', async () => { + // NOTE: We have to use Elasticsearch directly against the ".kibana" index because alerts do not expose the references which we want to test exist + const response = await es.get<{ task: TaskInstanceWithDeprecatedFields }>( + { + index: '.kibana_task_manager', + id: 'task:be7e1250-3322-11eb-94c1-db6995e84f6a', + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + expect(response.body._source?.task.params).to.eql( + `{"spaceId":"user1","alertId":"${SavedObjectsUtils.getConvertedObjectId( + 'user1', + 'alert', + ALERT_ID + )}"}` + ); + }); + + it('8.0.0 migrates actions tasks from legacy id to saved object ids', async () => { + const searchResult: TransportResult< + estypes.SearchResponse<{ task: TaskInstanceWithDeprecatedFields }>, + unknown + > = await es.search( + { + index: '.kibana_task_manager', + body: { + query: { + term: { + _id: 'task:be7e1250-3322-11eb-94c1-db6995e8389f', + }, + }, + }, + }, + { meta: true } + ); + expect(searchResult.statusCode).to.equal(200); + expect((searchResult.body.hits.total as estypes.SearchTotalHits).value).to.equal(1); + const hit = searchResult.body.hits.hits[0]; + expect(hit!._source!.task.params!).to.equal( + `{"spaceId":"user1","actionTaskParamsId":"${SavedObjectsUtils.getConvertedObjectId( + 'user1', + 'action_task_params', + ACTION_TASK_PARAMS_ID + )}"}` + ); + }); + + it('8.2.0 migrates alerting tasks that have no schedule.interval', async () => { + const searchResult: TransportResult< + estypes.SearchResponse<{ task: ConcreteTaskInstance }>, + unknown + > = await es.search( + { + index: '.kibana_task_manager', + body: { + query: { + term: { + _id: 'task:d33d7590-8377-11ec-8c11-2dfe94229b95', + }, + }, + }, + }, + { meta: true } + ); + expect(searchResult.statusCode).to.equal(200); + expect((searchResult.body.hits.total as estypes.SearchTotalHits).value).to.equal(1); + const hit = searchResult.body.hits.hits[0]; + expect(hit!._source!.task.attempts).to.be(0); + expect(hit!._source!.task.status).to.be(TaskStatus.Idle); + }); + + it('8.2.0 migrates tasks with unrecognized status to idle if task type is removed', async () => { + const response = await es.get<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + id: 'task:ce7e1250-3322-11eb-94c1-db6995e84f6d', + }, + { + meta: true, + } + ); + 
expect(response.statusCode).to.eql(200); + expect(response.body._source?.task.taskType).to.eql( + `alerting:0359d7fcc04da9878ee9aadbda38ba55` + ); + expect(response.body._source?.task.status).to.eql(`idle`); + }); + + it('8.2.0 does not migrate tasks with unrecognized status if task type is valid', async () => { + const response = await es.get<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + id: 'task:fe7e1250-3322-11eb-94c1-db6395e84f6e', + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + expect(response.body._source?.task.taskType).to.eql(`sampleTaskRemovedType`); + expect(response.body._source?.task.status).to.eql(`unrecognized`); + }); + + it('8.5.0 migrates active tasks to set enabled to true', async () => { + const response = await es.search<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + size: 100, + body: { + query: { + match_all: {}, + }, + }, + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + const tasks = response.body.hits.hits; + tasks + .filter( + (task) => + task._source?.task.status !== 'failed' && task._source?.task.status !== 'unrecognized' + ) + .forEach((task) => { + expect(task._source?.task.enabled).to.eql(true); + }); + }); + + it('8.5.0 does not migrates failed and unrecognized', async () => { + const response = await es.search<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + size: 100, + body: { + query: { + match_all: {}, + }, + }, + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + const tasks = response.body.hits.hits; + tasks + .filter( + (task) => + task._source?.task.status === 'failed' || task._source?.task.status === 'unrecognized' + ) + .forEach((task) => { + expect(task._source?.task.enabled).to.be(undefined); + }); + }); + + describe('8.8.0', async () => { + it('adds UUIDs to all alerts', async () => { + const response = await es.search<{ task: SerializedConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + size: 100, + body: { query: { match_all: {} } }, + }, + { meta: true } + ); + expect(response.statusCode).to.eql(200); + const tasks = response.body.hits.hits; + tasks.forEach((task) => { + const stateString = task._source?.task.state; + expect(stateString).to.be.ok(); + const state: RuleTaskState = JSON.parse(stateString!); + const uuids = new Set<string>(); + + for (const alert of Object.values(state.alertInstances || {})) { + const uuid = alert?.meta?.uuid || 'uuid-is-missing'; + expect(uuid).to.match(/^.{8}-.{4}-.{4}-.{4}-.{12}$/); + expect(uuids.has(uuid)).to.be(false); + uuids.add(uuid); + } + + for (const alert of Object.values(state.alertRecoveredInstances || {})) { + const uuid = alert?.meta?.uuid || 'uuid-is-missing'; + expect(uuid).to.match(/^.{8}-.{4}-.{4}-.{4}-.{12}$/); + expect(uuids.has(uuid)).to.be(false); + uuids.add(uuid); + } + }); + }); + + it('copies UUIDs from rule registry wrapper to alerting framework', async () => { + const response = await es.search<{ task: SerializedConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + size: 100, + body: { query: { match_all: {} } }, + }, + { meta: true } + ); + expect(response.statusCode).to.eql(200); + const tasks = response.body.hits.hits; + tasks.forEach((task) => { + const stateString = task._source?.task.state; + expect(stateString).to.be.ok(); + + const state: RuleTaskState = JSON.parse(stateString!); + if (!state?.alertTypeState?.wrapped) return; + + const wrappedUUIDs = new Map<string, string>(); + const wrappedState = 
state.alertTypeState as WrappedLifecycleRuleState<any>; + + for (const alert of Object.values(wrappedState.trackedAlerts || {})) { + const id = alert.alertId; + const uuid = alert.alertUuid; + wrappedUUIDs.set(id, uuid); + } + + for (const alert of Object.values(wrappedState.trackedAlertsRecovered || {})) { + const id = alert.alertId; + const uuid = alert.alertUuid; + wrappedUUIDs.set(id, uuid); + } + + for (const [id, alert] of Object.entries(state.alertInstances || {})) { + const uuid = alert?.meta?.uuid || 'uuid-is-missing'; + expect(uuid).to.be(wrappedUUIDs.get(id)); + } + + for (const [id, alert] of Object.entries(state.alertRecoveredInstances || {})) { + const uuid = alert?.meta?.uuid || 'uuid-is-missing'; + expect(uuid).to.be(wrappedUUIDs.get(id)); + } + }); + }); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management.ts new file mode 100644 index 0000000000000..c2d5e0edccf64 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management.ts @@ -0,0 +1,1170 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import moment from 'moment'; +import { random } from 'lodash'; +import expect from '@kbn/expect'; +import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import { taskMappings as TaskManagerMapping } from '@kbn/task-manager-plugin/server/saved_objects/mappings'; +import { ConcreteTaskInstance, BulkUpdateTaskResult } from '@kbn/task-manager-plugin/server'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +const { properties: taskManagerIndexMapping } = TaskManagerMapping; + +export interface RawDoc { + _id: string; + _source: any; + _type?: string; +} +export interface SearchResults { + hits: { + hits: RawDoc[]; + }; +} + +type DeprecatedConcreteTaskInstance = Omit<ConcreteTaskInstance, 'schedule'> & { + interval: string; +}; + +type SerializedConcreteTaskInstance<State = string, Params = string> = Omit< + ConcreteTaskInstance, + 'state' | 'params' | 'scheduledAt' | 'startedAt' | 'retryAt' | 'runAt' +> & { + state: State; + params: Params; + scheduledAt: string; + startedAt: string | null; + retryAt: string | null; + runAt: string; +}; + +export default function ({ getService }: FtrProviderContext) { + const es = getService('es'); + const log = getService('log'); + const retry = getService('retry'); + const supertest = getService('supertest'); + const testHistoryIndex = '.kibana_task_manager_test_result'; + + describe('scheduling and running tasks', () => { + beforeEach(async () => { + // clean up before each test + return await supertest.delete('/api/sample_tasks').set('kbn-xsrf', 'xxx').expect(200); + }); + + beforeEach(async () => { + const exists = await es.indices.exists({ index: testHistoryIndex }); + if (exists) { + await es.deleteByQuery({ + index: testHistoryIndex, + refresh: true, + body: { query: { term: { type: 'task' } } }, + }); + } else { + await es.indices.create({ + index: testHistoryIndex, + body: { + mappings: { + properties: { + type: { + type: 'keyword', + }, + taskId: { + type: 'keyword', + }, + params: taskManagerIndexMapping.params, + state: taskManagerIndexMapping.state, + runAt: taskManagerIndexMapping.runAt, + } as Record<string, 
estypes.MappingProperty>, + }, + }, + }); + } + }); + + after(async () => { + // clean up after last test + return await supertest.delete('/api/sample_tasks').set('kbn-xsrf', 'xxx').expect(200); + }); + + function currentTasks<State = unknown, Params = unknown>(): Promise<{ + docs: Array<SerializedConcreteTaskInstance<State, Params>>; + }> { + return supertest + .get('/api/sample_tasks') + .expect(200) + .then((response) => response.body); + } + + function currentTask<State = unknown, Params = unknown>( + task: string + ): Promise<SerializedConcreteTaskInstance<State, Params>> { + return supertest + .get(`/api/sample_tasks/task/${task}`) + .send({ task }) + .expect((response) => { + expect(response.status).to.eql(200); + expect(typeof JSON.parse(response.text).id).to.eql(`string`); + }) + .then((response) => response.body); + } + + function currentTaskError<State = unknown, Params = unknown>( + task: string + ): Promise<{ + statusCode: number; + error: string; + message: string; + }> { + return supertest + .get(`/api/sample_tasks/task/${task}`) + .send({ task }) + .expect(function (response) { + expect(response.status).to.eql(200); + expect(typeof JSON.parse(response.text).message).to.eql(`string`); + }) + .then((response) => response.body); + } + + function ensureTasksIndexRefreshed() { + return supertest.get(`/api/ensure_tasks_index_refreshed`).send({}).expect(200); + } + + async function historyDocs(taskId?: string): Promise<RawDoc[]> { + return es + .search({ + index: testHistoryIndex, + body: { + query: { + term: { type: 'task' }, + }, + }, + }) + .then((result) => + (result as unknown as SearchResults).hits.hits.filter((task) => + taskId ? task._source?.taskId === taskId : true + ) + ); + } + + function scheduleTask( + task: Partial<ConcreteTaskInstance | DeprecatedConcreteTaskInstance> + ): Promise<SerializedConcreteTaskInstance> { + return supertest + .post('/api/sample_tasks/schedule') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: SerializedConcreteTaskInstance }) => { + log.debug(`Task Scheduled: ${response.body.id}`); + return response.body; + }); + } + + function runTaskSoon(task: { id: string }) { + return supertest + .post('/api/sample_tasks/run_soon') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response) => response.body); + } + + function bulkEnable(taskIds: string[], runSoon: boolean) { + return supertest + .post('/api/sample_tasks/bulk_enable') + .set('kbn-xsrf', 'xxx') + .send({ taskIds, runSoon }) + .expect(200) + .then((response) => response.body); + } + + function bulkDisable(taskIds: string[]) { + return supertest + .post('/api/sample_tasks/bulk_disable') + .set('kbn-xsrf', 'xxx') + .send({ taskIds }) + .expect(200) + .then((response) => response.body); + } + + function bulkUpdateSchedules(taskIds: string[], schedule: { interval: string }) { + return supertest + .post('/api/sample_tasks/bulk_update_schedules') + .set('kbn-xsrf', 'xxx') + .send({ taskIds, schedule }) + .expect(200) + .then((response: { body: BulkUpdateTaskResult }) => response.body); + } + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // function runEphemeralTaskNow(task: { + // taskType: string; + // params: Record<string, any>; + // state: Record<string, any>; + // }) { + // return supertest + // .post('/api/sample_tasks/ephemeral_run_now') + // .set('kbn-xsrf', 'xxx') + // .send({ task }) + // .expect(200) + // .then((response) => response.body); + // } + + function 
scheduleTaskIfNotExists(task: Partial<ConcreteTaskInstance>) { + return supertest + .post('/api/sample_tasks/ensure_scheduled') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: ConcreteTaskInstance }) => response.body); + } + + function releaseTasksWaitingForEventToComplete(event: string) { + return supertest + .post('/api/sample_tasks/event') + .set('kbn-xsrf', 'xxx') + .send({ event }) + .expect(200); + } + + function getTaskById<State = unknown, Params = unknown>( + tasks: Array<SerializedConcreteTaskInstance<State, Params>>, + id: string + ) { + return tasks.filter((task) => task.id === id)[0]; + } + + async function provideParamsToTasksWaitingForParams( + taskId: string, + data: Record<string, unknown> = {} + ) { + // wait for task to start running and stall on waitForParams + await retry.try(async () => { + const tasks = (await currentTasks()).docs; + expect(getTaskById(tasks, taskId).status).to.eql('running'); + }); + + return supertest + .post('/api/sample_tasks/event') + .set('kbn-xsrf', 'xxx') + .send({ event: taskId, data }) + .expect(200); + } + + it('should support middleware', async () => { + const historyItem = random(1, 100); + + const scheduledTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '30m' }, + params: { historyItem }, + }); + log.debug(`Task created: ${scheduledTask.id}`); + + await retry.try(async () => { + expect((await historyDocs()).length).to.eql(1); + + const [task] = (await currentTasks<{ count: number }>()).docs; + log.debug(`Task found: ${task.id}`); + log.debug(`Task status: ${task.status}`); + log.debug(`Task state: ${JSON.stringify(task.state, null, 2)}`); + log.debug(`Task params: ${JSON.stringify(task.params, null, 2)}`); + + expect(task.state.count).to.eql(1); + + expect(task.params).to.eql({ + superFly: 'My middleware param!', + originalParams: { historyItem }, + }); + }); + }); + + it('should remove non-recurring tasks after they complete', async () => { + await scheduleTask({ + taskType: 'sampleTask', + params: {}, + }); + + await retry.try(async () => { + const history = await historyDocs(); + expect(history.length).to.eql(1); + expect((await currentTasks()).docs).to.eql([]); + }); + }); + + it('should use a given ID as the task document ID', async () => { + const result = await scheduleTask({ + id: 'test-task-for-sample-task-plugin-to-test-task-manager', + taskType: 'sampleTask', + params: {}, + }); + + expect(result.id).to.be('test-task-for-sample-task-plugin-to-test-task-manager'); + }); + + it('should allow a task with a given ID to be scheduled multiple times', async () => { + const result = await scheduleTaskIfNotExists({ + id: 'test-task-to-reschedule-in-task-manager', + taskType: 'sampleTask', + params: {}, + }); + + expect(result.id).to.be('test-task-to-reschedule-in-task-manager'); + + const rescheduleResult = await scheduleTaskIfNotExists({ + id: 'test-task-to-reschedule-in-task-manager', + taskType: 'sampleTask', + params: {}, + }); + + expect(rescheduleResult.id).to.be('test-task-to-reschedule-in-task-manager'); + }); + + it('should reschedule if task errors', async () => { + const task = await scheduleTask({ + taskType: 'sampleTask', + params: { failWith: 'Dangit!!!!!' 
}, + }); + + await retry.try(async () => { + const scheduledTask = await currentTask(task.id); + expect(scheduledTask.attempts).to.be.greaterThan(1); + expect(Date.parse(scheduledTask.runAt)).to.be.greaterThan( + Date.parse(task.runAt) + 30 * 1000 + ); + }); + }); + + it('should schedule the retry of recurring tasks to run at the next schedule when they time out', async () => { + const intervalInMinutes = 30; + const intervalInMilliseconds = intervalInMinutes * 60 * 1000; + const task = await scheduleTask({ + taskType: 'sampleRecurringTaskWhichHangs', + schedule: { interval: `${intervalInMinutes}m` }, + params: {}, + }); + + await retry.try(async () => { + const scheduledTask = await currentTask(task.id); + const retryAt = Date.parse(scheduledTask.retryAt!); + expect(isNaN(retryAt)).to.be(false); + + const buffer = 10000; // 10 second buffer + const retryDelay = retryAt - Date.parse(task.runAt); + expect(retryDelay).to.be.greaterThan(intervalInMilliseconds - buffer); + expect(retryDelay).to.be.lessThan(intervalInMilliseconds + buffer); + }); + }); + + it('should reschedule if task returns runAt', async () => { + const nextRunMilliseconds = random(60000, 200000); + const count = random(1, 20); + + const originalTask = await scheduleTask({ + taskType: 'sampleTask', + params: { nextRunMilliseconds }, + state: { count }, + }); + + await retry.try(async () => { + expect((await historyDocs(originalTask.id)).length).to.eql(1); + + const task = await currentTask<{ count: number }>(originalTask.id); + expect(task.attempts).to.eql(0); + expect(task.state.count).to.eql(count + 1); + + expectReschedule(Date.parse(originalTask.runAt), task, nextRunMilliseconds); + }); + }); + + it('should reschedule if task has an interval', async () => { + const interval = random(5, 200); + const intervalMilliseconds = interval * 60000; + + const originalTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `${interval}m` }, + params: {}, + }); + + await retry.try(async () => { + expect((await historyDocs()).length).to.eql(1); + + const [task] = (await currentTasks<{ count: number }>()).docs; + expect(task.attempts).to.eql(0); + expect(task.state.count).to.eql(1); + + expectReschedule(Date.parse(originalTask.runAt), task, intervalMilliseconds); + }); + }); + + it('should support the deprecated interval field', async () => { + const interval = random(5, 200); + const intervalMilliseconds = interval * 60000; + + const originalTask = await scheduleTask({ + taskType: 'sampleTask', + interval: `${interval}m`, + params: {}, + }); + + await retry.try(async () => { + expect((await historyDocs()).length).to.eql(1); + + const [task] = (await currentTasks<{ count: number }>()).docs; + expect(task.attempts).to.eql(0); + expect(task.state.count).to.eql(1); + + expectReschedule(Date.parse(originalTask.runAt), task, intervalMilliseconds); + }); + }); + + it('should return a task run result when asked to run a task now', async () => { + const originalTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `30m` }, + params: {}, + }); + + await retry.try(async () => { + const docs = await historyDocs(); + expect(docs.filter((taskDoc) => taskDoc._source.taskId === originalTask.id).length).to.eql( + 1 + ); + + const [task] = (await currentTasks<{ count: number }>()).docs.filter( + (taskDoc) => taskDoc.id === originalTask.id + ); + + expect(task.state.count).to.eql(1); + + // ensure this task shouldnt run for another half hour + expectReschedule(Date.parse(originalTask.runAt), task, 30 * 
60000); + }); + + const now = Date.now(); + const runSoonResult = await runTaskSoon({ + id: originalTask.id, + }); + + expect(runSoonResult).to.eql({ id: originalTask.id }); + + await retry.try(async () => { + expect( + (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === originalTask.id) + .length + ).to.eql(2); + + const [task] = (await currentTasks<{ count: number }>()).docs.filter( + (taskDoc) => taskDoc.id === originalTask.id + ); + expect(task.state.count).to.eql(2); + + // ensure this task shouldn't run for another half hour + expectReschedule(now, task, 30 * 60000); + }); + }); + + // always failing + it.skip('should only run as many instances of a task as its maxConcurrency will allow', async () => { + // should run as there's only one and maxConcurrency on this TaskType is 1 + const firstWithSingleConcurrency = await scheduleTask({ + taskType: 'sampleTaskWithSingleConcurrency', + params: { + waitForEvent: 'releaseFirstWaveOfTasks', + }, + }); + + // should run as there are only two and maxConcurrency on this TaskType is 2 + const [firstLimitedConcurrency, secondLimitedConcurrency] = await Promise.all([ + scheduleTask({ + taskType: 'sampleTaskWithLimitedConcurrency', + params: { + waitForEvent: 'releaseFirstWaveOfTasks', + }, + }), + scheduleTask({ + taskType: 'sampleTaskWithLimitedConcurrency', + params: { + waitForEvent: 'releaseSecondWaveOfTasks', + }, + }), + ]); + + await retry.try(async () => { + expect((await historyDocs(firstWithSingleConcurrency.id)).length).to.eql(1); + expect((await historyDocs(firstLimitedConcurrency.id)).length).to.eql(1); + expect((await historyDocs(secondLimitedConcurrency.id)).length).to.eql(1); + }); + + // should not run as there is one running and maxConcurrency on this TaskType is 1 + const secondWithSingleConcurrency = await scheduleTask({ + taskType: 'sampleTaskWithSingleConcurrency', + params: { + waitForEvent: 'releaseSecondWaveOfTasks', + }, + }); + + // should not run as there are two running and maxConcurrency on this TaskType is 2 + const thirdWithLimitedConcurrency = await scheduleTask({ + taskType: 'sampleTaskWithLimitedConcurrency', + params: { + waitForEvent: 'releaseSecondWaveOfTasks', + }, + }); + + // schedule a task that should get picked up before the two blocked tasks + const taskWithUnlimitedConcurrency = await scheduleTask({ + taskType: 'sampleTask', + params: {}, + }); + + await retry.try(async () => { + expect((await historyDocs(taskWithUnlimitedConcurrency.id)).length).to.eql(1); + expect((await currentTask(secondWithSingleConcurrency.id)).status).to.eql('idle'); + expect((await currentTask(thirdWithLimitedConcurrency.id)).status).to.eql('idle'); + }); + + // release the running SingleConcurrency task and only one of the LimitedConcurrency tasks + await releaseTasksWaitingForEventToComplete('releaseFirstWaveOfTasks'); + + await retry.try(async () => { + // ensure the completed tasks were deleted + expect((await currentTaskError(firstWithSingleConcurrency.id)).message).to.eql( + `Saved object [task/${firstWithSingleConcurrency.id}] not found` + ); + expect((await currentTaskError(firstLimitedConcurrency.id)).message).to.eql( + `Saved object [task/${firstLimitedConcurrency.id}] not found` + ); + + // ensure the blocked task is still running + expect((await currentTask(secondLimitedConcurrency.id)).status).to.eql('running'); + + // ensure the blocked tasks begin running + expect((await currentTask(secondWithSingleConcurrency.id)).status).to.eql('running'); + expect((await 
currentTask(thirdWithLimitedConcurrency.id)).status).to.eql('running'); + }); + + // release blocked task + await releaseTasksWaitingForEventToComplete('releaseSecondWaveOfTasks'); + }); + + it('should increment attempts when task fails on markAsRunning', async () => { + const originalTask = await scheduleTask({ + taskType: 'sampleTask', + params: { throwOnMarkAsRunning: true }, + }); + + expect(originalTask.attempts).to.eql(0); + + // Wait for task manager to attempt running the task a second time + await retry.try(async () => { + const task = await currentTask(originalTask.id); + expect(task.attempts).to.eql(2); + }); + }); + + it('should return a task run error result when trying to run a non-existent task', async () => { + // runSoon should fail + const failedRunSoonResult = await runTaskSoon({ + id: 'i-dont-exist', + }); + expect(failedRunSoonResult).to.eql({ + error: `Error: Saved object [task/i-dont-exist] not found`, + id: 'i-dont-exist', + }); + }); + + it('should return a task run error result when trying to run a task now which is already running', async () => { + const longRunningTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '30m' }, + params: { + waitForParams: true, + }, + }); + + // tell the task to wait for the 'runSoonHasBeenAttempted' event + await provideParamsToTasksWaitingForParams(longRunningTask.id, { + waitForEvent: 'runSoonHasBeenAttempted', + }); + + await retry.try(async () => { + const docs = await historyDocs(); + expect( + docs.filter((taskDoc) => taskDoc._source.taskId === longRunningTask.id).length + ).to.eql(1); + + const task = await currentTask(longRunningTask.id); + expect(task.status).to.eql('running'); + }); + + await ensureTasksIndexRefreshed(); + + // first runSoon should fail + const failedRunSoonResult = await runTaskSoon({ + id: longRunningTask.id, + }); + + expect(failedRunSoonResult).to.eql({ + error: `Error: Failed to run task "${longRunningTask.id}" as it is currently running`, + id: longRunningTask.id, + }); + + // finish first run by emitting 'runSoonHasBeenAttempted' event + await releaseTasksWaitingForEventToComplete('runSoonHasBeenAttempted'); + await retry.try(async () => { + const tasks = (await currentTasks<{ count: number }>()).docs; + expect(getTaskById(tasks, longRunningTask.id).state.count).to.eql(1); + + const task = await currentTask(longRunningTask.id); + expect(task.status).to.eql('idle'); + }); + + await ensureTasksIndexRefreshed(); + + // second runSoon should be successful + const successfulRunSoonResult = runTaskSoon({ + id: longRunningTask.id, + }); + + await provideParamsToTasksWaitingForParams(longRunningTask.id); + + expect(await successfulRunSoonResult).to.eql({ id: longRunningTask.id }); + }); + + it('should disable and reenable task and run it when runSoon = true', async () => { + const historyItem = random(1, 100); + const scheduledTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '1h' }, + params: { historyItem }, + }); + + await retry.try(async () => { + expect((await historyDocs()).length).to.eql(1); + const task = await currentTask(scheduledTask.id); + + expect(task.enabled).to.eql(true); + }); + + await retry.try(async () => { + // disable the task + await bulkDisable([scheduledTask.id]); + const task = await currentTask(scheduledTask.id); + log.debug( + `bulkDisable:task(${scheduledTask.id}) enabled: ${task.enabled}, when runSoon = true` + ); + expect(task.enabled).to.eql(false); + }); + + // re-enable the task + await bulkEnable([scheduledTask.id], 
true); + + await retry.try(async () => { + const task = await currentTask(scheduledTask.id); + + expect(task.enabled).to.eql(true); + log.debug( + `bulkEnable:task(${scheduledTask.id}) enabled: ${task.enabled}, when runSoon = true` + ); + expect(Date.parse(task.scheduledAt)).to.be.greaterThan( + Date.parse(scheduledTask.scheduledAt) + ); + expect(Date.parse(task.runAt)).to.be.greaterThan(Date.parse(scheduledTask.runAt)); + }); + }); + + it('should disable and reenable task and not run it when runSoon = false', async () => { + const historyItem = random(1, 100); + const scheduledTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '1h' }, + params: { historyItem }, + }); + + await retry.try(async () => { + expect((await historyDocs()).length).to.eql(1); + + const task = await currentTask(scheduledTask.id); + expect(task.enabled).to.eql(true); + }); + + // disable the task + await bulkDisable([scheduledTask.id]); + + let disabledTask: SerializedConcreteTaskInstance; + await retry.try(async () => { + disabledTask = await currentTask(scheduledTask.id); + log.debug( + `bulkDisable:task(${scheduledTask.id}) enabled: ${disabledTask.enabled}, when runSoon = false` + ); + expect(disabledTask.enabled).to.eql(false); + }); + + // re-enable the task + await bulkEnable([scheduledTask.id], false); + + await retry.try(async () => { + const task = await currentTask(scheduledTask.id); + log.debug( + `bulkEnable:task(${scheduledTask.id}) enabled: ${task.enabled}, when runSoon = false` + ); + expect(task.enabled).to.eql(true); + expect(Date.parse(task.scheduledAt)).to.eql(Date.parse(disabledTask.scheduledAt)); + }); + }); + + function expectReschedule( + originalRunAt: number, + task: SerializedConcreteTaskInstance<any, any>, + expectedDiff: number + ) { + const buffer = 10000; + expect(Date.parse(task.runAt) - originalRunAt).to.be.greaterThan(expectedDiff - buffer); + expect(Date.parse(task.runAt) - originalRunAt).to.be.lessThan(expectedDiff + buffer); + } + + it('should run tasks in parallel, allowing for long running tasks alongside faster tasks', async () => { + /** + * It's worth noting this test relies on the /event endpoint that forces Task Manager to hold off + * on completing a task until a call is made by the test suite. + * If we begin testing with multiple Kibana instances in parallel this will likely become flaky. + * If you end up here because the test is flaky, this might be why. 
+ */ + const fastTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `1s` }, + params: {}, + }); + + const longRunningTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `1s` }, + params: { + waitForEvent: 'rescheduleHasHappened', + }, + }); + + await retry.try(async () => { + const tasks = (await currentTasks<{ count: number }>()).docs; + expect(getTaskById(tasks, fastTask.id).state.count).to.eql(2); + }); + + await releaseTasksWaitingForEventToComplete('rescheduleHasHappened'); + + await retry.try(async () => { + const tasks = (await currentTasks<{ count: number }>()).docs; + + expect(getTaskById(tasks, fastTask.id).state.count).to.greaterThan(2); + expect(getTaskById(tasks, longRunningTask.id).state.count).to.eql(1); + }); + }); + + it('should delete the task if it is still running but maxAttempts has been reached', async () => { + await scheduleTask({ + taskType: 'sampleOneTimeTaskThrowingError', + params: {}, + }); + + await retry.try(async () => { + const results = (await currentTasks()).docs; + expect(results.length).to.eql(0); + }); + }); + + // flaky + it.skip('should continue claiming recurring task even if maxAttempts has been reached', async () => { + const task = await scheduleTask({ + taskType: 'sampleRecurringTaskTimingOut', + schedule: { interval: '1s' }, + params: {}, + }); + + await retry.try(async () => { + const [scheduledTask] = (await currentTasks()).docs; + expect(scheduledTask.id).to.eql(task.id); + expect(scheduledTask.status).to.eql('claiming'); + expect(scheduledTask.attempts).to.be.greaterThan(3); + }); + }); + + it('should fail to schedule recurring task with timeout override', async () => { + const task = await scheduleTask({ + taskType: 'sampleRecurringTaskTimingOut', + schedule: { interval: '1s' }, + timeoutOverride: '30s', + params: {}, + }); + + expect(task.timeoutOverride).to.be(undefined); + }); + + it('should allow timeout override for ad hoc tasks', async () => { + const task = await scheduleTask({ + taskType: 'sampleAdHocTaskTimingOut', + timeoutOverride: '30s', + params: {}, + }); + + expect(task.timeoutOverride).to.be('30s'); + + // this task type is set to time out after 1s but the task runner + // will wait 15 seconds and then index a document if it hasn't timed out + // this test overrides the timeout to 30s and checks if the expected + // document was indexed. 
presence of the indexed document means the task + // timeout override was respected + await retry.try(async () => { + const [scheduledTask] = (await currentTasks()).docs; + expect(scheduledTask?.id).to.eql(task.id); + }); + + await retry.try(async () => { + const docs: RawDoc[] = await historyDocs(task.id); + expect(docs.length).to.eql(1); + expect(docs[0]._source.taskType).to.eql('sampleAdHocTaskTimingOut'); + }); + }); + + it('should bulk update schedules for multiple tasks', async () => { + const initialTime = Date.now(); + const tasks = await Promise.all([ + scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '1h' }, + params: {}, + }), + + scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: '5m' }, + params: {}, + }), + ]); + + const taskIds = tasks.map(({ id }) => id); + + await retry.try(async () => { + // ensure each task has run at least once and been rescheduled for a future run + for (const task of tasks) { + const { state } = await currentTask<{ count: number }>(task.id); + expect(state.count).to.be(1); + } + + // first task to be scheduled in 1h + expect(Date.parse((await currentTask(tasks[0].id)).runAt) - initialTime).to.be.greaterThan( + moment.duration(1, 'hour').asMilliseconds() + ); + + // second task to be scheduled in 5m + expect(Date.parse((await currentTask(tasks[1].id)).runAt) - initialTime).to.be.greaterThan( + moment.duration(5, 'minutes').asMilliseconds() + ); + }); + + await retry.try(async () => { + const updates = await bulkUpdateSchedules(taskIds, { interval: '3h' }); + + expect(updates.tasks.length).to.be(2); + expect(updates.errors.length).to.be(0); + }); + + await retry.try(async () => { + const updatedTasks = (await currentTasks()).docs; + + updatedTasks.forEach((task) => { + expect(task.schedule).to.eql({ interval: '3h' }); + // should be scheduled to run in 3 hours + expect(Date.parse(task.runAt) - initialTime).to.be.greaterThan( + moment.duration(3, 'hours').asMilliseconds() + ); + }); + }); + }); + + it('should not bulk update schedules for task in running status', async () => { + // this task should be in running status for 60s until it times out + const longRunningTask = await scheduleTask({ + taskType: 'sampleRecurringTaskWhichHangs', + schedule: { interval: '1h' }, + params: {}, + }); + + runTaskSoon({ id: longRunningTask.id }); + + let scheduledRunAt: string; + // ensure task is running and store scheduled runAt + await retry.try(async () => { + const task = await currentTask(longRunningTask.id); + + expect(task.status).to.be('running'); + + scheduledRunAt = task.runAt; + }); + + await retry.try(async () => { + const updates = await bulkUpdateSchedules([longRunningTask.id], { interval: '3h' }); + + // length should be 0, as a task in running status won't be updated + expect(updates.tasks.length).to.be(0); + expect(updates.errors.length).to.be(0); + }); + + // ensure task wasn't updated + await retry.try(async () => { + const task = await currentTask(longRunningTask.id); + + // interval shouldn't be changed + expect(task.schedule).to.eql({ interval: '1h' }); + + // scheduledRunAt shouldn't be changed + expect(task.runAt).to.eql(scheduledRunAt); + }); + }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('should return the resulting task state when asked to run an ephemeral task now', async () => { + // const ephemeralTask = await runEphemeralTaskNow({ + // taskType: 'sampleTask', + // params: {}, + // state: {}, + // }); + + // await retry.try(async () => { + // expect( + 
// (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id) + // .length + // ).to.eql(1); + + // expect(ephemeralTask.state.count).to.eql(1); + // }); + + // const secondEphemeralTask = await runEphemeralTaskNow({ + // taskType: 'sampleTask', + // params: {}, + // // pass state from previous ephemeral run as input for the second run + // state: ephemeralTask.state, + // }); + + // // ensure state is cumulative + // expect(secondEphemeralTask.state.count).to.eql(2); + + // await retry.try(async () => { + // // ensure new id is produced for second task execution + // expect( + // (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id) + // .length + // ).to.eql(1); + // expect( + // (await historyDocs()).filter( + // (taskDoc) => taskDoc._source.taskId === secondEphemeralTask.id + // ).length + // ).to.eql(1); + // }); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Epheemral task run should only run one instance of a task if its maxConcurrency is 1', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: [ + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // ], + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects with any other task + // 0 + // ); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Ephemeral task run should only run as many instances of a task as its maxConcurrency will allow', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: [ + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // ], + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects 
with, at most, 1 other task + // 1 + // ); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Ephemeral task executions cant exceed the max workes in Task Manager', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: times(20, () => ({ + // taskType: 'timedTask', + // params: { delay: 100 }, + // state: {}, + // })), + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects with, at most, 9 other tasks (as max workes is 10) + // 9 + // ); + // }); + }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // function ensureOverlappingTasksDontExceedThreshold( + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>, + // threshold: number + // ) { + // const executionRanges = executions.map((execution) => ({ + // id: execution.result.id, + // range: range( + // // calculate range of milliseconds + // // in which the task was running (that should be good enough) + // execution.result.state.timings[0].start, + // execution.result.state.timings[0].stop + // ), + // })); + + // const intersections = new Map<string, string[]>(); + // for (const currentExecution of executionRanges) { + // for (const executionToComparteTo of executionRanges) { + // if (currentExecution.id !== executionToComparteTo.id) { + // // find all executions that intersect + // if (intersection(currentExecution.range, executionToComparteTo.range).length) { + // intersections.set(currentExecution.id, [ + // ...(intersections.get(currentExecution.id) ?? []), + // executionToComparteTo.id, + // ]); + // } + // } + // } + // } + + // const tooManyIntersectingTasks = [...intersections.entries()].find( + // // make sure each task intersects with, at most, threshold of other task + // ([, intersectingTasks]) => intersectingTasks.length > threshold + // ); + // if (tooManyIntersectingTasks) { + // throw new Error( + // `Invalid execution found: ${tooManyIntersectingTasks[0]} overlaps with ${tooManyIntersectingTasks[1]}` + // ); + // } + // } +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_removed_types.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_removed_types.ts new file mode 100644 index 0000000000000..e13615cceab0c --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_removed_types.ts @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import expect from '@kbn/expect'; +import url from 'url'; +import supertest from 'supertest'; +import { ConcreteTaskInstance } from '@kbn/task-manager-plugin/server'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +export interface RawDoc { + _id: string; + _source: any; + _type?: string; +} +export interface SearchResults { + hits: { + hits: RawDoc[]; + }; +} + +type DeprecatedConcreteTaskInstance = Omit<ConcreteTaskInstance, 'schedule'> & { + interval: string; +}; + +type SerializedConcreteTaskInstance<State = string, Params = string> = Omit< + ConcreteTaskInstance, + 'state' | 'params' | 'scheduledAt' | 'startedAt' | 'retryAt' | 'runAt' +> & { + state: State; + params: Params; + scheduledAt: string; + startedAt: string | null; + retryAt: string | null; + runAt: string; +}; + +export default function ({ getService }: FtrProviderContext) { + const esArchiver = getService('esArchiver'); + const retry = getService('retry'); + const config = getService('config'); + const request = supertest(url.format(config.get('servers.kibana'))); + + const UNREGISTERED_TASK_TYPE_ID = 'ce7e1250-3322-11eb-94c1-db6995e83f6b'; + const REMOVED_TASK_TYPE_ID = 'be7e1250-3322-11eb-94c1-db6995e83f6a'; + + describe('not registered task types', () => { + before(async () => { + await esArchiver.load('x-pack/test/functional/es_archives/task_manager_removed_types'); + }); + + after(async () => { + await esArchiver.unload('x-pack/test/functional/es_archives/task_manager_removed_types'); + }); + + function scheduleTask( + task: Partial<ConcreteTaskInstance | DeprecatedConcreteTaskInstance> + ): Promise<SerializedConcreteTaskInstance> { + return request + .post('/api/sample_tasks/schedule') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: SerializedConcreteTaskInstance }) => response.body); + } + + function currentTasks<State = unknown, Params = unknown>(): Promise<{ + docs: Array<SerializedConcreteTaskInstance<State, Params>>; + }> { + return request + .get('/api/sample_tasks') + .expect(200) + .then((response) => response.body); + } + + // flaky + it.skip('should successfully schedule registered tasks, not claim unregistered tasks and mark removed task types as unrecognized', async () => { + const scheduledTask = await scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `1s` }, + params: {}, + }); + + await retry.try(async () => { + const tasks = (await currentTasks()).docs; + expect(tasks.length).to.eql(3); + + const taskIds = tasks.map((task) => task.id); + expect(taskIds).to.contain(scheduledTask.id); + expect(taskIds).to.contain(UNREGISTERED_TASK_TYPE_ID); + expect(taskIds).to.contain(REMOVED_TASK_TYPE_ID); + + const scheduledTaskInstance = tasks.find((task) => task.id === scheduledTask.id); + const unregisteredTaskInstance = tasks.find( + (task) => task.id === UNREGISTERED_TASK_TYPE_ID + ); + const removedTaskInstance = tasks.find((task) => task.id === REMOVED_TASK_TYPE_ID); + + expect(scheduledTaskInstance?.status).to.eql('claiming'); + expect(unregisteredTaskInstance?.status).to.eql('idle'); + expect(removedTaskInstance?.status).to.eql('unrecognized'); + }); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_scheduled_at.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_scheduled_at.ts new file mode 100644 index 0000000000000..a70225035d03c --- /dev/null +++ 
b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_management_scheduled_at.ts @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import { ConcreteTaskInstance } from '@kbn/task-manager-plugin/server/task'; +import { FtrProviderContext } from '../../../common/ftr_provider_context'; + +export default function createTaskManagementScheduledAtTests({ getService }: FtrProviderContext) { + const es = getService('es'); + const esArchiver = getService('esArchiver'); + const retry = getService('retry'); + + describe('task management scheduled at', () => { + before(async () => { + await esArchiver.load('x-pack/test/functional/es_archives/task_manager_tasks'); + }); + + after(async () => { + await esArchiver.unload('x-pack/test/functional/es_archives/task_manager_tasks'); + await esArchiver.emptyKibanaIndex(); + }); + + it('sets scheduledAt to runAt if retryAt is null', async () => { + await retry.try(async () => { + const response = await es.get<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + id: 'task:ge7e1250-3322-11eb-94c1-db6395e84f6g', + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + expect(response.body._source?.task.scheduledAt).to.eql('2020-11-30T16:00:00.000Z'); + }); + }); + + it('sets scheduledAt to retryAt if retryAt time has passed', async () => { + await retry.try(async () => { + const response = await es.get<{ task: ConcreteTaskInstance }>( + { + index: '.kibana_task_manager', + id: 'task:ie7e1250-3322-11eb-94c1-db6395e84f6i', + }, + { + meta: true, + } + ); + expect(response.statusCode).to.eql(200); + expect(response.body._source?.task.scheduledAt).to.eql('2020-11-30T17:00:00.000Z'); + }); + }); + }); +} diff --git a/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_priority.ts b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_priority.ts new file mode 100644 index 0000000000000..f8fc3f63987b9 --- /dev/null +++ b/x-pack/test/task_manager_claimer_mget/test_suites/task_manager/task_priority.ts @@ -0,0 +1,216 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import expect from '@kbn/expect'; +import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import { ConcreteTaskInstance } from '@kbn/task-manager-plugin/server'; +import { taskMappings as TaskManagerMapping } from '@kbn/task-manager-plugin/server/saved_objects/mappings'; +import { asyncForEach } from '@kbn/std'; +import { FtrProviderContext } from '../../ftr_provider_context'; + +const { properties: taskManagerIndexMapping } = TaskManagerMapping; + +export interface RawDoc { + _id: string; + _source: any; + _type?: string; +} +export interface SearchResults { + hits: { + hits: RawDoc[]; + }; +} + +type DeprecatedConcreteTaskInstance = Omit<ConcreteTaskInstance, 'schedule'> & { + interval: string; +}; + +type SerializedConcreteTaskInstance<State = string, Params = string> = Omit< + ConcreteTaskInstance, + 'state' | 'params' | 'scheduledAt' | 'startedAt' | 'retryAt' | 'runAt' +> & { + state: State; + params: Params; + scheduledAt: string; + startedAt: string | null; + retryAt: string | null; + runAt: string; +}; + +export default function ({ getService }: FtrProviderContext) { + const es = getService('es'); + const retry = getService('retry'); + const supertest = getService('supertest'); + + const testHistoryIndex = '.kibana_task_manager_test_result'; + + function scheduleTask( + task: Partial<ConcreteTaskInstance | DeprecatedConcreteTaskInstance> + ): Promise<SerializedConcreteTaskInstance> { + return supertest + .post('/api/sample_tasks/schedule') + .set('kbn-xsrf', 'xxx') + .send({ task }) + .expect(200) + .then((response: { body: SerializedConcreteTaskInstance }) => response.body); + } + + function currentTasks<State = unknown, Params = unknown>(): Promise<{ + docs: Array<SerializedConcreteTaskInstance<State, Params>>; + }> { + return supertest + .get('/api/sample_tasks') + .expect(200) + .then((response) => response.body); + } + + async function historyDocs({ + taskId, + taskType, + }: { + taskId?: string; + taskType?: string; + }): Promise<RawDoc[]> { + const filter: any[] = [{ term: { type: 'task' } }]; + if (taskId) { + filter.push({ term: { taskId } }); + } + if (taskType) { + filter.push({ term: { taskType } }); + } + return es + .search({ + index: testHistoryIndex, + body: { + query: { + bool: { + filter, + }, + }, + }, + }) + .then((result) => (result as unknown as SearchResults).hits.hits); + } + + describe('task priority', () => { + beforeEach(async () => { + const exists = await es.indices.exists({ index: testHistoryIndex }); + if (exists) { + await es.deleteByQuery({ + index: testHistoryIndex, + refresh: true, + body: { query: { term: { type: 'task' } } }, + }); + } else { + await es.indices.create({ + index: testHistoryIndex, + body: { + mappings: { + properties: { + type: { + type: 'keyword', + }, + taskId: { + type: 'keyword', + }, + params: taskManagerIndexMapping.params, + state: taskManagerIndexMapping.state, + runAt: taskManagerIndexMapping.runAt, + } as Record<string, estypes.MappingProperty>, + }, + }, + }); + } + }); + + afterEach(async () => { + await supertest.delete('/api/sample_tasks').set('kbn-xsrf', 'xxx').expect(200); + }); + + it('should claim low priority tasks if there is capacity', async () => { + // schedule 5 normal tasks and 1 low priority task + // setting the schedule long so they should only run once + const tasksToSchedule = []; + for (let i = 0; i < 5; i++) { + tasksToSchedule.push( + scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `1d` }, + params: {}, + }) + ); + } + tasksToSchedule.push( + 
scheduleTask({ + taskType: 'lowPriorityTask', + schedule: { interval: `1d` }, + params: {}, + }) + ); + const scheduledTasks = await Promise.all(tasksToSchedule); + + await retry.try(async () => { + const tasks = (await currentTasks()).docs; + expect(tasks.length).to.eql(6); + + const taskIds = tasks.map((task) => task.id); + const taskDocs: RawDoc[] = []; + await asyncForEach(scheduledTasks, async (scheduledTask) => { + expect(taskIds).to.contain(scheduledTask.id); + const doc: RawDoc[] = await historyDocs({ taskId: scheduledTask.id }); + expect(doc.length).to.eql(1); + taskDocs.push(...doc); + }); + + expect( + taskDocs.findIndex((taskDoc) => taskDoc._source.taskType === 'lowPriorityTask') + ).to.be.greaterThan(-1); + }); + }); + + it('should not claim low priority tasks when there is no capacity', async () => { + // schedule a bunch of normal priority tasks that run frequently + const tasksToSchedule = []; + for (let i = 0; i < 10; i++) { + tasksToSchedule.push( + scheduleTask({ + taskType: 'sampleTask', + schedule: { interval: `1s` }, + params: {}, + }) + ); + } + + // schedule a low priority task + tasksToSchedule.push( + scheduleTask({ + taskType: 'lowPriorityTask', + schedule: { interval: `1s` }, + params: {}, + }) + ); + const scheduledTasks = await Promise.all(tasksToSchedule); + + // make sure all tasks get created + await retry.try(async () => { + const tasks = (await currentTasks()).docs; + expect(tasks.length).to.eql(11); + + const taskIds = tasks.map((task) => task.id); + scheduledTasks.forEach((scheduledTask) => { + expect(taskIds).to.contain(scheduledTask.id); + }); + }); + + // wait for 30 seconds to let the multiple task claiming cycles run + await new Promise((r) => setTimeout(r, 30000)); + + const docs: RawDoc[] = await historyDocs({ taskType: 'lowPriorityTask' }); + expect(docs.length).to.eql(0); + }); + }); +} diff --git a/yarn.lock b/yarn.lock index 61bd27eecdc9a..37a051f177767 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5932,6 +5932,10 @@ version "0.0.0" uid "" +"@kbn/sample-task-plugin-mget@link:x-pack/test/task_manager_claimer_mget/plugins/sample_task_plugin_mget": + version "0.0.0" + uid "" + "@kbn/sample-task-plugin@link:x-pack/test/plugin_api_integration/plugins/sample_task_plugin": version "0.0.0" uid ""