From dee61dc51c4d1f007130cc14e475a8572c0fdb32 Mon Sep 17 00:00:00 2001 From: Gidi Meir Morris Date: Mon, 13 Jan 2020 17:16:25 +0000 Subject: [PATCH] [alerting] gracefully handle error in initialization of Alert TaskRunner (#54335) Prevents an edge cases where Alerts can end up in a zombie state. 1. Decrypting attributes throws an error 2. Fetching an Api Key throws an error 3. Getting Services with user permissions throws an error --- .../alerting/server/lib/result_type.ts | 8 ++ .../server/task_runner/task_runner.test.ts | 93 +++++++++++++++++++ .../server/task_runner/task_runner.ts | 75 ++++++++++++--- 3 files changed, 162 insertions(+), 14 deletions(-) diff --git a/x-pack/legacy/plugins/alerting/server/lib/result_type.ts b/x-pack/legacy/plugins/alerting/server/lib/result_type.ts index 644ae5129224..52843f636230 100644 --- a/x-pack/legacy/plugins/alerting/server/lib/result_type.ts +++ b/x-pack/legacy/plugins/alerting/server/lib/result_type.ts @@ -15,6 +15,10 @@ export interface Err { } export type Result = Ok | Err; +export type Resultable = { + [P in keyof T]: Result; +}; + export function asOk(value: T): Ok { return { tag: 'ok', @@ -52,3 +56,7 @@ export function map( ): Resolution { return isOk(result) ? onOk(result.value) : onErr(result.error); } + +export function resolveErr(result: Result, onErr: (error: E) => T): T { + return isOk(result) ? result.value : onErr(result.error); +} diff --git a/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.test.ts b/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.test.ts index 87fa33a9cea5..3ff9e6360427 100644 --- a/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.test.ts +++ b/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.test.ts @@ -405,4 +405,97 @@ describe('Task Runner', () => { } `); }); + + test('recovers gracefully when the Alert Task Runner throws an exception when fetching the encrypted attributes', async () => { + encryptedSavedObjectsPlugin.getDecryptedAsInternalUser.mockImplementation(() => { + throw new Error('OMG'); + }); + + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + taskRunnerFactoryInitializerParams + ); + + savedObjectsClient.get.mockResolvedValueOnce(mockedAlertTypeSavedObject); + + const runnerResult = await taskRunner.run(); + + expect(runnerResult).toMatchInlineSnapshot(` + Object { + "runAt": 1970-01-01T00:05:00.000Z, + "state": Object { + "previousStartedAt": 1970-01-01T00:00:00.000Z, + "startedAt": 1969-12-31T23:55:00.000Z, + }, + } + `); + }); + + test('recovers gracefully when the Alert Task Runner throws an exception when getting internal Services', async () => { + taskRunnerFactoryInitializerParams.getServices.mockImplementation(() => { + throw new Error('OMG'); + }); + + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + taskRunnerFactoryInitializerParams + ); + + savedObjectsClient.get.mockResolvedValueOnce(mockedAlertTypeSavedObject); + encryptedSavedObjectsPlugin.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + + const runnerResult = await taskRunner.run(); + + expect(runnerResult).toMatchInlineSnapshot(` + Object { + "runAt": 1970-01-01T00:05:00.000Z, + "state": Object { + "previousStartedAt": 1970-01-01T00:00:00.000Z, + "startedAt": 1969-12-31T23:55:00.000Z, + }, + } + `); + }); + + test('recovers gracefully when the Alert Task Runner throws an exception when fetching attributes', async () => { + savedObjectsClient.get.mockImplementation(() => { + throw new Error('OMG'); + }); + + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + taskRunnerFactoryInitializerParams + ); + + encryptedSavedObjectsPlugin.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + + const runnerResult = await taskRunner.run(); + + expect(runnerResult).toMatchInlineSnapshot(` + Object { + "runAt": 1970-01-01T00:05:00.000Z, + "state": Object { + "previousStartedAt": 1970-01-01T00:00:00.000Z, + "startedAt": 1969-12-31T23:55:00.000Z, + }, + } + `); + }); }); diff --git a/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.ts b/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.ts index 42c332e82e03..f266ac3058d1 100644 --- a/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.ts +++ b/x-pack/legacy/plugins/alerting/server/task_runner/task_runner.ts @@ -14,10 +14,17 @@ import { AlertInstance, createAlertInstanceFactory } from '../alert_instance'; import { getNextRunAt } from './get_next_run_at'; import { validateAlertTypeParams } from '../lib'; import { AlertType, RawAlert, IntervalSchedule, Services, State, AlertInfoParams } from '../types'; -import { promiseResult, map } from '../lib/result_type'; +import { promiseResult, map, Resultable, asOk, asErr, resolveErr } from '../lib/result_type'; type AlertInstances = Record; +const FALLBACK_RETRY_INTERVAL: IntervalSchedule = { interval: '5m' }; + +interface AlertTaskRunResult { + state: State; + runAt: Date; +} + export class TaskRunner { private context: TaskRunnerContext; private logger: Logger; @@ -190,7 +197,7 @@ export class TaskRunner { }; } - async validateAndRunAlert( + async validateAndExecuteAlert( services: Services, apiKey: string | null, attributes: RawAlert, @@ -217,11 +224,9 @@ export class TaskRunner { ); } - async run() { + async loadAlertAttributesAndRun(): Promise> { const { params: { alertId, spaceId }, - startedAt: previousStartedAt, - state: originalState, } = this.taskInstance; const apiKey = await this.getApiKeyForAlertPermissions(alertId, spaceId); @@ -233,11 +238,34 @@ export class TaskRunner { alertId ); + return { + state: await promiseResult( + this.validateAndExecuteAlert(services, apiKey, attributes, references) + ), + runAt: asOk( + getNextRunAt( + new Date(this.taskInstance.startedAt!), + // we do not currently have a good way of returning the type + // from SavedObjectsClient, and as we currenrtly require a schedule + // and we only support `interval`, we can cast this safely + attributes.schedule as IntervalSchedule + ) + ), + }; + } + + async run(): Promise { + const { + params: { alertId }, + startedAt: previousStartedAt, + state: originalState, + } = this.taskInstance; + + const { state, runAt } = await errorAsAlertTaskRunResult(this.loadAlertAttributesAndRun()); + return { state: map( - await promiseResult( - this.validateAndRunAlert(services, apiKey, attributes, references) - ), + state, (stateUpdates: State) => { return { ...stateUpdates, @@ -252,13 +280,32 @@ export class TaskRunner { }; } ), - runAt: getNextRunAt( - new Date(this.taskInstance.startedAt!), - // we do not currently have a good way of returning the type - // from SavedObjectsClient, and as we currenrtly require a schedule - // and we only support `interval`, we can cast this safely - attributes.schedule as IntervalSchedule + runAt: resolveErr(runAt, () => + getNextRunAt( + new Date(), + // if we fail at this point we wish to recover but don't have access to the Alert's + // attributes, so we'll use a default interval to prevent the underlying task from + // falling into a failed state + FALLBACK_RETRY_INTERVAL + ) ), }; } } + +/** + * If an error is thrown, wrap it in an AlertTaskRunResult + * so that we can treat each field independantly + */ +async function errorAsAlertTaskRunResult( + future: Promise> +): Promise> { + try { + return await future; + } catch (e) { + return { + state: asErr(e), + runAt: asErr(e), + }; + } +}