diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index acbea037663e5..feed8764e545e 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -251,6 +251,7 @@ export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`; export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`; export const ALERT_THREAD_POOL_SEARCH_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_search_rejections`; export const ALERT_THREAD_POOL_WRITE_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_write_rejections`; +export const ALERT_CCR_READ_EXCEPTIONS = `${ALERT_PREFIX}ccr_read_exceptions`; /** * Legacy alerts details/label for server and public use @@ -451,6 +452,25 @@ export const ALERT_DETAILS = { 'Alert when the number of rejections in the write thread pool exceeds the threshold.', }), }, + [ALERT_CCR_READ_EXCEPTIONS]: { + paramDetails: { + duration: { + label: i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.paramDetails.duration.label', + { + defaultMessage: `In the last`, + } + ), + type: AlertParamType.Duration, + }, + }, + label: i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.label', { + defaultMessage: 'CCR read exceptions', + }), + description: i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.description', { + defaultMessage: 'Alert if any CCR read exceptions have been detected.', + }), + }, }; export const ALERT_PANEL_MENU = [ @@ -485,6 +505,7 @@ export const ALERT_PANEL_MENU = [ { alertName: ALERT_LICENSE_EXPIRATION }, { alertName: ALERT_THREAD_POOL_SEARCH_REJECTIONS }, { alertName: ALERT_THREAD_POOL_WRITE_REJECTIONS }, + { alertName: ALERT_CCR_READ_EXCEPTIONS }, ], }, ]; @@ -505,6 +526,7 @@ export const ALERTS = [ ALERT_MISSING_MONITORING_DATA, ALERT_THREAD_POOL_SEARCH_REJECTIONS, ALERT_THREAD_POOL_WRITE_REJECTIONS, + ALERT_CCR_READ_EXCEPTIONS, ]; /** diff --git 
a/x-pack/plugins/monitoring/common/types/alerts.ts b/x-pack/plugins/monitoring/common/types/alerts.ts index 0f10e0e48962b..93807f9df12b0 100644 --- a/x-pack/plugins/monitoring/common/types/alerts.ts +++ b/x-pack/plugins/monitoring/common/types/alerts.ts @@ -22,6 +22,7 @@ export interface CommonAlertState { export interface CommonAlertFilter { nodeUuid?: string; + shardId?: string; } export interface CommonAlertParamDetail { @@ -103,6 +104,7 @@ export interface AlertUiState { export interface AlertMessage { text: string; // Do this. #link this is a link #link + code?: string; nextSteps?: AlertMessage[]; tokens?: AlertMessageToken[]; } @@ -165,6 +167,20 @@ export interface AlertMemoryUsageNodeStats extends AlertNodeStats { export interface AlertMissingData extends AlertNodeStats { gapDuration: number; } +export interface CCRReadExceptionsStats { + remoteCluster: string; + followerIndex: string; + shardId: number; + leaderIndex: string; + lastReadException: { type: string; reason: string }; + clusterUuid: string; + ccs: string; +} + +export interface CCRReadExceptionsUIMeta extends CCRReadExceptionsStats { + instanceId: string; + itemLabel: string; +} export interface AlertData { nodeName?: string; diff --git a/x-pack/plugins/monitoring/public/alerts/callout.tsx b/x-pack/plugins/monitoring/public/alerts/callout.tsx index d3feb148cf986..af2d8c3fef60a 100644 --- a/x-pack/plugins/monitoring/public/alerts/callout.tsx +++ b/x-pack/plugins/monitoring/public/alerts/callout.tsx @@ -15,6 +15,7 @@ import { EuiFlexGroup, EuiFlexItem, EuiIcon, + EuiCodeBlock, } from '@elastic/eui'; import { replaceTokens } from './lib/replace_tokens'; import { AlertMessage } from '../../common/types/alerts'; @@ -66,12 +67,24 @@ export const AlertsCallout: React.FC = (props: Props) => { ); + const { code } = status.state.state.ui.message; const accordion = ( + {code?.length ? 
( + + {code} + + ) : null} = (props: Props) => { paddingLeft: `0.5rem`, }} > - {(status.state.state.ui.message.nextSteps || []).map((step: AlertMessage) => { - return {}} label={replaceTokens(step)} />; - })} + {(status.state.state.ui.message.nextSteps || []).map( + (step: AlertMessage, stepIndex: number) => { + return ( + {}} + label={replaceTokens(step)} + key={index + stepIndex} + /> + ); + } + )} } + label={} /> diff --git a/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx b/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx new file mode 100644 index 0000000000000..2dafadf272608 --- /dev/null +++ b/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import React from 'react'; +import { i18n } from '@kbn/i18n'; +import { Expression, Props } from '../components/duration/expression'; +import { AlertTypeModel, ValidationResult } from '../../../../triggers_actions_ui/public'; +import { ALERT_CCR_READ_EXCEPTIONS, ALERT_DETAILS } from '../../../common/constants'; + +interface ValidateOptions { + duration: string; +} + +const validate = (inputValues: ValidateOptions): ValidationResult => { + const validationResult = { errors: {} }; + const errors: { [key: string]: string[] } = { + duration: [], + }; + if (!inputValues.duration) { + errors.duration.push( + i18n.translate('xpack.monitoring.alerts.validation.duration', { + defaultMessage: 'A valid duration is required.', + }) + ); + } + validationResult.errors = errors; + return validationResult; +}; + +export function createCCRReadExceptionsAlertType(): AlertTypeModel { + return { + id: ALERT_CCR_READ_EXCEPTIONS, + name: ALERT_DETAILS[ALERT_CCR_READ_EXCEPTIONS].label, + description: 
ALERT_DETAILS[ALERT_CCR_READ_EXCEPTIONS].description, + iconClass: 'bell', + documentationUrl(docLinks) { + return `${docLinks.ELASTIC_WEBSITE_URL}guide/en/kibana/${docLinks.DOC_LINK_VERSION}/kibana-alerts.html`; + }, + alertParamsExpression: (props: Props) => ( + + ), + validate, + defaultActionMessage: '{{context.internalFullMessage}}', + requiresAppContext: true, + }; +} diff --git a/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_category.tsx b/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_category.tsx index 82a1a1f841a22..bbea32e4d2d04 100644 --- a/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_category.tsx +++ b/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_category.tsx @@ -171,6 +171,7 @@ export function getAlertPanelsByCategory( for (const { alert, states } of category.alerts) { const items = []; for (const alertState of states.filter(({ state }) => stateFilter(state))) { + const { nodeName, itemLabel } = alertState.state; items.push({ name: ( @@ -188,7 +189,7 @@ export function getAlertPanelsByCategory( )} - {alertState.state.nodeName} + {nodeName || itemLabel} ), panel: ++tertiaryPanelIndex, diff --git a/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_node.tsx b/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_node.tsx index c48706f4edcb9..735b9c3637cdd 100644 --- a/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_node.tsx +++ b/x-pack/plugins/monitoring/public/alerts/lib/get_alert_panels_by_node.tsx @@ -69,10 +69,11 @@ export function getAlertPanelsByNode( const states = (statesByNodes[nodeUuid] as CommonAlertState[]).filter(({ state }) => stateFilter(state) ); + const { nodeName, itemLabel } = states[0].state; return { name: ( - {states[0].state.nodeName} ({states.length}) + {nodeName || itemLabel} ({states.length}) ), panel: index + 1, @@ -86,7 +87,8 @@ export function getAlertPanelsByNode( let title = ''; for (const { alert, states } of 
alertsForNode) { for (const alertState of states) { - title = alertState.state.nodeName; + const { nodeName, itemLabel } = alertState.state; + title = nodeName || itemLabel; panelItems.push({ name: ( diff --git a/x-pack/plugins/monitoring/public/alerts/lib/replace_tokens.tsx b/x-pack/plugins/monitoring/public/alerts/lib/replace_tokens.tsx index b8ac69cbae68a..0ddda96a1100d 100644 --- a/x-pack/plugins/monitoring/public/alerts/lib/replace_tokens.tsx +++ b/x-pack/plugins/monitoring/public/alerts/lib/replace_tokens.tsx @@ -77,6 +77,7 @@ export function replaceTokens(alertMessage: AlertMessage): JSX.Element | string } const url = linkToken.partialUrl + .replace('{basePath}', Legacy.shims.getBasePath()) .replace('{elasticWebsiteUrl}', Legacy.shims.docLinks.ELASTIC_WEBSITE_URL) .replace('{docLinkVersion}', Legacy.shims.docLinks.DOC_LINK_VERSION); const index = text.indexOf(linkPart[0]); diff --git a/x-pack/plugins/monitoring/public/alerts/panel.tsx b/x-pack/plugins/monitoring/public/alerts/panel.tsx index 139010a3d2446..2d319a81dd063 100644 --- a/x-pack/plugins/monitoring/public/alerts/panel.tsx +++ b/x-pack/plugins/monitoring/public/alerts/panel.tsx @@ -10,6 +10,7 @@ import { EuiHorizontalRule, EuiListGroup, EuiListGroupItem, + EuiCodeBlock, } from '@elastic/eui'; import { CommonAlert, CommonAlertState, AlertMessage } from '../../common/types/alerts'; @@ -47,12 +48,24 @@ export const AlertPanel: React.FC = (props: Props) => { ) : null; + const { code } = alertState.state.ui.message; return (
{replaceTokens(alertState.state.ui.message)}
+ {code?.length ? ( + + {code} + + ) : null} {nextStepsUi ? : null} {nextStepsUi}
diff --git a/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js b/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js index ded309ce64e2e..8849fb05fcf3c 100644 --- a/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js +++ b/x-pack/plugins/monitoring/public/components/cluster/overview/elasticsearch_panel.js @@ -47,6 +47,7 @@ import { ALERT_NODES_CHANGED, ALERT_ELASTICSEARCH_VERSION_MISMATCH, ALERT_MISSING_MONITORING_DATA, + ALERT_CCR_READ_EXCEPTIONS, } from '../../../../common/constants'; import { AlertsBadge } from '../../../alerts/badge'; import { shouldShowAlertBadge } from '../../../alerts/lib/should_show_alert_badge'; @@ -159,7 +160,11 @@ function renderLog(log) { ); } -const OVERVIEW_PANEL_ALERTS = [ALERT_CLUSTER_HEALTH, ALERT_LICENSE_EXPIRATION]; +const OVERVIEW_PANEL_ALERTS = [ + ALERT_CLUSTER_HEALTH, + ALERT_LICENSE_EXPIRATION, + ALERT_CCR_READ_EXCEPTIONS, +]; const NODES_PANEL_ALERTS = [ ALERT_CPU_USAGE, diff --git a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr/__snapshots__/ccr.test.js.snap b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr/__snapshots__/ccr.test.js.snap index 1a28ef7fceca8..794982a0b6193 100644 --- a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr/__snapshots__/ccr.test.js.snap +++ b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr/__snapshots__/ccr.test.js.snap @@ -3,6 +3,15 @@ exports[`Ccr that it renders normally 1`] = ` + +

+ +

+
{ + const [itemIdToExpandedRowMap, setItemIdToExpandedRowMap] = useState({}); + const toggleShards = (index, shards) => { + const itemIdToExpandedRowMapValues = { ...itemIdToExpandedRowMap }; - if (itemIdToExpandedRowMap[index]) { - delete itemIdToExpandedRowMap[index]; + if (itemIdToExpandedRowMapValues[index]) { + delete itemIdToExpandedRowMapValues[index]; } else { let pagination = { initialPageSize: 5, @@ -50,7 +44,7 @@ export class Ccr extends Component { pagination = false; } - itemIdToExpandedRowMap[index] = ( + itemIdToExpandedRowMapValues[index] = ( null, }, + { + field: 'alerts', + sortable: true, + name: i18n.translate( + 'xpack.monitoring.elasticsearch.ccr.shardsTable.alertsColumnTitle', + { + defaultMessage: 'Alerts', + } + ), + render: (_field, item) => { + return ( + state.meta.shardId === item.shardId} + /> + ); + }, + }, { field: 'syncLagOps', name: i18n.translate( @@ -156,11 +169,11 @@ export class Ccr extends Component { /> ); } - this.setState({ itemIdToExpandedRowMap }); - } + setItemIdToExpandedRowMap(itemIdToExpandedRowMapValues); + }; - renderTable() { - const { data } = this.props; + const renderTable = () => { + const { data, alerts } = props; const items = data; let pagination = { @@ -193,9 +206,9 @@ export class Ccr extends Component { ), sortable: true, render: (index, { shards }) => { - const expanded = !!this.state.itemIdToExpandedRowMap[index]; + const expanded = !!itemIdToExpandedRowMap[index]; return ( - this.toggleShards(index, shards)}> + toggleShards(index, shards)}> {index}   {expanded ? 
: } @@ -213,6 +226,25 @@ export class Ccr extends Component { } ), }, + { + field: 'alerts', + sortable: true, + name: i18n.translate( + 'xpack.monitoring.elasticsearch.ccr.ccrListingTable.alertsColumnTitle', + { + defaultMessage: 'Alerts', + } + ), + render: (_field, item) => { + return ( + state.meta.followerIndex === item.index} + /> + ); + }, + }, { field: 'syncLagOps', sortable: true, @@ -263,20 +295,26 @@ export class Ccr extends Component { }} sorting={sorting} itemId="id" - itemIdToExpandedRowMap={this.state.itemIdToExpandedRowMap} + itemIdToExpandedRowMap={itemIdToExpandedRowMap} /> ); - } + }; - render() { - return ( - - - - {this.renderTable()} - - - - ); - } -} + return ( + + + +

+ +

+
+ + {renderTable()} + +
+
+ ); +}; diff --git a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/__snapshots__/ccr_shard.test.js.snap b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/__snapshots__/ccr_shard.test.js.snap index e35d2ba6108f5..81398c1d8e836 100644 --- a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/__snapshots__/ccr_shard.test.js.snap +++ b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/__snapshots__/ccr_shard.test.js.snap @@ -2,50 +2,46 @@ exports[`CcrShard that is renders an exception properly 1`] = ` - -

- - - -

-
- -
`; @@ -59,44 +55,50 @@ exports[`CcrShard that it renders normally 1`] = ` } > - + + + + - - + + + + + + {this.renderErrors()} {this.renderCharts()} diff --git a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/status.js b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/status.js index 52de0659ed527..657301d6e1cb3 100644 --- a/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/status.js +++ b/x-pack/plugins/monitoring/public/components/elasticsearch/ccr_shard/status.js @@ -8,8 +8,9 @@ import React from 'react'; import { SummaryStatus } from '../../summary_status'; import { formatMetric } from '../../../lib/format_number'; import { i18n } from '@kbn/i18n'; +import { AlertsStatus } from '../../../alerts/status'; -export function Status({ stat, formattedLeader, oldestStat }) { +export function Status({ stat, formattedLeader, oldestStat, alerts = {} }) { const { follower_index: followerIndex, shard_id: shardId, @@ -23,6 +24,12 @@ export function Status({ stat, formattedLeader, oldestStat }) { } = oldestStat; const metrics = [ + { + label: i18n.translate('xpack.monitoring.elasticsearch.ccrShard.status.alerts', { + defaultMessage: 'Alerts', + }), + value: , + }, { label: i18n.translate('xpack.monitoring.elasticsearch.ccrShard.status.followerIndexLabel', { defaultMessage: 'Follower Index', diff --git a/x-pack/plugins/monitoring/public/plugin.ts b/x-pack/plugins/monitoring/public/plugin.ts index 0439b47569e72..a0de3a7663a12 100644 --- a/x-pack/plugins/monitoring/public/plugin.ts +++ b/x-pack/plugins/monitoring/public/plugin.ts @@ -156,6 +156,7 @@ export class MonitoringPlugin './alerts/thread_pool_rejections_alert' ); const { createMemoryUsageAlertType } = await import('./alerts/memory_usage_alert'); + const { createCCRReadExceptionsAlertType } = await import('./alerts/ccr_read_exceptions_alert'); const { triggersActionsUi: { alertTypeRegistry }, @@ -176,6 +177,7 @@ export class MonitoringPlugin 
ALERT_DETAILS[ALERT_THREAD_POOL_WRITE_REJECTIONS] ) ); + alertTypeRegistry.register(createCCRReadExceptionsAlertType()); const legacyAlertTypes = createLegacyAlertTypes(); for (const legacyAlertType of legacyAlertTypes) { alertTypeRegistry.register(legacyAlertType); diff --git a/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/index.js b/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/index.js index 6569340785736..9e26d453d76a3 100644 --- a/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/index.js +++ b/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/index.js @@ -12,7 +12,13 @@ import { routeInitProvider } from '../../../lib/route_init'; import template from './index.html'; import { Ccr } from '../../../components/elasticsearch/ccr'; import { MonitoringViewBaseController } from '../../base_controller'; -import { CODE_PATH_ELASTICSEARCH } from '../../../../common/constants'; +import { + CODE_PATH_ELASTICSEARCH, + ALERT_CCR_READ_EXCEPTIONS, + ELASTICSEARCH_SYSTEM_ID, +} from '../../../../common/constants'; +import { SetupModeRenderer } from '../../../components/renderers'; +import { SetupModeContext } from '../../../components/setup_mode/setup_mode_context'; uiRoutes.when('/elasticsearch/ccr', { template, @@ -37,6 +43,12 @@ uiRoutes.when('/elasticsearch/ccr', { getPageData, $scope, $injector, + alerts: { + shouldFetch: true, + options: { + alertTypeIds: [ALERT_CCR_READ_EXCEPTIONS], + }, + }, }); $scope.$watch( @@ -45,7 +57,20 @@ uiRoutes.when('/elasticsearch/ccr', { if (!data) { return; } - this.renderReact(); + this.renderReact( + ( + + {flyoutComponent} + + {bottomBarComponent} + + )} + /> + ); } ); } diff --git a/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/shard/index.js b/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/shard/index.js index 33a2d27f39856..6c1c4218568e3 100644 --- a/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/shard/index.js +++ 
b/x-pack/plugins/monitoring/public/views/elasticsearch/ccr/shard/index.js @@ -13,7 +13,13 @@ import { routeInitProvider } from '../../../../lib/route_init'; import template from './index.html'; import { MonitoringViewBaseController } from '../../../base_controller'; import { CcrShard } from '../../../../components/elasticsearch/ccr_shard'; -import { CODE_PATH_ELASTICSEARCH } from '../../../../../common/constants'; +import { + CODE_PATH_ELASTICSEARCH, + ALERT_CCR_READ_EXCEPTIONS, + ELASTICSEARCH_SYSTEM_ID, +} from '../../../../../common/constants'; +import { SetupModeRenderer } from '../../../../components/renderers'; +import { SetupModeContext } from '../../../../components/setup_mode/setup_mode_context'; uiRoutes.when('/elasticsearch/ccr/:index/shard/:shardId', { template, @@ -27,6 +33,7 @@ uiRoutes.when('/elasticsearch/ccr/:index/shard/:shardId', { controllerAs: 'elasticsearchCcr', controller: class ElasticsearchCcrController extends MonitoringViewBaseController { constructor($injector, $scope, pageData) { + const $route = $injector.get('$route'); super({ title: i18n.translate('xpack.monitoring.elasticsearch.ccr.shard.routeTitle', { defaultMessage: 'Elasticsearch - Ccr - Shard', @@ -35,6 +42,17 @@ uiRoutes.when('/elasticsearch/ccr/:index/shard/:shardId', { getPageData, $scope, $injector, + alerts: { + shouldFetch: true, + options: { + alertTypeIds: [ALERT_CCR_READ_EXCEPTIONS], + filters: [ + { + shardId: $route.current.pathParams.shardId, + }, + ], + }, + }, }); $scope.instance = i18n.translate('xpack.monitoring.elasticsearch.ccr.shard.instanceTitle', { @@ -62,7 +80,20 @@ uiRoutes.when('/elasticsearch/ccr/:index/shard/:shardId', { }) ); - this.renderReact(); + this.renderReact( + ( + + {flyoutComponent} + + {bottomBarComponent} + + )} + /> + ); } ); } diff --git a/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts b/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts index b43a56562a2aa..64b7148d87d9e 100644 --- 
a/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts +++ b/x-pack/plugins/monitoring/server/alerts/alerts_factory.ts @@ -5,6 +5,7 @@ */ import { + CCRReadExceptionsAlert, CpuUsageAlert, MissingMonitoringDataAlert, DiskUsageAlert, @@ -32,6 +33,7 @@ import { ALERT_LOGSTASH_VERSION_MISMATCH, ALERT_KIBANA_VERSION_MISMATCH, ALERT_ELASTICSEARCH_VERSION_MISMATCH, + ALERT_CCR_READ_EXCEPTIONS, } from '../../common/constants'; import { AlertsClient } from '../../../alerts/server'; import { Alert } from '../../../alerts/common'; @@ -49,6 +51,7 @@ const BY_TYPE = { [ALERT_LOGSTASH_VERSION_MISMATCH]: LogstashVersionMismatchAlert, [ALERT_KIBANA_VERSION_MISMATCH]: KibanaVersionMismatchAlert, [ALERT_ELASTICSEARCH_VERSION_MISMATCH]: ElasticsearchVersionMismatchAlert, + [ALERT_CCR_READ_EXCEPTIONS]: CCRReadExceptionsAlert, }; export class AlertsFactory { @@ -68,7 +71,6 @@ export class AlertsFactory { if (!alertClientAlerts.total || !alertClientAlerts.data?.length) { return; - // return new alertCls() as BaseAlert; } const [rawAlert] = alertClientAlerts.data as [Alert]; diff --git a/x-pack/plugins/monitoring/server/alerts/base_alert.ts b/x-pack/plugins/monitoring/server/alerts/base_alert.ts index ebff72a255777..a3bcc310b8084 100644 --- a/x-pack/plugins/monitoring/server/alerts/base_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/base_alert.ts @@ -345,7 +345,7 @@ export class BaseAlert { const firingNodeUuids = nodes .filter((node) => node.shouldFire) - .map((node) => node.meta.nodeId) + .map((node) => node.meta.nodeId || node.meta.instanceId) .join(','); const instanceId = `${this.alertOptions.id}:${cluster.clusterUuid}:${firingNodeUuids}`; const instance = services.alertInstanceFactory(instanceId); @@ -355,13 +355,16 @@ export class BaseAlert { if (!node.shouldFire) { continue; } - const stat = node.meta as AlertNodeState; + const { meta } = node; const nodeState = this.getDefaultAlertState(cluster, node) as AlertNodeState; if (key) { - nodeState[key] = stat[key]; + 
nodeState[key] = meta[key]; } - nodeState.nodeId = stat.nodeId || node.nodeId!; - nodeState.nodeName = stat.nodeName || node.nodeName || nodeState.nodeId; + nodeState.nodeId = meta.nodeId || node.nodeId! || meta.instanceId; + // TODO: make these functions more generic, so it's node/item agnostic + nodeState.nodeName = meta.itemLabel || meta.nodeName || node.nodeName || nodeState.nodeId; + nodeState.itemLabel = meta.itemLabel; + nodeState.meta = meta; nodeState.ui.triggeredMS = currentUTC; nodeState.ui.isFiring = true; nodeState.ui.severity = node.severity; diff --git a/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts new file mode 100644 index 0000000000000..6034f32a8c659 --- /dev/null +++ b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts @@ -0,0 +1,289 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ + +import { i18n } from '@kbn/i18n'; +import { BaseAlert } from './base_alert'; +import { + AlertData, + AlertCluster, + AlertState, + AlertMessage, + CCRReadExceptionsUIMeta, + AlertMessageTimeToken, + AlertMessageLinkToken, + AlertInstanceState, + CommonAlertParams, + CommonAlertFilter, + CCRReadExceptionsStats, +} from '../../common/types/alerts'; +import { AlertInstance } from '../../../alerts/server'; +import { + INDEX_PATTERN_ELASTICSEARCH, + ALERT_CCR_READ_EXCEPTIONS, + ALERT_DETAILS, +} from '../../common/constants'; +import { fetchCCRReadExceptions } from '../lib/alerts/fetch_ccr_read_exceptions'; +import { getCcsIndexPattern } from '../lib/alerts/get_ccs_index_pattern'; +import { AlertMessageTokenType, AlertSeverity } from '../../common/enums'; +import { parseDuration } from '../../../alerts/common/parse_duration'; +import { SanitizedAlert, RawAlertInstance } from '../../../alerts/common'; +import { AlertingDefaults, createLink } from './alert_helpers'; +import { appendMetricbeatIndex } from '../lib/alerts/append_mb_index'; +import { Globals } from '../static_globals'; + +export class CCRReadExceptionsAlert extends BaseAlert { + constructor(public rawAlert?: SanitizedAlert) { + super(rawAlert, { + id: ALERT_CCR_READ_EXCEPTIONS, + name: ALERT_DETAILS[ALERT_CCR_READ_EXCEPTIONS].label, + throttle: '6h', + defaultParams: { + duration: '1h', + }, + actionVariables: [ + { + name: 'remoteClusters', + description: i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.remoteClusters', + { + defaultMessage: 'List of remote clusters that are experiencing CCR read exceptions.', + } + ), + }, + { + name: 'followerIndices', + description: i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.followerIndices', + { + defaultMessage: 'List of follower indices reporting CCR read exceptions.', + } + ), + }, + ...Object.values(AlertingDefaults.ALERT_TYPE.context), + ], + }); + } + + protected async fetchData( + params: 
CommonAlertParams, + callCluster: any, + clusters: AlertCluster[], + availableCcs: string[] + ): Promise { + let esIndexPattern = appendMetricbeatIndex(Globals.app.config, INDEX_PATTERN_ELASTICSEARCH); + if (availableCcs) { + esIndexPattern = getCcsIndexPattern(esIndexPattern, availableCcs); + } + const { duration: durationString } = params; + const duration = parseDuration(durationString); + const endMs = +new Date(); + const startMs = endMs - duration; + const stats = await fetchCCRReadExceptions( + callCluster, + esIndexPattern, + startMs, + endMs, + Globals.app.config.ui.max_bucket_size + ); + + return stats.map((stat) => { + const { + remoteCluster, + followerIndex, + shardId, + leaderIndex, + lastReadException, + clusterUuid, + ccs, + } = stat; + return { + shouldFire: true, + severity: AlertSeverity.Danger, + meta: { + remoteCluster, + followerIndex, + shardId, + leaderIndex, + lastReadException, + instanceId: `${remoteCluster}:${followerIndex}`, + itemLabel: followerIndex, + }, + clusterUuid, + ccs, + }; + }); + } + + protected getUiMessage(alertState: AlertState, item: AlertData): AlertMessage { + const { + remoteCluster, + followerIndex, + shardId, + lastReadException, + } = item.meta as CCRReadExceptionsUIMeta; + return { + text: i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.ui.firingMessage', { + defaultMessage: `Follower index #start_link{followerIndex}#end_link is reporting CCR read exceptions on remote cluster: {remoteCluster} at #absolute`, + values: { + remoteCluster, + followerIndex, + }, + }), + code: JSON.stringify(lastReadException, null, 2), + nextSteps: [ + createLink( + i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.identifyCCRStats', + { + defaultMessage: '#start_linkIdentify CCR usage/stats#end_link', + } + ), + 'elasticsearch/ccr', + AlertMessageTokenType.Link + ), + createLink( + i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.stackManagmentFollow', + { + defaultMessage: 
'#start_linkManage CCR follower indices#end_link', + } + ), + `{basePath}management/data/cross_cluster_replication/follower_indices` + ), + createLink( + i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.stackManagmentAutoFollow', + { + defaultMessage: '#start_linkCreate auto-follow patterns#end_link', + } + ), + `{basePath}management/data/cross_cluster_replication/auto_follow_patterns` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.followerAPIDoc', { + defaultMessage: '#start_linkAdd follower index API (Docs)#end_link', + }), + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/ccr-put-follow.html` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.ccrDocs', { + defaultMessage: '#start_linkCross-cluster replication (Docs)#end_link', + }), + `{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/xpack-ccr.html` + ), + createLink( + i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.biDirectionalReplication', + { + defaultMessage: '#start_linkBi-directional replication (Blog)#end_link', + } + ), + `{elasticWebsiteUrl}blog/bi-directional-replication-with-elasticsearch-cross-cluster-replication-ccr` + ), + createLink( + i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.ui.nextSteps.followTheLeader', { + defaultMessage: '#start_linkFollow the Leader (Blog)#end_link', + }), + `{elasticWebsiteUrl}blog/follow-the-leader-an-introduction-to-cross-cluster-replication-in-elasticsearch` + ), + ], + tokens: [ + { + startToken: '#absolute', + type: AlertMessageTokenType.Time, + isAbsolute: true, + isRelative: false, + timestamp: alertState.ui.triggeredMS, + } as AlertMessageTimeToken, + { + startToken: '#start_link', + endToken: '#end_link', + type: AlertMessageTokenType.Link, + url: `elasticsearch/ccr/${followerIndex}/shard/${shardId}`, + } as AlertMessageLinkToken, + ], + }; + } + + protected 
filterAlertInstance(alertInstance: RawAlertInstance, filters: CommonAlertFilter[]) { + const alertInstanceStates = alertInstance.state?.alertStates as AlertState[]; + const alertFilter = filters?.find((filter) => filter.shardId); + if (!filters || !filters.length || !alertInstanceStates?.length || !alertFilter?.shardId) { + return alertInstance; + } + const shardIdInt = parseInt(alertFilter.shardId!, 10); + const alertStates = alertInstanceStates.filter( + ({ meta }) => (meta as CCRReadExceptionsStats).shardId === shardIdInt + ); + return { state: { alertStates } }; + } + + protected executeActions( + instance: AlertInstance, + { alertStates }: AlertInstanceState, + item: AlertData | null, + cluster: AlertCluster + ) { + const remoteClustersList = alertStates + .map((alertState) => (alertState.meta as CCRReadExceptionsUIMeta).remoteCluster) + .join(', '); + const followerIndicesList = alertStates + .map((alertState) => (alertState.meta as CCRReadExceptionsUIMeta).followerIndex) + .join(', '); + + const shortActionText = i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.shortAction', + { + defaultMessage: + 'Verify follower and leader index relationships across the affected remote clusters.', + } + ); + const fullActionText = i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.fullAction', { + defaultMessage: 'View CCR stats', + }); + + const ccs = alertStates.find((state) => state.ccs)?.ccs; + const globalStateLink = this.createGlobalStateLink( + 'elasticsearch/ccr', + cluster.clusterUuid, + ccs + ); + + const action = `[${fullActionText}](${globalStateLink})`; + const internalShortMessage = i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.firing.internalShortMessage', + { + defaultMessage: `CCR read exceptions alert is firing for the following remote clusters: {remoteClustersList}. 
{shortActionText}`, + values: { + remoteClustersList, + shortActionText, + }, + } + ); + const internalFullMessage = i18n.translate( + 'xpack.monitoring.alerts.ccrReadExceptions.firing.internalFullMessage', + { + defaultMessage: `CCR read exceptions alert is firing for the following remote clusters: {remoteClustersList}. Current 'follower_index' indices are affected: {followerIndicesList}. {action}`, + values: { + action, + remoteClustersList, + followerIndicesList, + }, + } + ); + + instance.scheduleActions('default', { + internalShortMessage, + internalFullMessage, + state: AlertingDefaults.ALERT_STATE.firing, + remoteClusters: remoteClustersList, + followerIndices: followerIndicesList, + clusterName: cluster.clusterName, + action, + actionPlain: shortActionText, + }); + } +} diff --git a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts index 63195621fb9c8..4622f73b9feb0 100644 --- a/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts @@ -125,6 +125,13 @@ describe('CpuUsageAlert', () => { ccs: undefined, cluster: { clusterUuid, clusterName }, cpuUsage, + itemLabel: undefined, + meta: { + clusterUuid, + cpuUsage, + nodeId, + nodeName, + }, nodeId, nodeName, ui: { diff --git a/x-pack/plugins/monitoring/server/alerts/index.ts b/x-pack/plugins/monitoring/server/alerts/index.ts index 5fa718dfb34cd..b58476a01dc14 100644 --- a/x-pack/plugins/monitoring/server/alerts/index.ts +++ b/x-pack/plugins/monitoring/server/alerts/index.ts @@ -4,6 +4,7 @@ * you may not use this file except in compliance with the Elastic License. 
*/ +export { CCRReadExceptionsAlert } from './ccr_read_exceptions_alert'; export { BaseAlert } from './base_alert'; export { CpuUsageAlert } from './cpu_usage_alert'; export { MissingMonitoringDataAlert } from './missing_monitoring_data_alert'; diff --git a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts index 6ba4333309f00..65205738f82c3 100644 --- a/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts +++ b/x-pack/plugins/monitoring/server/alerts/missing_monitoring_data_alert.test.ts @@ -131,6 +131,14 @@ describe('MissingMonitoringDataAlert', () => { nodeId, nodeName, gapDuration, + itemLabel: undefined, + meta: { + clusterUuid, + gapDuration, + limit: 86400000, + nodeId, + nodeName, + }, ui: { isFiring: true, message: { diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_ccr_read_exceptions.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_ccr_read_exceptions.ts new file mode 100644 index 0000000000000..c8933a7cd14a9 --- /dev/null +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_ccr_read_exceptions.ts @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
/**
 * Fetches the latest CCR read exception for every follower index, grouped by remote cluster.
 *
 * Runs a `search` with `size: 0` that keeps only `ccr_stats` documents within
 * `[startMs, endMs]` that have a read exception, buckets them by remote cluster
 * and follower index, and reads the newest document of each bucket.
 *
 * @param callCluster function invoked as `callCluster('search', params)`
 * @param index index pattern to search
 * @param startMs lower bound of the `timestamp` range, in epoch milliseconds
 * @param endMs upper bound of the `timestamp` range, in epoch milliseconds
 * @param size maximum number of buckets for each terms aggregation
 * @returns one entry per follower index bucket that has a usable top hit
 */
export async function fetchCCRReadExceptions(
  callCluster: any,
  index: string,
  startMs: number,
  endMs: number,
  size: number
): Promise<CCRReadExceptionsStats[]> {
  const params = {
    index,
    filterPath: ['aggregations.remote_clusters.buckets'],
    body: {
      size: 0,
      query: {
        bool: {
          filter: [
            {
              nested: {
                path: 'ccr_stats.read_exceptions',
                query: {
                  exists: {
                    field: 'ccr_stats.read_exceptions.exception',
                  },
                },
              },
            },
            {
              term: {
                type: 'ccr_stats',
              },
            },
            {
              range: {
                timestamp: {
                  format: 'epoch_millis',
                  gte: startMs,
                  lte: endMs,
                },
              },
            },
          ],
        },
      },
      aggs: {
        remote_clusters: {
          terms: {
            field: 'ccr_stats.remote_cluster',
            size,
          },
          aggs: {
            follower_indices: {
              terms: {
                field: 'ccr_stats.follower_index',
                size,
              },
              aggs: {
                hits: {
                  top_hits: {
                    sort: [
                      {
                        timestamp: {
                          order: 'desc',
                          unmapped_type: 'long',
                        },
                      },
                    ],
                    _source: {
                      includes: [
                        'cluster_uuid',
                        'ccr_stats.read_exceptions',
                        'ccr_stats.shard_id',
                        'ccr_stats.leader_index',
                      ],
                    },
                    size: 1,
                  },
                },
              },
            },
          },
        },
      },
    },
  };

  const response = await callCluster('search', params);
  const stats: CCRReadExceptionsStats[] = [];

  // With `filterPath` the response may lack `aggregations` altogether when
  // nothing matched, so every level is read defensively.
  const remoteClusterBuckets: any[] = response?.aggregations?.remote_clusters?.buckets ?? [];

  for (const remoteClusterBucket of remoteClusterBuckets) {
    const remoteCluster = remoteClusterBucket.key;
    const followerIndicesBuckets: any[] = remoteClusterBucket.follower_indices?.buckets ?? [];

    for (const followerIndexBucket of followerIndicesBuckets) {
      const latestHit = followerIndexBucket.hits?.hits?.hits?.[0];
      const ccrStats = latestHit?._source?.ccr_stats;
      const readExceptions = ccrStats?.read_exceptions;

      // Skip buckets without a usable document instead of throwing on destructuring.
      if (!Array.isArray(readExceptions) || readExceptions.length === 0) {
        continue;
      }

      const monitoringIndexName: unknown = latestHit._index;
      // A "<name>:" prefix on the index is taken as the ccs value.
      const ccs =
        typeof monitoringIndexName === 'string' && monitoringIndexName.includes(':')
          ? monitoringIndexName.split(':')[0]
          : null;

      stats.push({
        clusterUuid: latestHit._source.cluster_uuid,
        remoteCluster,
        followerIndex: followerIndexBucket.key,
        shardId: ccrStats.shard_id,
        leaderIndex: ccrStats.leader_index,
        // Only the last entry of the exceptions array is reported.
        lastReadException: readExceptions[readExceptions.length - 1].exception,
        ccs: ccs as any,
      });
    }
  }
  return stats;
}