Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Monitoring][Alerting] CCR read exceptions alert #85908

Merged
merged 13 commits into from
Dec 18, 2020
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions x-pack/plugins/monitoring/common/constants.ts
Original file line number Diff line number Diff line change
@@ -251,6 +251,7 @@ export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`;
export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`;
export const ALERT_THREAD_POOL_SEARCH_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_search_rejections`;
export const ALERT_THREAD_POOL_WRITE_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_write_rejections`;
/** Alert type ID of the CCR read exceptions alert, built from the shared `ALERT_PREFIX`. */
export const ALERT_CCR_READ_EXCEPTIONS = `${ALERT_PREFIX}ccr_read_exceptions`;

/**
* Legacy alerts details/label for server and public use
@@ -451,6 +452,25 @@ export const ALERT_DETAILS = {
'Alert when the number of rejections in the write thread pool exceeds the threshold.',
}),
},
[ALERT_CCR_READ_EXCEPTIONS]: {
paramDetails: {
duration: {
label: i18n.translate(
'xpack.monitoring.alerts.ccrReadExceptions.paramDetails.duration.label',
{
defaultMessage: `In the last`,
}
),
type: AlertParamType.Duration,
},
},
label: i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.label', {
defaultMessage: 'CCR read exceptions',
}),
description: i18n.translate('xpack.monitoring.alerts.ccrReadExceptions.description', {
defaultMessage: 'Alert if any CCR read exceptions have been detected.',
}),
},
};

export const ALERT_PANEL_MENU = [
@@ -485,6 +505,7 @@ export const ALERT_PANEL_MENU = [
{ alertName: ALERT_LICENSE_EXPIRATION },
{ alertName: ALERT_THREAD_POOL_SEARCH_REJECTIONS },
{ alertName: ALERT_THREAD_POOL_WRITE_REJECTIONS },
{ alertName: ALERT_CCR_READ_EXCEPTIONS },
],
},
];
@@ -505,6 +526,7 @@ export const ALERTS = [
ALERT_MISSING_MONITORING_DATA,
ALERT_THREAD_POOL_SEARCH_REJECTIONS,
ALERT_THREAD_POOL_WRITE_REJECTIONS,
ALERT_CCR_READ_EXCEPTIONS,
];

/**
14 changes: 14 additions & 0 deletions x-pack/plugins/monitoring/common/types/alerts.ts
Original file line number Diff line number Diff line change
@@ -165,6 +165,20 @@ export interface AlertMemoryUsageNodeStats extends AlertNodeStats {
export interface AlertMissingData extends AlertNodeStats {
gapDuration: number;
}
/**
 * Shape of a single CCR read-exception record.
 *
 * NOTE(review): field meanings below are inferred from the field names only;
 * confirm against the code that produces these records.
 */
export interface CCRReadExceptionsStats {
  /** Presumably the UUID of the monitored cluster the record belongs to. */
  clusterUuid: string;
  /** Presumably the cross-cluster-search identifier of the source — TODO confirm. */
  ccs: string;
  /** Presumably the name of the remote cluster being followed. */
  remoteCluster: string;
  /** Presumably the index on the remote (leader) side. */
  leaderIndex: string;
  /** Presumably the local index following `leaderIndex`. */
  followerIndex: string;
  /** Numeric shard identifier. */
  shardId: number;
  /** Type and reason strings of the most recent read exception. */
  lastReadException: {
    type: string;
    reason: string;
  };
}

/**
 * A CCR read-exception record extended with the two string fields
 * `instanceId` and `itemLabel`.
 */
export interface CCRReadExceptionsUIMeta extends CCRReadExceptionsStats {
  /** Presumably the human-readable label shown for this item in the UI — TODO confirm. */
  itemLabel: string;
  /** Presumably a unique identifier for this item's alert instance — TODO confirm. */
  instanceId: string;
}

export interface AlertData {
nodeName?: string;
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import React from 'react';
import { i18n } from '@kbn/i18n';
import { Expression, Props } from '../components/duration/expression';
import { AlertTypeModel, ValidationResult } from '../../../../triggers_actions_ui/public';
import { ALERT_CCR_READ_EXCEPTIONS, ALERT_DETAILS } from '../../../common/constants';

/** Alert parameters inspected by `validate`. */
interface ValidateOptions {
  /** Duration value as a string; an empty string is treated as missing. */
  duration: string;
}

/**
 * Validates the alert parameters entered in the UI.
 *
 * @param inputValues - the parameters to check; only `duration` is inspected.
 * @returns a result whose `errors.duration` array is empty when a duration is
 *   present, or holds one translated message when `duration` is falsy.
 */
const validate = (inputValues: ValidateOptions): ValidationResult => {
  const durationErrors: string[] = [];

  // Any falsy duration (empty string, undefined at runtime) counts as missing.
  if (!inputValues.duration) {
    durationErrors.push(
      i18n.translate('xpack.monitoring.alerts.validation.duration', {
        defaultMessage: 'A valid duration is required.',
      })
    );
  }

  return { errors: { duration: durationErrors } };
};

/**
 * Builds the alert type model for the CCR read exceptions alert.
 *
 * Label, description and parameter details are taken from the
 * `ALERT_DETAILS` entry keyed by `ALERT_CCR_READ_EXCEPTIONS`; parameters are
 * rendered with the duration `Expression` component and checked by `validate`.
 *
 * @returns the model that the plugin registers with the alert type registry.
 */
export function createCCRReadExceptionsAlertType(): AlertTypeModel {
  const { label, description, paramDetails } = ALERT_DETAILS[ALERT_CCR_READ_EXCEPTIONS];

  const alertType: AlertTypeModel = {
    id: ALERT_CCR_READ_EXCEPTIONS,
    name: label,
    description,
    iconClass: 'bell',
    // Builds the URL of the `kibana-alerts.html` page from the doc-link base URL and version.
    documentationUrl: (docLinks) =>
      `${docLinks.ELASTIC_WEBSITE_URL}guide/en/kibana/${docLinks.DOC_LINK_VERSION}/kibana-alerts.html`,
    alertParamsExpression: (props: Props) => <Expression {...props} paramDetails={paramDetails} />,
    validate,
    defaultActionMessage: '{{context.internalFullMessage}}',
    requiresAppContext: true,
  };

  return alertType;
}
Original file line number Diff line number Diff line change
@@ -171,6 +171,7 @@ export function getAlertPanelsByCategory(
for (const { alert, states } of category.alerts) {
const items = [];
for (const alertState of states.filter(({ state }) => stateFilter(state))) {
const { nodeName, itemLabel } = alertState.state;
items.push({
name: (
<Fragment>
@@ -188,7 +189,7 @@ export function getAlertPanelsByCategory(
)}
</EuiText>
</EuiToolTip>
<EuiText size="s">{alertState.state.nodeName}</EuiText>
<EuiText size="s">{nodeName || itemLabel}</EuiText>
</Fragment>
),
panel: ++tertiaryPanelIndex,
Original file line number Diff line number Diff line change
@@ -69,10 +69,11 @@ export function getAlertPanelsByNode(
const states = (statesByNodes[nodeUuid] as CommonAlertState[]).filter(({ state }) =>
stateFilter(state)
);
const { nodeName, itemLabel } = states[0].state;
return {
name: (
<EuiText>
{states[0].state.nodeName} ({states.length})
{nodeName || itemLabel} ({states.length})
</EuiText>
),
panel: index + 1,
@@ -86,7 +87,8 @@ export function getAlertPanelsByNode(
let title = '';
for (const { alert, states } of alertsForNode) {
for (const alertState of states) {
title = alertState.state.nodeName;
const { nodeName, itemLabel } = alertState.state;
title = nodeName || itemLabel;
panelItems.push({
name: (
<Fragment>
Original file line number Diff line number Diff line change
@@ -77,6 +77,7 @@ export function replaceTokens(alertMessage: AlertMessage): JSX.Element | string
}

const url = linkToken.partialUrl
.replace('{basePath}', Legacy.shims.getBasePath())
.replace('{elasticWebsiteUrl}', Legacy.shims.docLinks.ELASTIC_WEBSITE_URL)
.replace('{docLinkVersion}', Legacy.shims.docLinks.DOC_LINK_VERSION);
const index = text.indexOf(linkPart[0]);
Original file line number Diff line number Diff line change
@@ -47,6 +47,7 @@ import {
ALERT_NODES_CHANGED,
ALERT_ELASTICSEARCH_VERSION_MISMATCH,
ALERT_MISSING_MONITORING_DATA,
ALERT_CCR_READ_EXCEPTIONS,
} from '../../../../common/constants';
import { AlertsBadge } from '../../../alerts/badge';
import { shouldShowAlertBadge } from '../../../alerts/lib/should_show_alert_badge';
@@ -159,7 +160,11 @@ function renderLog(log) {
);
}

const OVERVIEW_PANEL_ALERTS = [ALERT_CLUSTER_HEALTH, ALERT_LICENSE_EXPIRATION];
const OVERVIEW_PANEL_ALERTS = [
ALERT_CLUSTER_HEALTH,
ALERT_LICENSE_EXPIRATION,
ALERT_CCR_READ_EXCEPTIONS,
];

const NODES_PANEL_ALERTS = [
ALERT_CPU_USAGE,
2 changes: 2 additions & 0 deletions x-pack/plugins/monitoring/public/plugin.ts
Original file line number Diff line number Diff line change
@@ -156,6 +156,7 @@ export class MonitoringPlugin
'./alerts/thread_pool_rejections_alert'
);
const { createMemoryUsageAlertType } = await import('./alerts/memory_usage_alert');
const { createCCRReadExceptionsAlertType } = await import('./alerts/ccr_read_exceptions_alert');

const {
triggersActionsUi: { alertTypeRegistry },
@@ -176,6 +177,7 @@ export class MonitoringPlugin
ALERT_DETAILS[ALERT_THREAD_POOL_WRITE_REJECTIONS]
)
);
alertTypeRegistry.register(createCCRReadExceptionsAlertType());
const legacyAlertTypes = createLegacyAlertTypes();
for (const legacyAlertType of legacyAlertTypes) {
alertTypeRegistry.register(legacyAlertType);
4 changes: 3 additions & 1 deletion x-pack/plugins/monitoring/server/alerts/alerts_factory.ts
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
*/

import {
CCRReadExceptionsAlert,
CpuUsageAlert,
MissingMonitoringDataAlert,
DiskUsageAlert,
@@ -32,6 +33,7 @@ import {
ALERT_LOGSTASH_VERSION_MISMATCH,
ALERT_KIBANA_VERSION_MISMATCH,
ALERT_ELASTICSEARCH_VERSION_MISMATCH,
ALERT_CCR_READ_EXCEPTIONS,
} from '../../common/constants';
import { AlertsClient } from '../../../alerts/server';
import { Alert } from '../../../alerts/common';
@@ -49,6 +51,7 @@ const BY_TYPE = {
[ALERT_LOGSTASH_VERSION_MISMATCH]: LogstashVersionMismatchAlert,
[ALERT_KIBANA_VERSION_MISMATCH]: KibanaVersionMismatchAlert,
[ALERT_ELASTICSEARCH_VERSION_MISMATCH]: ElasticsearchVersionMismatchAlert,
[ALERT_CCR_READ_EXCEPTIONS]: CCRReadExceptionsAlert,
};

export class AlertsFactory {
@@ -68,7 +71,6 @@ export class AlertsFactory {

if (!alertClientAlerts.total || !alertClientAlerts.data?.length) {
return;
// return new alertCls() as BaseAlert;
}

const [rawAlert] = alertClientAlerts.data as [Alert];
13 changes: 8 additions & 5 deletions x-pack/plugins/monitoring/server/alerts/base_alert.ts
Original file line number Diff line number Diff line change
@@ -344,7 +344,7 @@ export class BaseAlert {

const firingNodeUuids = nodes
.filter((node) => node.shouldFire)
.map((node) => node.meta.nodeId)
.map((node) => node.meta.nodeId || node.meta.instanceId)
.join(',');
const instanceId = `${this.alertOptions.id}:${cluster.clusterUuid}:${firingNodeUuids}`;
const instance = services.alertInstanceFactory(instanceId);
@@ -354,13 +354,16 @@ export class BaseAlert {
if (!node.shouldFire) {
continue;
}
const stat = node.meta as AlertNodeState;
const { meta } = node;
const nodeState = this.getDefaultAlertState(cluster, node) as AlertNodeState;
if (key) {
nodeState[key] = stat[key];
nodeState[key] = meta[key];
}
nodeState.nodeId = stat.nodeId || node.nodeId!;
nodeState.nodeName = stat.nodeName || node.nodeName || nodeState.nodeId;
nodeState.nodeId = meta.nodeId || node.nodeId! || meta.instanceId;
// TODO: make these functions more generic, so it's node/item agnostic
nodeState.nodeName = meta.itemLabel || meta.nodeName || node.nodeName || nodeState.nodeId;
nodeState.itemLabel = meta.itemLabel;
nodeState.meta = meta;
nodeState.ui.triggeredMS = currentUTC;
nodeState.ui.isFiring = true;
nodeState.ui.severity = node.severity;
Loading