Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Monitoring] JVM memory usage alert #79039

Merged
merged 12 commits into from
Oct 5, 2020
2 changes: 2 additions & 0 deletions x-pack/plugins/monitoring/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ export const ALERT_NODES_CHANGED = `${ALERT_PREFIX}alert_nodes_changed`;
export const ALERT_ELASTICSEARCH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_elasticsearch_version_mismatch`;
export const ALERT_KIBANA_VERSION_MISMATCH = `${ALERT_PREFIX}alert_kibana_version_mismatch`;
export const ALERT_LOGSTASH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_logstash_version_mismatch`;
export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`;
export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`;

/**
Expand All @@ -250,6 +251,7 @@ export const ALERTS = [
ALERT_ELASTICSEARCH_VERSION_MISMATCH,
ALERT_KIBANA_VERSION_MISMATCH,
ALERT_LOGSTASH_VERSION_MISMATCH,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
];

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

import React from 'react';
import { validate } from '../components/duration/validation';
import { Expression, Props } from '../components/duration/expression';

// eslint-disable-next-line @kbn/eslint/no-restricted-paths
import { AlertTypeModel } from '../../../../triggers_actions_ui/public/types';

// eslint-disable-next-line @kbn/eslint/no-restricted-paths
import { MemoryUsageAlert } from '../../../server/alerts';

export function createMemoryUsageAlertType(): AlertTypeModel {
return {
id: MemoryUsageAlert.TYPE,
name: MemoryUsageAlert.LABEL,
iconClass: 'bell',
alertParamsExpression: (props: Props) => (
<Expression {...props} paramDetails={MemoryUsageAlert.PARAM_DETAILS} />
),
validate,
defaultActionMessage: '{{context.internalFullMessage}}',
requiresAppContext: true,
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import {
ALERT_CLUSTER_HEALTH,
ALERT_CPU_USAGE,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_NODES_CHANGED,
ALERT_ELASTICSEARCH_VERSION_MISMATCH,
ALERT_MISSING_MONITORING_DATA,
Expand Down Expand Up @@ -160,6 +161,7 @@ const OVERVIEW_PANEL_ALERTS = [ALERT_CLUSTER_HEALTH, ALERT_LICENSE_EXPIRATION];
const NODES_PANEL_ALERTS = [
ALERT_CPU_USAGE,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_NODES_CHANGED,
ALERT_ELASTICSEARCH_VERSION_MISMATCH,
ALERT_MISSING_MONITORING_DATA,
Expand Down
12 changes: 8 additions & 4 deletions x-pack/plugins/monitoring/public/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import { createCpuUsageAlertType } from './alerts/cpu_usage_alert';
import { createMissingMonitoringDataAlertType } from './alerts/missing_monitoring_data_alert';
import { createLegacyAlertTypes } from './alerts/legacy_alert';
import { createDiskUsageAlertType } from './alerts/disk_usage_alert';
import { createMemoryUsageAlertType } from './alerts/memory_usage_alert';

interface MonitoringSetupPluginDependencies {
home?: HomePublicPluginSetup;
Expand Down Expand Up @@ -72,12 +73,15 @@ export class MonitoringPlugin
});
}

plugins.triggers_actions_ui.alertTypeRegistry.register(createCpuUsageAlertType());
plugins.triggers_actions_ui.alertTypeRegistry.register(createMissingMonitoringDataAlertType());
plugins.triggers_actions_ui.alertTypeRegistry.register(createDiskUsageAlertType());
const { alertTypeRegistry } = plugins.triggers_actions_ui;
alertTypeRegistry.register(createCpuUsageAlertType());
alertTypeRegistry.register(createDiskUsageAlertType());
alertTypeRegistry.register(createMemoryUsageAlertType());
alertTypeRegistry.register(createMissingMonitoringDataAlertType());

const legacyAlertTypes = createLegacyAlertTypes();
for (const legacyAlertType of legacyAlertTypes) {
plugins.triggers_actions_ui.alertTypeRegistry.register(legacyAlertType);
alertTypeRegistry.register(legacyAlertType);
}

const app: App = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
ALERT_CPU_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
} from '../../../../../common/constants';

function getPageData($injector) {
Expand Down Expand Up @@ -72,7 +73,12 @@ uiRoutes.when('/elasticsearch/nodes/:node/advanced', {
alerts: {
shouldFetch: true,
options: {
alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, ALERT_MISSING_MONITORING_DATA],
alertTypeIds: [
ALERT_CPU_USAGE,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
],
filters: [
{
nodeUuid: nodeName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
ALERT_CPU_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
} from '../../../../common/constants';

uiRoutes.when('/elasticsearch/nodes/:node', {
Expand Down Expand Up @@ -56,7 +57,12 @@ uiRoutes.when('/elasticsearch/nodes/:node', {
alerts: {
shouldFetch: true,
options: {
alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, ALERT_MISSING_MONITORING_DATA],
alertTypeIds: [
ALERT_CPU_USAGE,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
],
filters: [
{
nodeUuid: nodeName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
ALERT_CPU_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
} from '../../../../common/constants';

uiRoutes.when('/elasticsearch/nodes', {
Expand Down Expand Up @@ -88,7 +89,12 @@ uiRoutes.when('/elasticsearch/nodes', {
alerts: {
shouldFetch: true,
options: {
alertTypeIds: [ALERT_CPU_USAGE, ALERT_DISK_USAGE, ALERT_MISSING_MONITORING_DATA],
alertTypeIds: [
ALERT_CPU_USAGE,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
],
filters: [
{
stackProduct: ELASTICSEARCH_SYSTEM_ID,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,6 @@ describe('AlertsFactory', () => {

it('should get all', () => {
const alerts = AlertsFactory.getAll();
expect(alerts.length).toBe(9);
expect(alerts.length).toBe(10);
});
});
3 changes: 3 additions & 0 deletions x-pack/plugins/monitoring/server/alerts/alerts_factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
CpuUsageAlert,
MissingMonitoringDataAlert,
DiskUsageAlert,
MemoryUsageAlert,
NodesChangedAlert,
ClusterHealthAlert,
LicenseExpirationAlert,
Expand All @@ -22,6 +23,7 @@ import {
ALERT_CPU_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_DISK_USAGE,
ALERT_MEMORY_USAGE,
ALERT_NODES_CHANGED,
ALERT_LOGSTASH_VERSION_MISMATCH,
ALERT_KIBANA_VERSION_MISMATCH,
Expand All @@ -35,6 +37,7 @@ export const BY_TYPE = {
[ALERT_CPU_USAGE]: CpuUsageAlert,
[ALERT_MISSING_MONITORING_DATA]: MissingMonitoringDataAlert,
[ALERT_DISK_USAGE]: DiskUsageAlert,
[ALERT_MEMORY_USAGE]: MemoryUsageAlert,
[ALERT_NODES_CHANGED]: NodesChangedAlert,
[ALERT_LOGSTASH_VERSION_MISMATCH]: LogstashVersionMismatchAlert,
[ALERT_KIBANA_VERSION_MISMATCH]: KibanaVersionMismatchAlert,
Expand Down
9 changes: 9 additions & 0 deletions x-pack/plugins/monitoring/server/alerts/base_alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -377,4 +377,13 @@ export class BaseAlert {
) {
throw new Error('Child classes must implement `executeActions`');
}

protected createGlobalStateLink(link: string, clusterUuid: string, ccs?: string) {
const globalState = [`cluster_uuid:${clusterUuid}`];
if (ccs) {
globalState.push(`ccs:${ccs}`);
}
globalState.push('refreshInterval:(pause:!f,value:10000),time:(from:now-1h,to:now)');
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To keep it consistent with our defaults, otherwise it'll use timerange default of 15min, which will then persist to next state once they start navigating the app

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but it won't transfer to url_state unless we explicitly provide it (like I'm doing above). You can test this for yourself

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but I'm still not following.

This is what I'm seeing which seems fine:
Kapture 2020-10-02 at 14 31 51

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There isn't much to figure out. Like I said it falls back to defaults, and for me it's:
Screen Shot 2020-10-02 at 4 16 06 PM

Anyways, we're digressing here. These are all trivial things that are not really related to the scope of this PR and can be addressed/debated as a separate discussion/issue

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My defaults are the same:
Screen Shot 2020-10-02 at 4 31 15 PM

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It also relies on localStorage. Maybe try clearing it, or at least the following key:
kibana.timepicker.timeHistory: "[{"from":"now-1h","to":"now"},{"from":"now-15m","to":"now"}]"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it isn't related to this PR and we should defer the conversation, but we should have it at some point. I'm curious if there is an actual bug around this code that we need to fix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey catching up on this, I think introducing more than one place to define defaults is probably not a good idea and we should understand why it's needed before we introduce that kind of foot-gun.

For now, if the goal is to get this alert in before FF, I recommend removing that line and then during the test plan phase try to understand why you both are seeing these different behaviors, and then document that back on this PR or in a new ticket. Hopefully it's not necessary at all, but if it is, we should export the defaults as a constant and then use it in both places at the very least.

return `${this.kibanaUrl}/app/monitoring#/${link}?_g=(${globalState.toString()})`;
}
}
10 changes: 4 additions & 6 deletions x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ describe('CpuUsageAlert', () => {
};
const kibanaUrl = 'http://localhost:5601';

const hasScheduledActions = jest.fn();
const replaceState = jest.fn();
const scheduleActions = jest.fn();
const getState = jest.fn();
Expand All @@ -87,7 +86,6 @@ describe('CpuUsageAlert', () => {
callCluster: jest.fn(),
alertInstanceFactory: jest.fn().mockImplementation(() => {
return {
hasScheduledActions,
replaceState,
scheduleActions,
getState,
Expand Down Expand Up @@ -154,7 +152,7 @@ describe('CpuUsageAlert', () => {
endToken: '#end_link',
type: 'docLink',
partialUrl:
'{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html',
'{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html',
},
],
},
Expand All @@ -166,7 +164,7 @@ describe('CpuUsageAlert', () => {
endToken: '#end_link',
type: 'docLink',
partialUrl:
'{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html',
'{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html',
},
],
},
Expand Down Expand Up @@ -506,7 +504,7 @@ describe('CpuUsageAlert', () => {
endToken: '#end_link',
type: 'docLink',
partialUrl:
'{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html',
'{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html',
},
],
},
Expand All @@ -518,7 +516,7 @@ describe('CpuUsageAlert', () => {
endToken: '#end_link',
type: 'docLink',
partialUrl:
'{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html',
'{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html',
},
],
},
Expand Down
4 changes: 2 additions & 2 deletions x-pack/plugins/monitoring/server/alerts/cpu_usage_alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,13 @@ export class CpuUsageAlert extends BaseAlert {
i18n.translate('xpack.monitoring.alerts.cpuUsage.ui.nextSteps.hotThreads', {
defaultMessage: '#start_linkCheck hot threads#end_link',
}),
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html`
`{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/cluster-nodes-hot-threads.html`
),
createLink(
i18n.translate('xpack.monitoring.alerts.cpuUsage.ui.nextSteps.runningTasks', {
defaultMessage: '#start_linkCheck long running tasks#end_link',
}),
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html`
`{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tasks.html`
),
],
tokens: [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ describe('DiskUsageAlert', () => {
};
const kibanaUrl = 'http://localhost:5601';

const hasScheduledActions = jest.fn();
const replaceState = jest.fn();
const scheduleActions = jest.fn();
const getState = jest.fn();
Expand All @@ -98,7 +97,6 @@ describe('DiskUsageAlert', () => {
callCluster: jest.fn(),
alertInstanceFactory: jest.fn().mockImplementation(() => {
return {
hasScheduledActions,
replaceState,
scheduleActions,
getState,
Expand Down
16 changes: 8 additions & 8 deletions x-pack/plugins/monitoring/server/alerts/disk_usage_alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ export class DiskUsageAlert extends BaseAlert {

protected filterAlertInstance(alertInstance: RawAlertInstance, filters: CommonAlertFilter[]) {
const alertInstanceStates = alertInstance.state?.alertStates as AlertDiskUsageState[];
const nodeUuid = filters?.find((filter) => filter.nodeUuid);
const nodeFilter = filters?.find((filter) => filter.nodeUuid);

if (!filters || !filters.length || !alertInstanceStates?.length || !nodeUuid) {
if (!filters || !filters.length || !alertInstanceStates?.length || !nodeFilter?.nodeUuid) {
return true;
}

const nodeAlerts = alertInstanceStates.filter(({ nodeId }) => nodeId === nodeUuid);
const nodeAlerts = alertInstanceStates.filter(({ nodeId }) => nodeId === nodeFilter.nodeUuid);
return Boolean(nodeAlerts.length);
}

Expand Down Expand Up @@ -160,7 +160,7 @@ export class DiskUsageAlert extends BaseAlert {
i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.tuneDisk', {
defaultMessage: '#start_linkTune for disk usage#end_link',
}),
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/tune-for-disk-usage.html`
`{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/tune-for-disk-usage.html`
),
createLink(
i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.identifyIndices', {
Expand All @@ -173,19 +173,19 @@ export class DiskUsageAlert extends BaseAlert {
i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.ilmPolicies', {
defaultMessage: '#start_linkImplement ILM policies#end_link',
}),
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/index-lifecycle-management.html`
`{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/index-lifecycle-management.html`
),
createLink(
i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.addMoreNodes', {
defaultMessage: '#start_linkAdd more data nodes#end_link',
}),
`{elasticWebsiteUrl}/guide/en/elasticsearch/reference/{docLinkVersion}/add-elasticsearch-nodes.html`
`{elasticWebsiteUrl}guide/en/elasticsearch/reference/{docLinkVersion}/add-elasticsearch-nodes.html`
),
createLink(
i18n.translate('xpack.monitoring.alerts.diskUsage.ui.nextSteps.resizeYourDeployment', {
defaultMessage: '#start_linkResize your deployment (ECE)#end_link',
}),
`{elasticWebsiteUrl}/guide/en/cloud-enterprise/current/ece-resize-deployment.html`
`{elasticWebsiteUrl}guide/en/cloud-enterprise/current/ece-resize-deployment.html`
),
],
tokens: [
Expand Down Expand Up @@ -331,7 +331,7 @@ export class DiskUsageAlert extends BaseAlert {

const alertInstanceState = { alertStates: newAlertStates };
instance.replaceState(alertInstanceState);
if (newAlertStates.length && !instance.hasScheduledActions()) {
if (newAlertStates.length) {
this.executeActions(instance, alertInstanceState, null, cluster);
state.lastExecutedAction = currentUTC;
}
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/monitoring/server/alerts/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export { BaseAlert } from './base_alert';
export { CpuUsageAlert } from './cpu_usage_alert';
export { MissingMonitoringDataAlert } from './missing_monitoring_data_alert';
export { DiskUsageAlert } from './disk_usage_alert';
export { MemoryUsageAlert } from './memory_usage_alert';
export { ClusterHealthAlert } from './cluster_health_alert';
export { LicenseExpirationAlert } from './license_expiration_alert';
export { NodesChangedAlert } from './nodes_changed_alert';
Expand Down
Loading