From dbb21ebc6a77fa9ad3dadcd17a03b8f9604a21f0 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Fri, 2 Apr 2021 16:53:27 -0400 Subject: [PATCH 1/7] Using shard size avg instead of primary total --- x-pack/plugins/monitoring/common/types/es.ts | 3 +++ .../lib/alerts/fetch_index_shard_size.ts | 24 ++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/x-pack/plugins/monitoring/common/types/es.ts b/x-pack/plugins/monitoring/common/types/es.ts index 9dce32211f4b1..3ad2fe89fa6b2 100644 --- a/x-pack/plugins/monitoring/common/types/es.ts +++ b/x-pack/plugins/monitoring/common/types/es.ts @@ -100,6 +100,9 @@ export interface ElasticsearchNodeStats { export interface ElasticsearchIndexStats { index?: string; + shards: { + total: number; + }; primaries?: { docs?: { count?: number; diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index f51e1cde47f8d..b32c06bb88fae 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -69,13 +69,6 @@ export async function fetchIndexShardSize( }, aggs: { over_threshold: { - filter: { - range: { - 'index_stats.primaries.store.size_in_bytes': { - gt: threshold * gbMultiplier, - }, - }, - }, aggs: { index: { terms: { @@ -96,7 +89,8 @@ export async function fetchIndexShardSize( _source: { includes: [ '_index', - 'index_stats.primaries.store.size_in_bytes', + 'index_stats.shards.total', + 'index_stats.total.store.size_in_bytes', 'source_node.name', 'source_node.uuid', ], @@ -123,7 +117,7 @@ export async function fetchIndexShardSize( if (!clusterBuckets.length) { return stats; } - + const thresholdGB = threshold * gbMultiplier; for (const clusterBucket of clusterBuckets) { const indexBuckets = clusterBucket.over_threshold.index.buckets; const clusterUuid = clusterBucket.key; @@ -143,9 +137,17 @@ export async function fetchIndexShardSize( _source: { source_node: sourceNode, index_stats: indexStats }, } = topHit; - const { size_in_bytes: shardSizeBytes } = indexStats?.primaries?.store!; + const { total: totalShards } = indexStats?.shards; + const { size_in_bytes: shardSizeBytes = 0 } = indexStats?.total?.store!; + if (!shardSizeBytes) { + continue; + } const { name: nodeName, uuid: nodeId } = sourceNode; - const shardSize = +(shardSizeBytes! / gbMultiplier).toFixed(2); + const avgShardSize = shardSizeBytes ? shardSizeBytes / totalShards : 0; + const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); + if (shardSize < thresholdGB) { + continue; + } stats.push({ shardIndex, shardSize, From 9e62f40ab40df60d61547d276b8eb610b1d750bc Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Fri, 2 Apr 2021 16:58:05 -0400 Subject: [PATCH 2/7] Added ui text --- x-pack/plugins/monitoring/common/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index a6184261350b7..6ce57bf806325 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -477,7 +477,7 @@ export const ALERT_DETAILS = { paramDetails: { threshold: { label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', { - defaultMessage: `Notify when a shard exceeds this size`, + defaultMessage: `Notify when average shard size exceeds this size`, }), type: AlertParamType.Number, append: 'GB', From 290259a1644c029cc9ac866470613679722961f1 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Mon, 5 Apr 2021 22:17:47 -0400 Subject: [PATCH 3/7] Changed to primary average instead of total --- docs/user/monitoring/kibana-alerts.asciidoc | 4 ++-- x-pack/plugins/monitoring/common/constants.ts | 4 ++-- x-pack/plugins/monitoring/common/types/es.ts | 2 +- .../server/alerts/large_shard_size_alert.ts | 5 +++-- .../server/lib/alerts/fetch_index_shard_size.ts | 16 ++++++++++------ 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/docs/user/monitoring/kibana-alerts.asciidoc b/docs/user/monitoring/kibana-alerts.asciidoc index 04f4e986ca289..2b02984102ab9 100644 --- a/docs/user/monitoring/kibana-alerts.asciidoc +++ b/docs/user/monitoring/kibana-alerts.asciidoc @@ -81,8 +81,8 @@ by running checks on a schedule time of 1 minute with a re-notify interval of 6 [[kibana-alerts-large-shard-size]] == Large shard size -This alert is triggered if a large (primary) shard size is found on any of the -specified index patterns. The trigger condition is met if an index's shard size is +This alert is triggered if a large average shard size (across associated primaries) is found on any of the +specified index patterns. The trigger condition is met if an index's (primary average) shard size is 55gb or higher in the last 5 minutes. The alert is grouped across all indices that match the default patter of `*` by running checks on a schedule time of 1 minute with a re-notify interval of 12 hours. diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index 6ce57bf806325..d81ede13f5545 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -477,7 +477,7 @@ export const ALERT_DETAILS = { paramDetails: { threshold: { label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', { - defaultMessage: `Notify when average shard size exceeds this size`, + defaultMessage: `Notify when primary average shard size exceeds this value`, }), type: AlertParamType.Number, append: 'GB', @@ -494,7 +494,7 @@ export const ALERT_DETAILS = { defaultMessage: 'Shard size', }), description: i18n.translate('xpack.monitoring.alerts.shardSize.description', { - defaultMessage: 'Alert if an index (primary) shard is oversize.', + defaultMessage: 'Alert if an index (primary) shard average is oversize.', }), }, }; diff --git a/x-pack/plugins/monitoring/common/types/es.ts b/x-pack/plugins/monitoring/common/types/es.ts index 3ad2fe89fa6b2..38a7e7859272c 100644 --- a/x-pack/plugins/monitoring/common/types/es.ts +++ b/x-pack/plugins/monitoring/common/types/es.ts @@ -101,7 +101,7 @@ export interface ElasticsearchNodeStats { export interface ElasticsearchIndexStats { index?: string; shards: { - total: number; + primaries: number; }; primaries?: { docs?: { diff --git a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts index 2c9e5a04e37e4..aedb83a0ad315 100644 --- a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts @@ -49,7 +49,8 @@ export class LargeShardSizeAlert extends BaseAlert { description: i18n.translate( 'xpack.monitoring.alerts.shardSize.actionVariables.shardIndex', { - defaultMessage: 'List of indices which are experiencing large shard size.', + defaultMessage: + 'List of indices which are experiencing large (primary average) shard size.', } ), }, @@ -100,7 +101,7 @@ export class LargeShardSizeAlert extends BaseAlert { const { shardIndex, shardSize } = item.meta as IndexShardSizeUIMeta; return { text: i18n.translate('xpack.monitoring.alerts.shardSize.ui.firingMessage', { - defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large shard size of: {shardSize}GB at #absolute`, + defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large (primary average) shard size of: {shardSize}GB at #absolute`, values: { shardIndex, shardSize, diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index b32c06bb88fae..cbe4692e1d667 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -89,8 +89,8 @@ export async function fetchIndexShardSize( _source: { includes: [ '_index', - 'index_stats.shards.total', - 'index_stats.total.store.size_in_bytes', + 'index_stats.shards.primaries', + 'index_stats.primaries.store.size_in_bytes', 'source_node.name', 'source_node.uuid', ], @@ -137,13 +137,17 @@ export async function fetchIndexShardSize( _source: { source_node: sourceNode, index_stats: indexStats }, } = topHit; - const { total: totalShards } = indexStats?.shards; - const { size_in_bytes: shardSizeBytes = 0 } = indexStats?.total?.store!; - if (!shardSizeBytes) { + if (!indexStats || !indexStats.primaries) { + continue; + } + + const { primaries: totalPrimaryShards } = indexStats.shards; + const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store!; + if (!primaryShardSizeBytes) { continue; } const { name: nodeName, uuid: nodeId } = sourceNode; - const avgShardSize = shardSizeBytes ? shardSizeBytes / totalShards : 0; + const avgShardSize = primaryShardSizeBytes ? primaryShardSizeBytes / totalPrimaryShards : 0; const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); if (shardSize < thresholdGB) { continue; From 019e9c9498a8f4735cbb10d5e8f9f0e86afdd2e2 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Mon, 12 Apr 2021 12:04:38 -0400 Subject: [PATCH 4/7] Addressed cr feedback --- docs/user/monitoring/kibana-alerts.asciidoc | 2 +- x-pack/plugins/monitoring/common/constants.ts | 2 +- .../monitoring/server/alerts/large_shard_size_alert.ts | 5 ++--- .../server/lib/alerts/fetch_index_shard_size.ts | 8 ++++++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/user/monitoring/kibana-alerts.asciidoc b/docs/user/monitoring/kibana-alerts.asciidoc index 2b02984102ab9..4b498a1958921 100644 --- a/docs/user/monitoring/kibana-alerts.asciidoc +++ b/docs/user/monitoring/kibana-alerts.asciidoc @@ -82,7 +82,7 @@ by running checks on a schedule time of 1 minute with a re-notify interval of 6 == Large shard size This alert is triggered if a large average shard size (across associated primaries) is found on any of the -specified index patterns. The trigger condition is met if an index's (primary average) shard size is +specified index patterns. The trigger condition is met if an index's average shard size is 55gb or higher in the last 5 minutes. The alert is grouped across all indices that match the default patter of `*` by running checks on a schedule time of 1 minute with a re-notify interval of 12 hours. diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index d81ede13f5545..1a1d5071c5c71 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -477,7 +477,7 @@ export const ALERT_DETAILS = { paramDetails: { threshold: { label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', { - defaultMessage: `Notify when primary average shard size exceeds this value`, + defaultMessage: `Notify when average shard size exceeds this value`, }), type: AlertParamType.Number, append: 'GB', diff --git a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts index aedb83a0ad315..db318d7962beb 100644 --- a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts @@ -49,8 +49,7 @@ export class LargeShardSizeAlert extends BaseAlert { description: i18n.translate( 'xpack.monitoring.alerts.shardSize.actionVariables.shardIndex', { - defaultMessage: - 'List of indices which are experiencing large (primary average) shard size.', + defaultMessage: 'List of indices which are experiencing large average shard size.', } ), }, @@ -101,7 +100,7 @@ export class LargeShardSizeAlert extends BaseAlert { const { shardIndex, shardSize } = item.meta as IndexShardSizeUIMeta; return { text: i18n.translate('xpack.monitoring.alerts.shardSize.ui.firingMessage', { - defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large (primary average) shard size of: {shardSize}GB at #absolute`, + defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large average shard size of: {shardSize}GB at #absolute`, values: { shardIndex, shardSize, diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index cbe4692e1d667..08fa7ec8ff314 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -142,12 +142,16 @@ export async function fetchIndexShardSize( } const { primaries: totalPrimaryShards } = indexStats.shards; - const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store!; + const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store || {}; if (!primaryShardSizeBytes) { continue; } + /** + * We can only calculate the average primary shard size at this point, since we don't have + * data (in .monitoring-es* indices) to give us individual shards. This might change in the future + */ const { name: nodeName, uuid: nodeId } = sourceNode; - const avgShardSize = primaryShardSizeBytes ? primaryShardSizeBytes / totalPrimaryShards : 0; + const avgShardSize = primaryShardSizeBytes / totalPrimaryShards; const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); if (shardSize < thresholdGB) { continue; From 0d78959fea68c263ae2bff6164b039409bd14a82 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Mon, 12 Apr 2021 12:31:30 -0400 Subject: [PATCH 5/7] Added zero check --- .../monitoring/server/lib/alerts/fetch_index_shard_size.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index 08fa7ec8ff314..27cfa7974dc7f 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -143,7 +143,7 @@ export async function fetchIndexShardSize( const { primaries: totalPrimaryShards } = indexStats.shards; const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store || {}; - if (!primaryShardSizeBytes) { + if (!primaryShardSizeBytes || !totalPrimaryShards) { continue; } /** From 56d8a0d3a79fd33768e6cad21a9559cf11b88d42 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Mon, 12 Apr 2021 20:02:19 -0400 Subject: [PATCH 6/7] Fixed threshold checking --- .../monitoring/server/lib/alerts/fetch_index_shard_size.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index 27cfa7974dc7f..c3e9f08c3b949 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -117,7 +117,7 @@ export async function fetchIndexShardSize( if (!clusterBuckets.length) { return stats; } - const thresholdGB = threshold * gbMultiplier; + const thresholdBytes = threshold * gbMultiplier; for (const clusterBucket of clusterBuckets) { const indexBuckets = clusterBucket.over_threshold.index.buckets; const clusterUuid = clusterBucket.key; @@ -152,10 +152,10 @@ export async function fetchIndexShardSize( */ const { name: nodeName, uuid: nodeId } = sourceNode; const avgShardSize = primaryShardSizeBytes / totalPrimaryShards; - const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); - if (shardSize < thresholdGB) { + if (avgShardSize < thresholdBytes) { continue; } + const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); stats.push({ shardIndex, shardSize, From 567fec317d1f1884745cc1613fe32f4222a96c83 Mon Sep 17 00:00:00 2001 From: Igor Zaytsev Date: Mon, 12 Apr 2021 20:05:59 -0400 Subject: [PATCH 7/7] Changed description --- x-pack/plugins/monitoring/common/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index 2f7e624227d02..cd3e28debb7d5 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -477,7 +477,7 @@ export const ALERT_DETAILS = { defaultMessage: 'Shard size', }), description: i18n.translate('xpack.monitoring.alerts.shardSize.description', { - defaultMessage: 'Alert if an index (primary) shard average is oversize.', + defaultMessage: 'Alert if the average shard size is larger than the configured threshold.', }), }, };