diff --git a/docs/user/monitoring/kibana-alerts.asciidoc b/docs/user/monitoring/kibana-alerts.asciidoc index bbc9c41c6ca5a..2944921edd2ee 100644 --- a/docs/user/monitoring/kibana-alerts.asciidoc +++ b/docs/user/monitoring/kibana-alerts.asciidoc @@ -81,8 +81,8 @@ by running checks on a schedule time of 1 minute with a re-notify interval of 6 [[kibana-alerts-large-shard-size]] == Large shard size -This alert is triggered if a large (primary) shard size is found on any of the -specified index patterns. The trigger condition is met if an index's shard size is +This alert is triggered if a large average shard size (across associated primaries) is found on any of the +specified index patterns. The trigger condition is met if an index's average shard size is 55gb or higher in the last 5 minutes. The alert is grouped across all indices that match the default pattern of `*` by running checks on a schedule time of 1 minute with a re-notify interval of 12 hours. diff --git a/x-pack/plugins/monitoring/common/constants.ts b/x-pack/plugins/monitoring/common/constants.ts index bf6e32af0dc39..cd3e28debb7d5 100644 --- a/x-pack/plugins/monitoring/common/constants.ts +++ b/x-pack/plugins/monitoring/common/constants.ts @@ -460,7 +460,7 @@ export const ALERT_DETAILS = { paramDetails: { threshold: { label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', { - defaultMessage: `Notify when a shard exceeds this size`, + defaultMessage: `Notify when average shard size exceeds this value`, }), type: AlertParamType.Number, append: 'GB', @@ -477,7 +477,7 @@ export const ALERT_DETAILS = { defaultMessage: 'Shard size', }), description: i18n.translate('xpack.monitoring.alerts.shardSize.description', { - defaultMessage: 'Alert if an index (primary) shard is oversize.', + defaultMessage: 'Alert if the average shard size is larger than the configured threshold.', }), }, }; diff --git a/x-pack/plugins/monitoring/common/types/es.ts b/x-pack/plugins/monitoring/common/types/es.ts index 9dce32211f4b1..38a7e7859272c 100644 --- a/x-pack/plugins/monitoring/common/types/es.ts +++ b/x-pack/plugins/monitoring/common/types/es.ts @@ -100,6 +100,9 @@ export interface ElasticsearchNodeStats { export interface ElasticsearchIndexStats { index?: string; + shards: { + primaries: number; + }; primaries?: { docs?: { count?: number; diff --git a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts index 2c9e5a04e37e4..db318d7962beb 100644 --- a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts @@ -49,7 +49,7 @@ export class LargeShardSizeAlert extends BaseAlert { description: i18n.translate( 'xpack.monitoring.alerts.shardSize.actionVariables.shardIndex', { - defaultMessage: 'List of indices which are experiencing large shard size.', + defaultMessage: 'List of indices which are experiencing large average shard size.', } ), }, @@ -100,7 +100,7 @@ export class LargeShardSizeAlert extends BaseAlert { const { shardIndex, shardSize } = item.meta as IndexShardSizeUIMeta; return { text: i18n.translate('xpack.monitoring.alerts.shardSize.ui.firingMessage', { - defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large shard size of: {shardSize}GB at #absolute`, + defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large average shard size of: {shardSize}GB at #absolute`, values: { shardIndex, shardSize, diff --git a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts index f51e1cde47f8d..c3e9f08c3b949 100644 --- a/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts +++ b/x-pack/plugins/monitoring/server/lib/alerts/fetch_index_shard_size.ts @@ -69,13 +69,6 @@ export async function fetchIndexShardSize( }, aggs: { over_threshold: { - filter: { - range: { - 'index_stats.primaries.store.size_in_bytes': { - gt: threshold * gbMultiplier, - }, - }, - }, aggs: { index: { terms: { @@ -96,6 +89,7 @@ export async function fetchIndexShardSize( _source: { includes: [ '_index', + 'index_stats.shards.primaries', 'index_stats.primaries.store.size_in_bytes', 'source_node.name', 'source_node.uuid', @@ -123,7 +117,7 @@ export async function fetchIndexShardSize( if (!clusterBuckets.length) { return stats; } - + const thresholdBytes = threshold * gbMultiplier; for (const clusterBucket of clusterBuckets) { const indexBuckets = clusterBucket.over_threshold.index.buckets; const clusterUuid = clusterBucket.key; @@ -143,9 +137,25 @@ export async function fetchIndexShardSize( _source: { source_node: sourceNode, index_stats: indexStats }, } = topHit; - const { size_in_bytes: shardSizeBytes } = indexStats?.primaries?.store!; + if (!indexStats || !indexStats.primaries) { + continue; + } + + const { primaries: totalPrimaryShards } = indexStats.shards; + const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store || {}; + if (!primaryShardSizeBytes || !totalPrimaryShards) { + continue; + } + /** + * We can only calculate the average primary shard size at this point, since we don't have + * data (in .monitoring-es* indices) to give us individual shards. This might change in the future + */ const { name: nodeName, uuid: nodeId } = sourceNode; - const shardSize = +(shardSizeBytes! / gbMultiplier).toFixed(2); + const avgShardSize = primaryShardSizeBytes / totalPrimaryShards; + if (avgShardSize < thresholdBytes) { + continue; + } + const shardSize = +(avgShardSize / gbMultiplier).toFixed(2); stats.push({ shardIndex, shardSize,