From 637b48d30b878ba10f92145d2b60bcacbcf6e219 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Fri, 4 Oct 2024 12:58:30 +0200 Subject: [PATCH 1/9] init integration_aws-direct-connect for testing --- docs/severity.md | 10 + .../integration_aws-direct-connect/README.md | 109 +++++++++++ .../common-filters.tf | 1 + .../common-locals.tf | 1 + .../common-modules.tf | 1 + .../common-variables.tf | 1 + .../common-versions.tf | 1 + .../conf/00-heartbeat.yaml | 13 ++ .../conf/01-connection_state.yaml | 19 ++ .../conf/02-connection_traffic.yaml | 25 +++ .../conf/readme.yaml | 3 + .../detectors-gen.tf | 95 ++++++++++ .../integration_aws-direct-connect/outputs.tf | 15 ++ .../integration_aws-direct-connect/tags.tf | 4 + .../variables-gen.tf | 172 ++++++++++++++++++ 15 files changed, 470 insertions(+) create mode 100644 modules/integration_aws-direct-connect/README.md create mode 120000 modules/integration_aws-direct-connect/common-filters.tf create mode 120000 modules/integration_aws-direct-connect/common-locals.tf create mode 120000 modules/integration_aws-direct-connect/common-modules.tf create mode 120000 modules/integration_aws-direct-connect/common-variables.tf create mode 120000 modules/integration_aws-direct-connect/common-versions.tf create mode 100644 modules/integration_aws-direct-connect/conf/00-heartbeat.yaml create mode 100644 modules/integration_aws-direct-connect/conf/01-connection_state.yaml create mode 100644 modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml create mode 100644 modules/integration_aws-direct-connect/conf/readme.yaml create mode 100644 modules/integration_aws-direct-connect/detectors-gen.tf create mode 100644 modules/integration_aws-direct-connect/outputs.tf create mode 100644 modules/integration_aws-direct-connect/tags.tf create mode 100644 modules/integration_aws-direct-connect/variables-gen.tf diff --git a/docs/severity.md b/docs/severity.md index 149003026..10f0ae3ee 100644 --- a/docs/severity.md +++ b/docs/severity.md 
@@ -15,6 +15,7 @@ - [integration_aws-apigateway](#integration_aws-apigateway) - [integration_aws-backup](#integration_aws-backup) - [integration_aws-beanstalk](#integration_aws-beanstalk) +- [integration_aws-direct-connect](#integration_aws-direct-connect) - [integration_aws-ecs-cluster](#integration_aws-ecs-cluster) - [integration_aws-ecs-service](#integration_aws-ecs-service) - [integration_aws-efs](#integration_aws-efs) @@ -234,6 +235,15 @@ |AWS Beanstalk instance root filesystem usage|X|X|-|-|-| +## integration_aws-direct-connect + +|Detector|Critical|Major|Minor|Warning|Info| +|---|---|---|---|---|---| +|Aws-direct-connect heartbeat|X|-|-|-|-| +|AWS Direct Connect connection state|X|-|-|-|-| +|AWS Direct Connect connection traffic|-|X|-|-|-| + + ## integration_aws-ecs-cluster |Detector|Critical|Major|Minor|Warning|Info| diff --git a/modules/integration_aws-direct-connect/README.md b/modules/integration_aws-direct-connect/README.md new file mode 100644 index 000000000..afc076048 --- /dev/null +++ b/modules/integration_aws-direct-connect/README.md @@ -0,0 +1,109 @@ +# AWS-DIRECT-CONNECT SignalFx detectors + + + +:link: **Contents** + +- [How to use this module?](#how-to-use-this-module) +- [What are the available detectors in this module?](#what-are-the-available-detectors-in-this-module) +- [How to collect required metrics?](#how-to-collect-required-metrics) + - [Metrics](#metrics) +- [Related documentation](#related-documentation) + + + +## How to use this module? 
+ +This directory defines a [Terraform](https://www.terraform.io/) +[module](https://www.terraform.io/language/modules/syntax) you can use in your +existing [stack](https://github.com/claranet/terraform-signalfx-detectors/wiki/Getting-started#stack) by adding a +`module` configuration and setting its `source` parameter to the URL of this folder: + +```hcl +module "signalfx-detectors-integration-aws-direct-connect" { + source = "github.com/claranet/terraform-signalfx-detectors.git//modules/integration_aws-direct-connect?ref={revision}" + + environment = var.environment + notifications = local.notifications +} +``` + +Note the following parameters: + +* `source`: Use this parameter to specify the URL of the module. The double slash (`//`) is intentional and required. + Terraform uses it to specify subfolders within a Git repo (see [module + sources](https://www.terraform.io/language/modules/sources)). The `ref` parameter specifies a specific Git tag in + this repository. It is recommended to use the latest "pinned" version in place of `{revision}`. Avoid using a branch + like `master` except for testing purposes. Note that every module in this repository is available on the Terraform + [registry](https://registry.terraform.io/modules/claranet/detectors/signalfx) and we recommend using it as source + instead of `git` which is more flexible but less future-proof. + +* `environment`: Use this parameter to specify the + [environment](https://github.com/claranet/terraform-signalfx-detectors/wiki/Getting-started#environment) used by this + instance of the module. + Its value will be added to the `prefixes` list at the start of the [detector + name](https://github.com/claranet/terraform-signalfx-detectors/wiki/Templating#example). 
+ In general, it will also be used in the `filtering` internal sub-module to [apply + filters](https://github.com/claranet/terraform-signalfx-detectors/wiki/Guidance#filtering) based on our default + [tagging convention](https://github.com/claranet/terraform-signalfx-detectors/wiki/Tagging-convention). + +* `notifications`: Use this parameter to define where alerts should be sent depending on their severity. It consists + of a Terraform [object](https://www.terraform.io/language/expressions/type-constraints#object) where each key represents an available + [detector rule severity](https://docs.splunk.com/observability/alerts-detectors-notifications/create-detectors-for-alerts.html#severity) + and its value is a list of recipients. Every recipient must respect the [detector notification + format](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs/resources/detector#notification-format). + Check the [notification binding](https://github.com/claranet/terraform-signalfx-detectors/wiki/Notifications-binding) + documentation to understand the recommended role of each severity. + +These 3 parameters along with all variables defined in [common-variables.tf](common-variables.tf) are common to all +[modules](../) in this repository. Other variables, specific to this module, are available in +[variables-gen.tf](variables-gen.tf). +In general, the default configuration "works" but all of these Terraform +[variables](https://www.terraform.io/language/values/variables) make it possible to +customize the detectors' behavior to better fit your needs. + +Most of them represent usual tips and rules detailed in the +[guidance](https://github.com/claranet/terraform-signalfx-detectors/wiki/Guidance) documentation and listed in the +common [variables](https://github.com/claranet/terraform-signalfx-detectors/wiki/Variables) dedicated documentation. 
+ +Feel free to explore the [wiki](https://github.com/claranet/terraform-signalfx-detectors/wiki) for more information about +general usage of this repository. + +## What are the available detectors in this module? + +This module creates the following SignalFx detectors which could contain one or multiple alerting rules: + +|Detector|Critical|Major|Minor|Warning|Info| +|---|---|---|---|---|---| +|Aws-direct-connect heartbeat|X|-|-|-|-| +|AWS Direct Connect connection state|X|-|-|-|-| +|AWS Direct Connect connection traffic|-|X|-|-|-| + +## How to collect required metrics? + +This module deploys detectors using metrics reported by the +[AWS integration](https://docs.splunk.com/Observability/gdi/get-data-in/connect/aws/aws.html) configurable +with [this Terraform module](https://github.com/claranet/terraform-signalfx-integrations/tree/master/cloud/aws). + + +Check the [Related documentation](#related-documentation) section for more detailed and specific information about this module's dependencies. + + + +### Metrics + + +Here is the list of required metrics for detectors in this module. 
+ +* `ConnectionBpsEgress` +* `ConnectionBpsIngress` +* `ConnectionState` + + + + +## Related documentation + +* [Terraform SignalFx provider](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs) +* [Terraform SignalFx detector](https://registry.terraform.io/providers/splunk-terraform/signalfx/latest/docs/resources/detector) +* [Splunk Observability integrations](https://docs.splunk.com/Observability/gdi/get-data-in/integrations.html) diff --git a/modules/integration_aws-direct-connect/common-filters.tf b/modules/integration_aws-direct-connect/common-filters.tf new file mode 120000 index 000000000..9272cf517 --- /dev/null +++ b/modules/integration_aws-direct-connect/common-filters.tf @@ -0,0 +1 @@ +../../common/module/filters-integration-aws.tf \ No newline at end of file diff --git a/modules/integration_aws-direct-connect/common-locals.tf b/modules/integration_aws-direct-connect/common-locals.tf new file mode 120000 index 000000000..5672d21ab --- /dev/null +++ b/modules/integration_aws-direct-connect/common-locals.tf @@ -0,0 +1 @@ +../../common/module/locals.tf \ No newline at end of file diff --git a/modules/integration_aws-direct-connect/common-modules.tf b/modules/integration_aws-direct-connect/common-modules.tf new file mode 120000 index 000000000..8c81ef377 --- /dev/null +++ b/modules/integration_aws-direct-connect/common-modules.tf @@ -0,0 +1 @@ +../../common/module/modules.tf \ No newline at end of file diff --git a/modules/integration_aws-direct-connect/common-variables.tf b/modules/integration_aws-direct-connect/common-variables.tf new file mode 120000 index 000000000..f3037a584 --- /dev/null +++ b/modules/integration_aws-direct-connect/common-variables.tf @@ -0,0 +1 @@ +../../common/module/variables.tf \ No newline at end of file diff --git a/modules/integration_aws-direct-connect/common-versions.tf b/modules/integration_aws-direct-connect/common-versions.tf new file mode 120000 index 000000000..fa7f5509f --- /dev/null +++ 
b/modules/integration_aws-direct-connect/common-versions.tf @@ -0,0 +1 @@ +../../common/module/versions.tf \ No newline at end of file diff --git a/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml b/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml new file mode 100644 index 000000000..b58d6eb05 --- /dev/null +++ b/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml @@ -0,0 +1,13 @@ +## Example +module: aws-direct-connect +name: heartbeat + +transformation: false +aggregation: true +exclude_not_running_vm: true + +signals: + signal: + metric: "ConnectionState" +rules: + critical: diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml new file mode 100644 index 000000000..18e73a34d --- /dev/null +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml @@ -0,0 +1,19 @@ +module: AWS Direct Connect +name: "Connection state" + +transformation: true +aggregation: true + +filtering: "filter('namespace', 'AWS/DirectConnect')" +value_unit: "state" + +signals: + signal: + metric: ConnectionState + filter: "filter('stat', 'maximum')" + +rules: + critical: + threshold: 0 + comparator: "==" + description: "Connection is down" diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml new file mode 100644 index 000000000..6da4e5be2 --- /dev/null +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -0,0 +1,25 @@ +module: AWS Direct Connect +name: "Connection traffic" + +transformation: true +aggregation: true + +filtering: "filter('namespace', 'AWS/DirectConnect')" +value_unit: "bytes" + +signals: + ingress_bps: + metric: ConnectionBpsIngress + filter: "filter('stat', 'sum')" + + egress_bps: + metric: ConnectionBpsEgress + filter: "filter('stat', 'sum')" + +rules: + major: + threshold: 0 + comparator: "==" + 
signal: ingress_bps + append_condition: and when(egress_bps == 0) + description: "No traffic detected" diff --git a/modules/integration_aws-direct-connect/conf/readme.yaml b/modules/integration_aws-direct-connect/conf/readme.yaml new file mode 100644 index 000000000..9015fc41a --- /dev/null +++ b/modules/integration_aws-direct-connect/conf/readme.yaml @@ -0,0 +1,3 @@ +documentations: + +source_doc: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf new file mode 100644 index 000000000..cd016ec1b --- /dev/null +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -0,0 +1,95 @@ +resource "signalfx_detector" "heartbeat" { + name = format("%s %s", local.detector_name_prefix, "Aws-direct-connect heartbeat") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + program_text = <<-EOF + from signalfx.detectors.not_reporting import not_reporting + signal = data('ConnectionState', filter=%{if var.heartbeat_exclude_not_running_vm}${local.not_running_vm_filters} and %{endif}${module.filtering.signalflow})${var.heartbeat_aggregation_function}.publish('signal') + not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}', auto_resolve_after='${local.heartbeat_auto_resolve_after}').publish('CRIT') +EOF + + rule { + description = "has not reported in ${var.heartbeat_timeframe}" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.heartbeat_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.heartbeat_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.heartbeat_runbook_url, var.runbook_url), "") + tip = var.heartbeat_tip + parameterized_subject = var.message_subject == "" ? 
local.rule_subject_novalue : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + + max_delay = var.heartbeat_max_delay +} + +resource "signalfx_detector" "connection_state" { + name = format("%s %s", local.detector_name_prefix, "AWS Direct Connect connection state") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + viz_options { + label = "signal" + value_suffix = "state" + } + + program_text = <<-EOF + base_filtering = filter('namespace', 'AWS/DirectConnect') + signal = data('ConnectionState', filter=base_filtering and filter('stat', 'maximum') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') + detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') +EOF + + rule { + description = "Connection is down == ${var.connection_state_threshold_critical}state" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.connection_state_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.connection_state_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.connection_state_runbook_url, var.runbook_url), "") + tip = var.connection_state_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body + } + + max_delay = var.connection_state_max_delay +} + +resource "signalfx_detector" "connection_traffic" { + name = format("%s %s", local.detector_name_prefix, "AWS Direct Connect connection traffic") + + authorized_writer_teams = var.authorized_writer_teams + teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) + tags = compact(concat(local.common_tags, local.tags, var.extra_tags)) + + viz_options { + label = "egress_bps" + value_suffix = "bytes" + } + + program_text = <<-EOF + base_filtering = filter('namespace', 'AWS/DirectConnect') + ingress_bps = data('ConnectionBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function} + egress_bps = data('ConnectionBpsEgress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function}.publish('egress_bps') + detect(when(ingress_bps == ${var.connection_traffic_threshold_major}%{if var.connection_traffic_lasting_duration_major != null}, lasting='${var.connection_traffic_lasting_duration_major}', at_least=${var.connection_traffic_at_least_percentage_major}%{endif}) and when(egress_bps == 0)).publish('MAJOR') +EOF + + rule { + description = "No traffic detected == ${var.connection_traffic_threshold_major}bytes" + severity = "Major" + detect_label = "MAJOR" + disabled = coalesce(var.connection_traffic_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.connection_traffic_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.connection_traffic_runbook_url, var.runbook_url), "") + tip = var.connection_traffic_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body + } + + max_delay = var.connection_traffic_max_delay +} + diff --git a/modules/integration_aws-direct-connect/outputs.tf b/modules/integration_aws-direct-connect/outputs.tf new file mode 100644 index 000000000..1853a3928 --- /dev/null +++ b/modules/integration_aws-direct-connect/outputs.tf @@ -0,0 +1,15 @@ +output "connection_state" { + description = "Detector resource for connection_state" + value = signalfx_detector.connection_state +} + +output "connection_traffic" { + description = "Detector resource for connection_traffic" + value = signalfx_detector.connection_traffic +} + +output "heartbeat" { + description = "Detector resource for heartbeat" + value = signalfx_detector.heartbeat +} + diff --git a/modules/integration_aws-direct-connect/tags.tf b/modules/integration_aws-direct-connect/tags.tf new file mode 100644 index 000000000..297d5ebee --- /dev/null +++ b/modules/integration_aws-direct-connect/tags.tf @@ -0,0 +1,4 @@ +locals { + tags = ["integration", "aws-direct-connect"] +} + diff --git a/modules/integration_aws-direct-connect/variables-gen.tf b/modules/integration_aws-direct-connect/variables-gen.tf new file mode 100644 index 000000000..95393b668 --- /dev/null +++ b/modules/integration_aws-direct-connect/variables-gen.tf @@ -0,0 +1,172 @@ +# heartbeat detector + +variable "heartbeat_notifications" { + description = "Notification recipients list per severity overridden for heartbeat detector" + type = map(list(string)) + default = {} +} + +variable "heartbeat_aggregation_function" { + description = "Aggregation function and group by for heartbeat detector (i.e. 
\".mean(by=['host'])\")" + type = string + default = "" +} + +variable "heartbeat_max_delay" { + description = "Enforce max delay for heartbeat detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "heartbeat_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "heartbeat_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "heartbeat_disabled" { + description = "Disable all alerting rules for heartbeat detector" + type = bool + default = null +} + +variable "heartbeat_exclude_not_running_vm" { + description = "Don’t send alerts if associated VM is stopped or stopping (metadata provided by cloud provider integration). Can be useful for ephemeral infrastructure (such as auto scaling groups) as VM will be stopped and started regularly. Note that timeframe must be at least 25 minutes for the metadata to be available to the detector." + type = bool + default = true +} + +variable "heartbeat_timeframe" { + description = "Timeframe for heartbeat detector (i.e. \"25m\"). Must be at least \"25m\" if \"heartbeat_exclude_not_running_vm\" is true" + type = string + default = "25m" +} + +# connection_state detector + +variable "connection_state_notifications" { + description = "Notification recipients list per severity overridden for connection_state detector" + type = map(list(string)) + default = {} +} + +variable "connection_state_aggregation_function" { + description = "Aggregation function and group by for connection_state detector (i.e. \".mean(by=['host'])\")" + type = string + default = "" +} + +variable "connection_state_transformation_function" { + description = "Transformation function for connection_state detector (i.e. 
\".mean(over='5m')\")" + type = string + default = "" +} + +variable "connection_state_max_delay" { + description = "Enforce max delay for connection_state detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "connection_state_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "connection_state_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "connection_state_disabled" { + description = "Disable all alerting rules for connection_state detector" + type = bool + default = null +} + +variable "connection_state_threshold_critical" { + description = "Critical threshold for connection_state detector in state" + type = number + default = 0 +} + +variable "connection_state_lasting_duration_critical" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "connection_state_at_least_percentage_critical" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} +# connection_traffic detector + +variable "connection_traffic_notifications" { + description = "Notification recipients list per severity overridden for connection_traffic detector" + type = map(list(string)) + default = {} +} + +variable "connection_traffic_aggregation_function" { + description = "Aggregation function and group by for connection_traffic detector (i.e. \".mean(by=['host'])\")" + type = string + default = "" +} + +variable "connection_traffic_transformation_function" { + description = "Transformation function for connection_traffic detector (i.e. 
\".mean(over='5m')\")" + type = string + default = "" +} + +variable "connection_traffic_max_delay" { + description = "Enforce max delay for connection_traffic detector (use \"0\" or \"null\" for \"Auto\")" + type = number + default = null +} + +variable "connection_traffic_tip" { + description = "Suggested first course of action or any note useful for incident handling" + type = string + default = "" +} + +variable "connection_traffic_runbook_url" { + description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" + type = string + default = "" +} + +variable "connection_traffic_disabled" { + description = "Disable all alerting rules for connection_traffic detector" + type = bool + default = null +} + +variable "connection_traffic_threshold_major" { + description = "Major threshold for connection_traffic detector in bytes" + type = number + default = 0 +} + +variable "connection_traffic_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "connection_traffic_at_least_percentage_major" { + description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} From f8f2f0e4522753b888bc1bad2f9593f53ab7f9cf Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Fri, 4 Oct 2024 14:59:18 +0200 Subject: [PATCH 2/9] fix: namespace --- .../conf/01-connection_state.yaml | 2 +- .../conf/02-connection_traffic.yaml | 2 +- modules/integration_aws-direct-connect/detectors-gen.tf | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml index 18e73a34d..e4cb56b3e 100644 --- a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml 
@@ -4,7 +4,7 @@ name: "Connection state" transformation: true aggregation: true -filtering: "filter('namespace', 'AWS/DirectConnect')" +filtering: "filter('namespace', 'AWS/DX')" value_unit: "state" signals: diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml index 6da4e5be2..32e167bdf 100644 --- a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -4,7 +4,7 @@ name: "Connection traffic" transformation: true aggregation: true -filtering: "filter('namespace', 'AWS/DirectConnect')" +filtering: "filter('namespace', 'AWS/DX')" value_unit: "bytes" signals: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index cd016ec1b..890790594 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -39,7 +39,7 @@ resource "signalfx_detector" "connection_state" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DirectConnect') + base_filtering = filter('namespace', 'AWS/DX') signal = data('ConnectionState', filter=base_filtering and filter('stat', 'maximum') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') EOF @@ -72,7 +72,7 @@ resource "signalfx_detector" "connection_traffic" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DirectConnect') + base_filtering = filter('namespace', 'AWS/DX') ingress_bps = 
data('ConnectionBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function} egress_bps = data('ConnectionBpsEgress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function}.publish('egress_bps') detect(when(ingress_bps == ${var.connection_traffic_threshold_major}%{if var.connection_traffic_lasting_duration_major != null}, lasting='${var.connection_traffic_lasting_duration_major}', at_least=${var.connection_traffic_at_least_percentage_major}%{endif}) and when(egress_bps == 0)).publish('MAJOR') From a2973b654d49c91989a11da64c23a6b1e4b8421f Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Wed, 9 Oct 2024 14:50:36 +0200 Subject: [PATCH 3/9] fix: connection state --- .../conf/01-connection_state.yaml | 2 +- modules/integration_aws-direct-connect/detectors-gen.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml index e4cb56b3e..49180da4c 100644 --- a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml @@ -10,7 +10,7 @@ value_unit: "state" signals: signal: metric: ConnectionState - filter: "filter('stat', 'maximum')" + filter: "filter('stat', 'sum')" rules: critical: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 890790594..3c8c06900 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -40,7 +40,7 @@ resource "signalfx_detector" "connection_state" { program_text = <<-EOF base_filtering = filter('namespace', 
'AWS/DX') - signal = data('ConnectionState', filter=base_filtering and filter('stat', 'maximum') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') + signal = data('ConnectionState', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') EOF From c28f92c43fc5fe474ae8c34d55c81c7a290e6f9c Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Wed, 9 Oct 2024 17:51:55 +0200 Subject: [PATCH 4/9] fix: lower --- .../conf/01-connection_state.yaml | 2 +- modules/integration_aws-direct-connect/detectors-gen.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml index 49180da4c..6d5ae7b12 100644 --- a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml @@ -10,7 +10,7 @@ value_unit: "state" signals: signal: metric: ConnectionState - filter: "filter('stat', 'sum')" + filter: "filter('stat', 'lower')" rules: critical: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 3c8c06900..1c6c02385 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -40,7 +40,7 @@ resource "signalfx_detector" "connection_state" { program_text = <<-EOF base_filtering = filter('namespace', 'AWS/DX') - 
signal = data('ConnectionState', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') + signal = data('ConnectionState', filter=base_filtering and filter('stat', 'lower') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') EOF From 5411f67e80d01e318d12fb60de544ab004b06d68 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Thu, 10 Oct 2024 15:58:47 +0200 Subject: [PATCH 5/9] fix: detectors --- docs/severity.md | 2 +- .../integration_aws-direct-connect/README.md | 6 ++-- .../conf/02-connection_traffic.yaml | 11 +++--- .../detectors-gen.tf | 26 +++++++------- .../integration_aws-direct-connect/outputs.tf | 10 +++--- .../variables-gen.tf | 34 +++++++++---------- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/docs/severity.md b/docs/severity.md index 10f0ae3ee..b554a3f92 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -241,7 +241,7 @@ |---|---|---|---|---|---| |Aws-direct-connect heartbeat|X|-|-|-|-| |AWS Direct Connect connection state|X|-|-|-|-| -|AWS Direct Connect connection traffic|-|X|-|-|-| +|AWS Direct Connect virtual interface traffic|X|-|-|-|-| ## integration_aws-ecs-cluster diff --git a/modules/integration_aws-direct-connect/README.md b/modules/integration_aws-direct-connect/README.md index afc076048..8a8135dfb 100644 --- a/modules/integration_aws-direct-connect/README.md +++ b/modules/integration_aws-direct-connect/README.md @@ -77,7 +77,7 @@ This module creates the following SignalFx detectors which could contain one or 
|---|---|---|---|---|---| |Aws-direct-connect heartbeat|X|-|-|-|-| |AWS Direct Connect connection state|X|-|-|-|-| -|AWS Direct Connect connection traffic|-|X|-|-|-| +|AWS Direct Connect virtual interface traffic|X|-|-|-|-| ## How to collect required metrics? @@ -95,9 +95,9 @@ Check the [Related documentation](#related-documentation) section for more detai Here is the list of required metrics for detectors in this module. -* `ConnectionBpsEgress` -* `ConnectionBpsIngress` * `ConnectionState` +* `VirtualInterfaceBpsEgress` +* `VirtualInterfaceBpsIngress` diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml index 32e167bdf..98af79d4e 100644 --- a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -1,5 +1,5 @@ module: AWS Direct Connect -name: "Connection traffic" +name: "Virtual Interface traffic" transformation: true aggregation: true @@ -9,17 +9,16 @@ value_unit: "bytes" signals: ingress_bps: - metric: ConnectionBpsIngress + metric: VirtualInterfaceBpsIngress filter: "filter('stat', 'sum')" - egress_bps: - metric: ConnectionBpsEgress + metric: VirtualInterfaceBpsEgress filter: "filter('stat', 'sum')" rules: - major: + critical: threshold: 0 comparator: "==" signal: ingress_bps append_condition: and when(egress_bps == 0) - description: "No traffic detected" + description: "No traffic detected on the virtual interface" diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 1c6c02385..7d3045581 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -59,8 +59,8 @@ EOF max_delay = var.connection_state_max_delay } -resource "signalfx_detector" "connection_traffic" { - name = format("%s %s", 
local.detector_name_prefix, "AWS Direct Connect connection traffic") +resource "signalfx_detector" "virtual_interface_traffic" { + name = format("%s %s", local.detector_name_prefix, "AWS Direct Connect virtual interface traffic") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) @@ -73,23 +73,23 @@ resource "signalfx_detector" "connection_traffic" { program_text = <<-EOF base_filtering = filter('namespace', 'AWS/DX') - ingress_bps = data('ConnectionBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function} - egress_bps = data('ConnectionBpsEgress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.connection_traffic_aggregation_function}${var.connection_traffic_transformation_function}.publish('egress_bps') - detect(when(ingress_bps == ${var.connection_traffic_threshold_major}%{if var.connection_traffic_lasting_duration_major != null}, lasting='${var.connection_traffic_lasting_duration_major}', at_least=${var.connection_traffic_at_least_percentage_major}%{endif}) and when(egress_bps == 0)).publish('MAJOR') + ingress_bps = data('VirtualInterfaceBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function} + egress_bps = data('VirtualInterfaceBpsEgress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function}.publish('egress_bps') + detect(when(ingress_bps == ${var.virtual_interface_traffic_threshold_critical}%{if var.virtual_interface_traffic_lasting_duration_critical != null}, lasting='${var.virtual_interface_traffic_lasting_duration_critical}', 
at_least=${var.virtual_interface_traffic_at_least_percentage_critical}%{endif}) and when(egress_bps == 0)).publish('CRIT') EOF rule { - description = "No traffic detected == ${var.connection_traffic_threshold_major}bytes" - severity = "Major" - detect_label = "MAJOR" - disabled = coalesce(var.connection_traffic_disabled, var.detectors_disabled) - notifications = try(coalescelist(lookup(var.connection_traffic_notifications, "major", []), var.notifications.major), null) - runbook_url = try(coalesce(var.connection_traffic_runbook_url, var.runbook_url), "") - tip = var.connection_traffic_tip + description = "No traffic detected on the virtual interface == ${var.virtual_interface_traffic_threshold_critical}bytes" + severity = "Critical" + detect_label = "CRIT" + disabled = coalesce(var.virtual_interface_traffic_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.virtual_interface_traffic_notifications, "critical", []), var.notifications.critical), null) + runbook_url = try(coalesce(var.virtual_interface_traffic_runbook_url, var.runbook_url), "") + tip = var.virtual_interface_traffic_tip parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject parameterized_body = var.message_body == "" ? 
local.rule_body : var.message_body } - max_delay = var.connection_traffic_max_delay + max_delay = var.virtual_interface_traffic_max_delay } diff --git a/modules/integration_aws-direct-connect/outputs.tf b/modules/integration_aws-direct-connect/outputs.tf index 1853a3928..6a1ecc4a7 100644 --- a/modules/integration_aws-direct-connect/outputs.tf +++ b/modules/integration_aws-direct-connect/outputs.tf @@ -3,13 +3,13 @@ output "connection_state" { value = signalfx_detector.connection_state } -output "connection_traffic" { - description = "Detector resource for connection_traffic" - value = signalfx_detector.connection_traffic -} - output "heartbeat" { description = "Detector resource for heartbeat" value = signalfx_detector.heartbeat } +output "virtual_interface_traffic" { + description = "Detector resource for virtual_interface_traffic" + value = signalfx_detector.virtual_interface_traffic +} + diff --git a/modules/integration_aws-direct-connect/variables-gen.tf b/modules/integration_aws-direct-connect/variables-gen.tf index 95393b668..90d5d2ada 100644 --- a/modules/integration_aws-direct-connect/variables-gen.tf +++ b/modules/integration_aws-direct-connect/variables-gen.tf @@ -109,63 +109,63 @@ variable "connection_state_at_least_percentage_critical" { type = number default = 1 } -# connection_traffic detector +# virtual_interface_traffic detector -variable "connection_traffic_notifications" { - description = "Notification recipients list per severity overridden for connection_traffic detector" +variable "virtual_interface_traffic_notifications" { + description = "Notification recipients list per severity overridden for virtual_interface_traffic detector" type = map(list(string)) default = {} } -variable "connection_traffic_aggregation_function" { - description = "Aggregation function and group by for connection_traffic detector (i.e. 
\".mean(by=['host'])\")" +variable "virtual_interface_traffic_aggregation_function" { + description = "Aggregation function and group by for virtual_interface_traffic detector (i.e. \".mean(by=['host'])\")" type = string default = "" } -variable "connection_traffic_transformation_function" { - description = "Transformation function for connection_traffic detector (i.e. \".mean(over='5m')\")" +variable "virtual_interface_traffic_transformation_function" { + description = "Transformation function for virtual_interface_traffic detector (i.e. \".mean(over='5m')\")" type = string default = "" } -variable "connection_traffic_max_delay" { - description = "Enforce max delay for connection_traffic detector (use \"0\" or \"null\" for \"Auto\")" +variable "virtual_interface_traffic_max_delay" { + description = "Enforce max delay for virtual_interface_traffic detector (use \"0\" or \"null\" for \"Auto\")" type = number default = null } -variable "connection_traffic_tip" { +variable "virtual_interface_traffic_tip" { description = "Suggested first course of action or any note useful for incident handling" type = string default = "" } -variable "connection_traffic_runbook_url" { +variable "virtual_interface_traffic_runbook_url" { description = "URL like SignalFx dashboard or wiki page which can help to troubleshoot the incident cause" type = string default = "" } -variable "connection_traffic_disabled" { - description = "Disable all alerting rules for connection_traffic detector" +variable "virtual_interface_traffic_disabled" { + description = "Disable all alerting rules for virtual_interface_traffic detector" type = bool default = null } -variable "connection_traffic_threshold_major" { - description = "Major threshold for connection_traffic detector in bytes" +variable "virtual_interface_traffic_threshold_critical" { + description = "Critical threshold for virtual_interface_traffic detector in bytes" type = number default = 0 } -variable 
"connection_traffic_lasting_duration_major" { +variable "virtual_interface_traffic_lasting_duration_critical" { description = "Minimum duration that conditions must be true before raising alert" type = string default = null } -variable "connection_traffic_at_least_percentage_major" { +variable "virtual_interface_traffic_at_least_percentage_critical" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number default = 1 From f2c1a829e57e2be7ee710c30c54e3048baf49912 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Thu, 10 Oct 2024 17:40:04 +0200 Subject: [PATCH 6/9] fix: variables --- .../conf/01-connection_state.yaml | 2 +- .../conf/02-connection_traffic.yaml | 2 +- modules/integration_aws-direct-connect/detectors-gen.tf | 4 ++-- modules/integration_aws-direct-connect/variables.tf | 9 +++++++++ 4 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 modules/integration_aws-direct-connect/variables.tf diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml index 6d5ae7b12..eefb1126f 100644 --- a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml @@ -4,7 +4,7 @@ name: "Connection state" transformation: true aggregation: true -filtering: "filter('namespace', 'AWS/DX')" +filtering: "filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}')" value_unit: "state" signals: diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml index 98af79d4e..8bc186655 100644 --- a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -4,7 +4,7 @@ name: "Virtual Interface traffic" transformation: true 
aggregation: true -filtering: "filter('namespace', 'AWS/DX')" +filtering: "filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') and filter('VirtualInterfaceId', '${var.virtual_interface_id}')" value_unit: "bytes" signals: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 7d3045581..f1402277d 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -39,7 +39,7 @@ resource "signalfx_detector" "connection_state" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DX') + base_filtering = filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') signal = data('ConnectionState', filter=base_filtering and filter('stat', 'lower') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') EOF @@ -72,7 +72,7 @@ resource "signalfx_detector" "virtual_interface_traffic" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DX') + base_filtering = filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') and filter('VirtualInterfaceId', '${var.virtual_interface_id}') ingress_bps = data('VirtualInterfaceBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function} egress_bps = data('VirtualInterfaceBpsEgress', filter=base_filtering and filter('stat', 'sum') and 
${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function}.publish('egress_bps') detect(when(ingress_bps == ${var.virtual_interface_traffic_threshold_critical}%{if var.virtual_interface_traffic_lasting_duration_critical != null}, lasting='${var.virtual_interface_traffic_lasting_duration_critical}', at_least=${var.virtual_interface_traffic_at_least_percentage_critical}%{endif}) and when(egress_bps == 0)).publish('CRIT') diff --git a/modules/integration_aws-direct-connect/variables.tf b/modules/integration_aws-direct-connect/variables.tf new file mode 100644 index 000000000..ab1266493 --- /dev/null +++ b/modules/integration_aws-direct-connect/variables.tf @@ -0,0 +1,9 @@ +variable "connection_id" { + description = "The Direct Connect Connection ID" + type = string +} + +variable "virtual_interface_id" { + description = "The Direct Connect Virtual Interface ID" + type = string +} From 136b9fc4267da742afe7384f852e36f337174fd5 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Thu, 10 Oct 2024 18:06:07 +0200 Subject: [PATCH 7/9] fix: reorg, remove vars --- .../conf/01-connection_state.yaml | 2 +- .../conf/02-connection_traffic.yaml | 2 +- modules/integration_aws-direct-connect/detectors-gen.tf | 4 ++-- modules/integration_aws-direct-connect/variables.tf | 9 --------- 4 files changed, 4 insertions(+), 13 deletions(-) delete mode 100644 modules/integration_aws-direct-connect/variables.tf diff --git a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml index eefb1126f..fe244fee3 100644 --- a/modules/integration_aws-direct-connect/conf/01-connection_state.yaml +++ b/modules/integration_aws-direct-connect/conf/01-connection_state.yaml @@ -4,7 +4,7 @@ name: "Connection state" transformation: true aggregation: true -filtering: "filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}')" 
+filtering: "filter('namespace', 'AWS/DX')" value_unit: "state" signals: diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml index 8bc186655..98af79d4e 100644 --- a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -4,7 +4,7 @@ name: "Virtual Interface traffic" transformation: true aggregation: true -filtering: "filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') and filter('VirtualInterfaceId', '${var.virtual_interface_id}')" +filtering: "filter('namespace', 'AWS/DX')" value_unit: "bytes" signals: diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index f1402277d..7d3045581 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -39,7 +39,7 @@ resource "signalfx_detector" "connection_state" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') + base_filtering = filter('namespace', 'AWS/DX') signal = data('ConnectionState', filter=base_filtering and filter('stat', 'lower') and ${module.filtering.signalflow})${var.connection_state_aggregation_function}${var.connection_state_transformation_function}.publish('signal') detect(when(signal == ${var.connection_state_threshold_critical}%{if var.connection_state_lasting_duration_critical != null}, lasting='${var.connection_state_lasting_duration_critical}', at_least=${var.connection_state_at_least_percentage_critical}%{endif})).publish('CRIT') EOF @@ -72,7 +72,7 @@ resource "signalfx_detector" "virtual_interface_traffic" { } program_text = <<-EOF - base_filtering = filter('namespace', 'AWS/DX') and filter('ConnectionId', '${var.connection_id}') and 
filter('VirtualInterfaceId', '${var.virtual_interface_id}') + base_filtering = filter('namespace', 'AWS/DX') ingress_bps = data('VirtualInterfaceBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function} egress_bps = data('VirtualInterfaceBpsEgress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function}.publish('egress_bps') detect(when(ingress_bps == ${var.virtual_interface_traffic_threshold_critical}%{if var.virtual_interface_traffic_lasting_duration_critical != null}, lasting='${var.virtual_interface_traffic_lasting_duration_critical}', at_least=${var.virtual_interface_traffic_at_least_percentage_critical}%{endif}) and when(egress_bps == 0)).publish('CRIT') diff --git a/modules/integration_aws-direct-connect/variables.tf b/modules/integration_aws-direct-connect/variables.tf deleted file mode 100644 index ab1266493..000000000 --- a/modules/integration_aws-direct-connect/variables.tf +++ /dev/null @@ -1,9 +0,0 @@ -variable "connection_id" { - description = "The Direct Connect Connection ID" - type = string -} - -variable "virtual_interface_id" { - description = "The Direct Connect Virtual Interface ID" - type = string -} From 994f5d26a4d8808d003e45fa9fa8e1a25a66a9d9 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Thu, 17 Oct 2024 17:35:20 +0200 Subject: [PATCH 8/9] fix detector after testing cutting the VIF --- modules/integration_aws-direct-connect/README.md | 1 - .../conf/02-connection_traffic.yaml | 6 +----- modules/integration_aws-direct-connect/detectors-gen.tf | 3 +-- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/integration_aws-direct-connect/README.md b/modules/integration_aws-direct-connect/README.md index 8a8135dfb..6b76b4759 100644 --- 
a/modules/integration_aws-direct-connect/README.md +++ b/modules/integration_aws-direct-connect/README.md @@ -97,7 +97,6 @@ Here is the list of required metrics for detectors in this module. * `ConnectionState` * `VirtualInterfaceBpsEgress` -* `VirtualInterfaceBpsIngress` diff --git a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml index 98af79d4e..cc9c6dd07 100644 --- a/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml +++ b/modules/integration_aws-direct-connect/conf/02-connection_traffic.yaml @@ -8,9 +8,6 @@ filtering: "filter('namespace', 'AWS/DX')" value_unit: "bytes" signals: - ingress_bps: - metric: VirtualInterfaceBpsIngress - filter: "filter('stat', 'sum')" egress_bps: metric: VirtualInterfaceBpsEgress filter: "filter('stat', 'sum')" @@ -19,6 +16,5 @@ rules: critical: threshold: 0 comparator: "==" - signal: ingress_bps - append_condition: and when(egress_bps == 0) + signal: egress_bps description: "No traffic detected on the virtual interface" diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 7d3045581..355601041 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -73,9 +73,8 @@ resource "signalfx_detector" "virtual_interface_traffic" { program_text = <<-EOF base_filtering = filter('namespace', 'AWS/DX') - ingress_bps = data('VirtualInterfaceBpsIngress', filter=base_filtering and filter('stat', 'sum') and ${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function} egress_bps = data('VirtualInterfaceBpsEgress', filter=base_filtering and filter('stat', 'sum') and 
${module.filtering.signalflow})${var.virtual_interface_traffic_aggregation_function}${var.virtual_interface_traffic_transformation_function}.publish('egress_bps') - detect(when(ingress_bps == ${var.virtual_interface_traffic_threshold_critical}%{if var.virtual_interface_traffic_lasting_duration_critical != null}, lasting='${var.virtual_interface_traffic_lasting_duration_critical}', at_least=${var.virtual_interface_traffic_at_least_percentage_critical}%{endif}) and when(egress_bps == 0)).publish('CRIT') + detect(when(egress_bps == ${var.virtual_interface_traffic_threshold_critical}%{if var.virtual_interface_traffic_lasting_duration_critical != null}, lasting='${var.virtual_interface_traffic_lasting_duration_critical}', at_least=${var.virtual_interface_traffic_at_least_percentage_critical}%{endif})).publish('CRIT') EOF rule { From 0130ef3dac67120dd0fb173dff5063f8345227d0 Mon Sep 17 00:00:00 2001 From: Julien LE SAUX Date: Tue, 5 Nov 2024 17:50:44 +0100 Subject: [PATCH 9/9] fix: heatbeat detector --- docs/severity.md | 2 +- modules/integration_aws-direct-connect/README.md | 2 +- .../conf/00-heartbeat.yaml | 7 +------ .../detectors-gen.tf | 4 ++-- .../variables-gen.tf | 14 +++++++------- 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/docs/severity.md b/docs/severity.md index b554a3f92..9f7852be8 100644 --- a/docs/severity.md +++ b/docs/severity.md @@ -239,7 +239,7 @@ |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| -|Aws-direct-connect heartbeat|X|-|-|-|-| +|AWS Direct Connect heartbeat|X|-|-|-|-| |AWS Direct Connect connection state|X|-|-|-|-| |AWS Direct Connect virtual interface traffic|X|-|-|-|-| diff --git a/modules/integration_aws-direct-connect/README.md b/modules/integration_aws-direct-connect/README.md index 6b76b4759..e8bc4e0c4 100644 --- a/modules/integration_aws-direct-connect/README.md +++ b/modules/integration_aws-direct-connect/README.md @@ -75,7 +75,7 @@ This module creates the following SignalFx detectors which 
could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| -|Aws-direct-connect heartbeat|X|-|-|-|-| +|AWS Direct Connect heartbeat|X|-|-|-|-| |AWS Direct Connect connection state|X|-|-|-|-| |AWS Direct Connect virtual interface traffic|X|-|-|-|-| diff --git a/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml b/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml index b58d6eb05..1b7a7a05c 100644 --- a/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml +++ b/modules/integration_aws-direct-connect/conf/00-heartbeat.yaml @@ -1,11 +1,6 @@ -## Example -module: aws-direct-connect +module: AWS Direct Connect name: heartbeat -transformation: false -aggregation: true -exclude_not_running_vm: true - signals: signal: metric: "ConnectionState" diff --git a/modules/integration_aws-direct-connect/detectors-gen.tf b/modules/integration_aws-direct-connect/detectors-gen.tf index 355601041..579f2e0c0 100644 --- a/modules/integration_aws-direct-connect/detectors-gen.tf +++ b/modules/integration_aws-direct-connect/detectors-gen.tf @@ -1,5 +1,5 @@ resource "signalfx_detector" "heartbeat" { - name = format("%s %s", local.detector_name_prefix, "Aws-direct-connect heartbeat") + name = format("%s %s", local.detector_name_prefix, "AWS Direct Connect heartbeat") authorized_writer_teams = var.authorized_writer_teams teams = try(coalescelist(var.teams, var.authorized_writer_teams), null) @@ -7,7 +7,7 @@ resource "signalfx_detector" "heartbeat" { program_text = <<-EOF from signalfx.detectors.not_reporting import not_reporting - signal = data('ConnectionState', filter=%{if var.heartbeat_exclude_not_running_vm}${local.not_running_vm_filters} and %{endif}${module.filtering.signalflow})${var.heartbeat_aggregation_function}.publish('signal') + signal = data('ConnectionState', filter=${module.filtering.signalflow})${var.heartbeat_aggregation_function}${var.heartbeat_transformation_function}.publish('signal') 
not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}', auto_resolve_after='${local.heartbeat_auto_resolve_after}').publish('CRIT') EOF diff --git a/modules/integration_aws-direct-connect/variables-gen.tf b/modules/integration_aws-direct-connect/variables-gen.tf index 90d5d2ada..a93c956dc 100644 --- a/modules/integration_aws-direct-connect/variables-gen.tf +++ b/modules/integration_aws-direct-connect/variables-gen.tf @@ -12,6 +12,12 @@ variable "heartbeat_aggregation_function" { default = "" } +variable "heartbeat_transformation_function" { + description = "Transformation function for heartbeat detector (i.e. \".mean(over='5m')\")" + type = string + default = "" +} + variable "heartbeat_max_delay" { description = "Enforce max delay for heartbeat detector (use \"0\" or \"null\" for \"Auto\")" type = number @@ -36,14 +42,8 @@ variable "heartbeat_disabled" { default = null } -variable "heartbeat_exclude_not_running_vm" { - description = "Don’t send alerts if associated VM is stopped or stopping (metadata provided by cloud provider integration). Can be useful for ephemeral infrastructure (such as auto scaling groups) as VM will be stopped and started regularly. Note that timeframe must be at least 25 minutes for the metadata to be available to the detector." - type = bool - default = true -} - variable "heartbeat_timeframe" { - description = "Timeframe for heartbeat detector (i.e. \"25m\"). Must be at least \"25m\" if \"heartbeat_exclude_not_running_vm\" is true" + description = "Timeframe for heartbeat detector (i.e. \"25m\")." type = string default = "25m" }