Skip to content

dasmeta/terraform-onpremise-grafana

Repository files navigation

terraform-onpremise-grafana

https://registry.terraform.io/modules/dasmeta/grafana/onpremise/latest

This module manages an on-premise Grafana stack with Terraform. At the moment it supports managing:

  • Grafana Dashboard with dashboard submodule
  • Grafana Alerts with alerts submodule
  • Grafana Contact Points with contact-points submodule
  • Grafana Notification Policies with notifications submodule

More parts are coming soon.

Example for Dashboard

# Root module usage: defines the "Test-dashboard" application dashboard.
module "grafana_monitoring" {
  source  = "dasmeta/grafana/onpremise"
  version = "1.7.0"

  name = "Test-dashboard"

  application_dashboard = {
    # Global/default data source for the dashboard
    data_source = {
      uid = "00000"
    }

    # Rows, each identified by a block type
    rows = [
      { type = "block/sla" },
      { type = "block/ingress" },
      { type = "block/service", name = "service-name-1", host = "example.com" },
      { type = "block/service", name = "service-name-2" },
      { type = "block/service", name = "service-name-3" },
    ]

    # A "namespace" variable with three options; "prod" is the selected one
    variables = [
      {
        name = "namespace"
        options = [
          { selected = true, value = "prod" },
          { value = "stage" },
          { value = "dev" },
        ]
      },
    ]
  }
}

Example for Alerts

# Alerts submodule usage: two alert rules, two contact points and a notification policy tree.
module "grafana_alerts" {
  source  = "dasmeta/grafana/onpremise//modules/alerts"
  version = "1.7.0"

  alerts = {
    rules = [
      # Rule defined by a metric name plus label filters
      {
        name        = "App_1 has 0 available replicas"
        folder_name = "Replica Count"
        datasource  = "prometheus"
        metric_name = "kube_deployment_status_replicas_available"
        filters     = { deployment = "app-1-microservice" }
        function    = "last"
        equation    = "lt"
        threshold   = 1
      },
      # Rule defined by a full expression
      {
        name        = "Nginx Expressions"
        folder_name = "Nginx Expressions Group"
        datasource  = "prometheus"
        expr        = "sum(rate(nginx_ingress_controller_requests{status=~'5..'}[1m])) by (ingress,cluster) / sum(rate(nginx_ingress_controller_requests[1m]))by (ingress) * 100 > 5"
        function    = "mean"
        equation    = "gt"
        threshold   = 2
      },
    ]

    contact_points = {
      opsgenie = [
        {
          name       = "opsgenie"
          api_key    = "xxxxxxxxxxxxxxxx"
          auto_close = true
        },
      ]
      slack = [
        {
          name        = "slack"
          webhook_url = "https://hooks.slack.com/services/xxxxxxxxxxxxxxxx"
        },
      ]
    }

    notifications = {
      # Default contact point for notifications that match no policy
      contact_point = "slack"

      policies = [
        {
          contact_point = "opsgenie"
          matchers      = [{ label = "priority", match = "=", value = "P1" }]
        },
        {
          contact_point = "slack"
        },
      ]
    }
  }
}

Usage

Check ./tests, modules/alert-rules/tests, modules/alert-contact-points/tests and modules/alert-notifications/tests folders to see more examples.

Requirements

Name Version
terraform >= 1.3.0
grafana >= 3.7.0

Providers

No providers.

Modules

Name Source Version
alerts ./modules/alerts n/a
application_dashboard ./modules/dashboard/ n/a

Resources

No resources.

Inputs

Name Description Type Default Required
alerts n/a
object({
alert_interval_seconds = optional(number, 10) # The interval, in seconds, at which all rules in the group are evaluated. If a group contains many rules, the rules are evaluated sequentially
disable_provenance = optional(bool, true) # Allow modifying resources from other sources than Terraform or the Grafana API
rules = optional( # Describes alert folders, groups and rules
list(object({
name = string # The name of the alert rule
no_data_state = optional(string, "NoData") # Describes what state to enter when the rule's query returns No Data
exec_err_state = optional(string, "Error") # Describes what state to enter when the rule's query is invalid and the rule cannot be executed
summary = optional(string, null) # Rule annotation as a summary; if not passed, it is automatically generated based on data
labels = optional(map(any), { "priority" : "P1" }) # Labels help to define matchers in notification policy to control where to send each alert
folder_name = optional(string, "Main Alerts") # Grafana folder name in which the rule will be created; the folder name is also used as the alert group name with the suffix " Group"
datasource = string # Name of the datasource used for the alert
expr = optional(string, null) # Full expression for the alert
metric_name = optional(string, "") # Prometheus metric name which queries the data for the alert
metric_function = optional(string, "") # Prometheus function used with metric for queries, like rate, sum etc.
metric_interval = optional(string, "") # The time interval to use with functions like rate
settings_mode = optional(string, "replaceNN") # The mode used in B block, possible values are Strict, replaceNN, dropNN
settings_replaceWith = optional(number, 0) # The value by which NaN results of the query will be replaced
filters = optional(any, null) # Filters object to identify each service for alerting
function = optional(string, "mean") # One of Reduce functions which will be used in B block for alerting
equation = string # The equation in the math expression which compares B blocks value with a number and generates an alert if needed. Possible values: gt, lt, gte, lte, e
threshold = number # The value against which B blocks are compared in the math expression
})), [])
contact_points = optional(object({
slack = optional(list(object({ # Slack contact points list
name = string # The name of the contact point
endpoint_url = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to
icon_emoji = optional(string, "") # The name of a Slack workspace emoji to use as the bot icon
icon_url = optional(string, "") # A URL of an image to use as the bot icon
recipient = optional(string, null) # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to
text = optional(string, "") # Templated content of the message
title = optional(string, "") # Templated title of the message
token = optional(string, "") # A Slack API token, for sending messages directly without the webhook method
webhook_url = optional(string, "") # A Slack webhook URL, for sending messages via the webhook method
username = optional(string, "") # Username for the bot to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
opsgenie = optional(list(object({ # OpsGenie contact points list
name = string # The name of the contact point
api_key = string # The OpsGenie API key to use
auto_close = optional(bool, false) # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager
message = optional(string, "") # The templated content of the message
api_url = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
})), [])
teams = optional(list(object({ # Teams contact points list
name = string # The name of the contact point
url = string # The MS Teams Webhook URL to use
message = optional(string, "") # The templated content of the message
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages
section_title = optional(string, "") # The templated subtitle for each message section.
title = optional(string, "") # The templated title of the message
})), [])
webhook = optional(list(object({ # Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format
name = string # The name of the contact point
url = string # The URL to send webhook requests to
authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters
authorization_scheme = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters
basic_auth_password = optional(string, null) # The password component of the basic auth credentials to use
basic_auth_user = optional(string, null) # The username component of the basic auth credentials to use
disable_resolve_message = optional(bool, false) # Whether to disable sending resolve messages. Defaults to false
settings = optional(any, null) # Additional custom properties to attach to the notifier
})), [])
}), null)
notifications = optional(object({
contact_point = optional(string, "Slack") # The default contact point to route all unmatched notifications to
group_by = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by
group_interval = optional(string, "5m") # Minimum time interval between two notifications for the same group
repeat_interval = optional(string, "4h") # Minimum time interval for re-sending a notification if an alert is still firing

mute_timing = optional(object({ # Mute timing config, which will be applied on all policies
name = optional(string, "Default mute timing") # the name of mute timing
intervals = optional(list(object({ # the mute timing interval configs
weekdays = optional(string, null)
days_of_month = optional(string, null)
months = optional(string, null)
years = optional(string, null)
location = optional(string, null)
times = optional(object({
start = optional(string, "00:00")
end = optional(string, "24:59")
}), null)
})), [])
}), null)

policies = optional(list(object({
contact_point = optional(string, null) # The contact point to route notifications that match this rule to
continue = optional(bool, true) # Whether to continue matching subsequent rules if an alert matches the current rule. Otherwise, the rule will be 'consumed' by the first policy to match it
group_by = optional(list(string), ["..."])

matchers = optional(list(object({
label = optional(string, "priority") # The name of the label to match against
match = optional(string, "=") # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality
value = optional(string, "P1") # The label value to match against
})), [])
policies = optional(list(object({ # sub-policies (deeper nesting such as sub-sub-policies could also be implemented, but for now the existing configs seem to be enough)
contact_point = optional(string, null)
continue = optional(bool, true)
group_by = optional(list(string), ["..."])
mute_timings = optional(list(string), [])

matchers = optional(list(object({
label = optional(string, "priority")
match = optional(string, "=")
value = optional(string, "P1")
})), [])
})), [])
})), [])
}), null)
})
{} no
application_dashboard Dashboard for monitoring applications
object({
rows = optional(any, [])
data_source = object({ # global/default datasource, TODO: create datasource inside the module
uid = string
type = optional(string, "prometheus")
})
variables = optional(list(object({ # Allows defining variables to be used in the dashboard
name = string
type = optional(string, "custom")
hide = optional(number, 0)
includeAll = optional(bool, false)
multi = optional(bool, false)
query = optional(string, "")
queryValue = optional(string, "")
skipUrlSync = optional(bool, false)
options = optional(list(object({
selected = optional(bool, false)
value = string
text = optional(string, null)
})), [])
})), [])
})
{
"data_source": null,
"rows": [],
"variables": []
}
no
name Dashboard name string n/a yes

Outputs

Name Description
data n/a