-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathalarms.tf
225 lines (209 loc) · 11.5 KB
/
alarms.tf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# CloudWatch dimension set referenced by the alarm resources in this file:
# every alarm is scoped to the cluster named by var.cluster_identifier.
locals {
  dimensions = { DBClusterIdentifier = var.cluster_identifier }
}
# CPU utilization alarms.
# One alarm is created per entry of var.cpu_utilization_checks; the map key is
# shown as the priority label and the map value is used as the threshold.
resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
  for_each = var.cpu_utilization_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} CPU usage is high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is above of threshold: ${each.value}. The percentage of CPU used by an Aurora DB instance. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "CPUUtilization"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.cpu_utilization_period

  # Evaluation rule: fires when the average is at or above the threshold
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.cpu_utilization_periods
  treat_missing_data  = var.cpu_utilization_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Freeable memory alarms.
# One alarm is created per entry of var.freeable_memory_checks; the map key is
# shown as the priority label and the map value is the threshold in MB.
resource "aws_cloudwatch_metric_alarm" "freeable_memory" {
  for_each = var.freeable_memory_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate memory usage is high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is below threshold: ${each.value}MB. The amount of available random access memory, in bytes. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "FreeableMemory"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.freeable_memory_period

  # Evaluation rule: fires when free memory drops to or below the threshold.
  # The metric is in bytes (see description), so the MB value is converted.
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = each.value * 1024 * 1024
  evaluation_periods  = var.freeable_memory_periods
  treat_missing_data  = var.freeable_memory_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Free local storage alarms.
# One alarm is created per entry of var.free_storage_space_checks; the map key
# is shown as the priority label and the map value is the threshold in MB.
resource "aws_cloudwatch_metric_alarm" "free_storage_space" {
  for_each = var.free_storage_space_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate free storage space is low for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is below threshold: ${each.value}MB. The amount of available storage space. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "FreeLocalStorage"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.free_storage_space_period

  # Evaluation rule: fires when free storage drops to or below the threshold.
  # The MB value is scaled by 1024 * 1024 (presumably to bytes, the unit of
  # the metric; confirm against the AWS metric reference).
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = each.value * 1024 * 1024
  evaluation_periods  = var.free_storage_space_periods
  treat_missing_data  = var.free_storage_space_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Aurora replica lag alarms.
# One alarm is created per entry of var.aurora_replica_lag_checks; the map key
# is shown as the priority label and the map value is the threshold, which the
# description reports in milliseconds.
resource "aws_cloudwatch_metric_alarm" "aurora_replica_lag" {
  for_each = var.aurora_replica_lag_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate aurora replication lag is high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is above of threshold: ${each.value}ms. For an Aurora Replica, the amount of lag when replicating updates from the primary instance, in milliseconds. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "AuroraReplicaLag"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.aurora_replica_lag_period

  # Evaluation rule: fires when the average lag is at or above the threshold
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.aurora_replica_lag_periods
  treat_missing_data  = var.aurora_replica_lag_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Database connection count alarms.
# One alarm is created per entry of var.database_connections_checks; the map
# key is shown as the priority label and the map value is the connection-count
# threshold.
resource "aws_cloudwatch_metric_alarm" "database_connections" {
  for_each = var.database_connections_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate database connections is high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is above of threshold: ${each.value} connections. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "DatabaseConnections"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.database_connections_period

  # Evaluation rule: fires when the average is at or above the threshold
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.database_connections_periods
  treat_missing_data  = var.database_connections_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Swap usage alarms.
# One alarm is created per entry of var.swap_usage_checks; the map key is shown
# as the priority label and the map value is the threshold, which the
# description reports in megabytes.
resource "aws_cloudwatch_metric_alarm" "swap_usage" {
  for_each = var.swap_usage_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate swap usage is high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is above of threshold: ${each.value}mb. The amount of swap space used on the Aurora DB instance. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "SwapUsage"
  dimensions  = local.dimensions
  statistic   = "Average"
  period      = var.swap_usage_period

  # Evaluation rule: fires when swap usage is at or above the threshold.
  # The configured value is scaled by 1024 * 1024 (presumably MB to bytes;
  # confirm the metric unit).
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value * 1024 * 1024
  evaluation_periods  = var.swap_usage_periods
  treat_missing_data  = var.swap_usage_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Deadlock alarms.
# One alarm is created per entry of var.deadlocks_checks; the map key is shown
# as the priority label and the map value is the deadlock-count threshold,
# which is also embedded in the alarm name.
resource "aws_cloudwatch_metric_alarm" "deadlocks" {
  for_each = var.deadlocks_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Approximate count of deadlocks are above ${each.value} for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} Alarm is above of threshold: ${each.value} deadlocks. The amount of deadlocks are high. It can block some critical queries on database. More: https://amzn.to/31wBNqC"

  # Metric under observation; unlike most alarms here this one uses Sum
  namespace   = "AWS/RDS"
  metric_name = "Deadlocks"
  dimensions  = local.dimensions
  statistic   = "Sum"
  period      = var.deadlocks_period

  # Evaluation rule: fires when the summed value is at or above the threshold
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.deadlocks_periods
  treat_missing_data  = var.deadlocks_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Cluster volume read IOPS alarms.
# One alarm is created per entry of var.volume_read_iops_checks; the map key is
# shown as the priority label. The statistic is caller-configurable.
resource "aws_cloudwatch_metric_alarm" "volume_read_iops" {
  for_each = var.volume_read_iops_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Volume read IOPS is too high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} The number of billed read I/O operations from a cluster volume, reported at 5-minute intervals. High value = high price for database. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "VolumeReadIOPs"
  dimensions  = local.dimensions
  statistic   = var.volume_read_iops_statistic
  period      = var.volume_read_iops_period

  # Evaluation rule: fires at or above the threshold.
  # The configured value is multiplied by 1000 (presumably the checks are
  # expressed in thousands of operations; verify against the variable docs).
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value * 1000
  evaluation_periods  = var.volume_read_iops_periods
  treat_missing_data  = var.volume_read_iops_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Cluster volume write IOPS alarms.
# One alarm is created per entry of var.volume_write_iops_checks; the map key
# is shown as the priority label. The statistic is caller-configurable.
resource "aws_cloudwatch_metric_alarm" "volume_write_iops" {
  for_each = var.volume_write_iops_checks

  # Identity
  # NOTE(review): the description tells the reader to "See the description of
  # VolumeReadIOPS above", which has no referent inside an alarm notification.
  alarm_name        = "${var.alarm_prefix}: ${each.key} Volume write IOPS is too high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} The number of write disk I/O operations to the cluster volume, reported at 5-minute intervals. See the description of VolumeReadIOPS above for a detailed description of how billed write operations are calculated. High value = high price for database. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "VolumeWriteIOPs"
  dimensions  = local.dimensions
  statistic   = var.volume_write_iops_statistic
  period      = var.volume_write_iops_period

  # Evaluation rule: fires at or above the threshold.
  # The configured value is multiplied by 1000 (presumably the checks are
  # expressed in thousands of operations; verify against the variable docs).
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value * 1000
  evaluation_periods  = var.volume_write_iops_periods
  treat_missing_data  = var.volume_write_iops_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Transaction log disk usage alarms (the description notes this applies to
# PostgreSQL).
# One alarm is created per entry of var.transaction_logs_disk_usage_checks; the
# map key is shown as the priority label. The statistic is caller-configurable.
resource "aws_cloudwatch_metric_alarm" "transaction_logs_disk_usage" {
  for_each = var.transaction_logs_disk_usage_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Transaction logs disk usage is too high for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} The disk space used by transaction logs. Applies to PostgreSQL. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "TransactionLogsDiskUsage"
  dimensions  = local.dimensions
  statistic   = var.transaction_logs_disk_usage_statistic
  period      = var.transaction_logs_disk_usage_period

  # Evaluation rule: fires at or above the threshold.
  # The configured value is scaled by 1024^3 (presumably GB to bytes; confirm
  # the unit expected by the checks variable).
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value * 1024 * 1024 * 1024
  evaluation_periods  = var.transaction_logs_disk_usage_periods
  treat_missing_data  = var.transaction_logs_disk_usage_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Buffer cache hit ratio alarms.
# One alarm is created per entry of var.buffer_cache_hit_ratio_checks; the map
# key is shown as the priority label and the map value is the threshold.
# The statistic is caller-configurable.
resource "aws_cloudwatch_metric_alarm" "buffer_cache_hit_ratio" {
  for_each = var.buffer_cache_hit_ratio_checks

  # Identity
  alarm_name        = "${var.alarm_prefix}: ${each.key} Buffer cache hit ratio is too low for ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} The percentage of requests that are served by the buffer cache. Should be close to 100%. More: https://amzn.to/31wBNqC"

  # Metric under observation
  namespace   = "AWS/RDS"
  metric_name = "BufferCacheHitRatio"
  dimensions  = local.dimensions
  statistic   = var.buffer_cache_hit_ratio_statistic
  period      = var.buffer_cache_hit_ratio_period

  # Evaluation rule: a low ratio is the problem, so this fires when the value
  # is at or below the threshold
  comparison_operator = "LessThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.buffer_cache_hit_ratio_periods
  treat_missing_data  = var.buffer_cache_hit_ratio_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}
# Maximum used transaction ID alarms (PostgreSQL, per the description).
# One alarm is created per entry of var.maximum_used_transaction_ids_checks;
# the map key is shown as the priority label and the map value is the
# threshold.
resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids" {
  for_each = var.maximum_used_transaction_ids_checks

  # Identity
  # NOTE(review): unlike the sibling alarms, the name lacks "for" before the
  # cluster identifier. Left as-is here; renaming an existing alarm likely
  # forces the resource to be replaced, so confirm before changing it.
  alarm_name        = "${var.alarm_prefix}: ${each.key} MaximumUsedTransactionIDs is too high ${var.cluster_identifier}"
  alarm_description = "Priority: ${each.key} A PostgreSQL database can have two billion in-flight unvacuumed transactions before PostgreSQL takes dramatic action to avoid data loss. More: https://amzn.to/2MhOTEv"

  # Metric under observation; evaluated on the Maximum statistic
  namespace   = "AWS/RDS"
  metric_name = "MaximumUsedTransactionIDs"
  dimensions  = local.dimensions
  statistic   = "Maximum"
  period      = var.maximum_used_transaction_ids_period

  # Evaluation rule: fires when the maximum is at or above the threshold
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = each.value
  evaluation_periods  = var.maximum_used_transaction_ids_periods
  treat_missing_data  = var.maximum_used_transaction_ids_missing_data

  # Notification targets and tagging
  alarm_actions = var.actions
  ok_actions    = var.ok_actions
  tags          = var.tags
}