Skip to content

Commit

Permalink
#11 Add backup metrics and alarm
Browse files Browse the repository at this point in the history
  • Loading branch information
kaklakariada committed Feb 21, 2022
1 parent adb8c4f commit 6cd3532
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 9 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ target
.idea
.vscode
*.iml

.settings/org.eclipse.m2e.core.prefs
.settings/org.eclipse.core.resources.prefs
# CDK asset staging directory
.cdk.staging
cdk.out
Expand All @@ -26,4 +27,4 @@ pom.xml.versionsBackup
*.bak
*.orig
*.old
*.md.html
*.md.html
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,25 @@ In this repository we define the dashboard by code using [AWS CDK](https://aws.a
You can use this as a starting point for your own dashboards. In contrast to changing the dashboard in the AWS Console, defining it in code allows you, for example, to version your dashboards in Git, add a review process, or simply copy the same dashboard for multiple Exasol deployments. To do so, go through the following steps:

* Checkout (or fork) this repository
* Adapt the dashboard to your needs (modify `src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java`)
* Adapt the dashboard to your needs (modify [`src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java`](src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java))
* [Install the AWS CDK](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html#getting_started_install)
* Deploy your dashboard using
```shell
cdk deploy --parameters deploymentName=<YOUR DEPLOYMENT NAME>
```
(Don't forget to replace `<YOUR DEPLOYMENT NAME>` with the value you configured during the CloudWatch-adapter setup)
## Injecting Metrics Values for Testing
To test your dashboard and alarms you can manually inject metrics values by executing commands like these:
```sh
aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=<YOUR DEPLOYMENT NAME>" --unit Seconds --value 30 --metric-name "BACKUP_DURATION"
aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=<YOUR DEPLOYMENT NAME>" --unit Count --value 1 --metric-name "BACKUP_START"
aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=<YOUR DEPLOYMENT NAME>" --unit Count --value 1 --metric-name "BACKUP_END"
aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=<YOUR DEPLOYMENT NAME>" --unit Count --value 1 --metric-name "BACKUP_ABORTED"
```
## Additional Information
* [Changelog](doc/changes/changelog.md)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
* This stack creates a CloudWatch dashboard with graphs for the metrics reported by the Exasol cloudwatch-adapter.
*/
public class CloudwatchDashboardExamplesStack extends Stack {
private static final String NAMESPACE = "Exasol";
private final CfnParameter deploymentName;
private final Map<String, String> dimensions;

/**
* Create a new instance of {@link CloudwatchDashboardExamplesStack}.
*
*
* @param scope parent scope
* @param id stack id
*/
Expand All @@ -34,14 +35,19 @@ public CloudwatchDashboardExamplesStack(final Construct scope, final String id,
super(scope, id, props);
this.deploymentName = CfnParameter.Builder.create(this, "deploymentName").type("String")
.description("Deployment name matching the one configured in the cloud watch adapter.").build();
this.dimensions = Map.of("Cluster Name", "MASTER", "Deployment", this.deploymentName.getValueAsString());
this.dimensions = Map.of("Cluster Name", "MAIN", "Deployment", this.deploymentName.getValueAsString());

final Dashboard dashboard = Dashboard.Builder.create(this, "Exasol Dashboard").build();

final Metric queriesMetric = getExasolMetricBuilder().metricName("QUERIES")
.label("Parallel running queries (5 min MAX)").statistic("Maximum").build();
final Metric usersMetric = getExasolMetricBuilder().metricName("USERS").label("Users (5 min MAX)")
.statistic("Maximum").build();

final Metric eventBackupStart = eventMetric("BACKUP_START", "Backup started");
final Metric eventBackupEnd = eventMetric("BACKUP_END", "Backup finished successfully");
final Metric eventBackupAborted = eventMetric("BACKUP_ABORTED", "Backup failed or aborted");

dashboard.addWidgets(//
cpuWidget(), //
tempDbRamWidget(), //
Expand All @@ -50,9 +56,12 @@ public CloudwatchDashboardExamplesStack(final Construct scope, final String id,
usageWidget(queriesMetric, usersMetric), //
recommendedDbRamSizeWidget(), //
currentDbSizeWidget(), //
currentQueriesAndUsersWidget(queriesMetric, usersMetric));
currentQueriesAndUsersWidget(queriesMetric, usersMetric), //
backupEvents(List.of(eventBackupStart, eventBackupEnd, eventBackupAborted)));

addTempdbRamAlarm();
addBackupDidNotSucceedAlarms(eventBackupEnd);
addBackupFailedAlarms(eventBackupAborted);
}

private void addTempdbRamAlarm() {
Expand Down Expand Up @@ -108,7 +117,7 @@ private GraphWidget recommendedDbRamSizeWidget() {

/**
 * Build the single-value widget titled "Current Queries and Users" that displays the two given metrics.
 *
 * @param queriesMetric first metric listed in the widget
 * @param usersMetric   second metric listed in the widget
 * @return widget with a width of 6 whose period is not tied to the dashboard time range
 */
private SingleValueWidget currentQueriesAndUsersWidget(final Metric queriesMetric, final Metric usersMetric) {
return SingleValueWidget.Builder.create().title("Current Queries and Users").setPeriodToTimeRange(false)
.metrics(List.of(queriesMetric, usersMetric)).width(6).build();
.metrics(List.of(queriesMetric, usersMetric)).width(6).height(6).build();
}

private SingleValueWidget currentDbSizeWidget() {
Expand All @@ -131,10 +140,36 @@ private SingleValueWidget currentDbSizeWidget() {
getExasolMetricBuilder().metricName("OBJECT_COUNT").label("Object count").period(Duration.hours(1))
.build(), //
getExasolMetricBuilder().metricName("NODES").label("Node count").period(Duration.days(1)).build() //
)).setPeriodToTimeRange(false).width(12).height(6).build();
)).setPeriodToTimeRange(false).width(12).height(8).build();
}

/**
 * Build the "Backup Events" graph widget.
 * <p>
 * The given event metrics are plotted against the left axis; the {@code BACKUP_DURATION} metric (statistic
 * {@code Avg}, one minute period) is plotted against the right axis.
 *
 * @param events metrics to plot on the left axis
 * @return graph widget combining the events with the backup duration
 */
private GraphWidget backupEvents(final List<? extends IMetric> events) {
    final List<? extends IMetric> durationMetrics = List.of(getExasolMetricBuilder() //
            .metricName("BACKUP_DURATION") //
            .label("Backup duration") //
            .statistic("Avg") //
            .period(Duration.minutes(1)) //
            .build());
    return GraphWidget.Builder.create() //
            .title("Backup Events") //
            .left(events) //
            .right(durationMetrics) //
            .build();
}

/**
 * Create a metric that counts occurrences of an event.
 * <p>
 * The metric uses the {@code SampleCount} statistic over a period of one minute.
 *
 * @param metricName name of the metric to read
 * @param label      label displayed for the metric
 * @return metric built from the common Exasol metric builder
 */
private Metric eventMetric(final String metricName, final String label) {
    final Duration oneMinute = Duration.minutes(1);
    return getExasolMetricBuilder() //
            .metricName(metricName) //
            .label(label) //
            .statistic("SampleCount") //
            .period(oneMinute) //
            .build();
}

/**
 * Register the "Backup did not succeed" alarm in this stack.
 * <p>
 * The alarm fires when the given metric is below 1 in a single evaluation period. Missing data is treated as
 * breaching, so the alarm also fires when no data point is reported at all. Alarm actions are disabled.
 *
 * @param eventBackupEnd metric that the alarm evaluates
 */
private void addBackupDidNotSucceedAlarms(final Metric eventBackupEnd) {
    final Alarm.Builder alarmBuilder = Alarm.Builder.create(this, "backup_not_suceeded");
    alarmBuilder //
            .metric(eventBackupEnd) //
            .threshold(1) //
            .comparisonOperator(ComparisonOperator.LESS_THAN_THRESHOLD) //
            .evaluationPeriods(1) //
            .datapointsToAlarm(1) //
            .alarmName("Backup did not succeed") //
            .alarmDescription("Backup did not succeed for more than one minute") //
            .treatMissingData(TreatMissingData.BREACHING) //
            .actionsEnabled(false) //
            .build();
}

/**
 * Register the "Backup failed" alarm in this stack.
 * <p>
 * The alarm fires when the given metric is greater than 0 in a single evaluation period. Missing data is treated
 * as not breaching, so the alarm stays quiet while no data point is reported. Alarm actions are disabled.
 *
 * @param eventBackupAborted metric that the alarm evaluates
 */
private void addBackupFailedAlarms(final Metric eventBackupAborted) {
    final Alarm.Builder alarmBuilder = Alarm.Builder.create(this, "backup_failed");
    alarmBuilder //
            .metric(eventBackupAborted) //
            .threshold(0) //
            .comparisonOperator(ComparisonOperator.GREATER_THAN_THRESHOLD) //
            .evaluationPeriods(1) //
            .datapointsToAlarm(1) //
            .alarmName("Backup failed") //
            .alarmDescription("Backup failed within one minute") //
            .treatMissingData(TreatMissingData.NOT_BREACHING) //
            .actionsEnabled(false) //
            .build();
}

/**
 * Create a metric builder pre-configured with the "Exasol" namespace and the dimensions of this stack.
 * <p>
 * Callers add the metric name, label, statistic and period before building the metric.
 *
 * @return new metric builder sharing the common namespace and dimensions
 */
private Metric.Builder getExasolMetricBuilder() {
return Metric.Builder.create().namespace("Exasol").dimensions(this.dimensions);
return Metric.Builder.create().namespace(NAMESPACE).dimensionsMap(this.dimensions);
}
}

0 comments on commit 6cd3532

Please sign in to comment.