diff --git a/.gitignore b/.gitignore index 6c416ca..559b49b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,8 @@ target .idea .vscode *.iml - +.settings/org.eclipse.m2e.core.prefs +.settings/org.eclipse.core.resources.prefs # CDK asset staging directory .cdk.staging cdk.out @@ -26,4 +27,4 @@ pom.xml.versionsBackup *.bak *.orig *.old -*.md.html \ No newline at end of file +*.md.html diff --git a/README.md b/README.md index 2d31347..f412494 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ In this repository we define the dashboard by code using [AWS CDK](https://aws.a You can use this as a starting point for your own dashboards. In contrast to changing the dashboard using the AWS Console that allows you for example to version your dashboards in Git, add a review process or simply copy the same dashboard for multiple Exasol deployments. For that you need to go through the following steps: * Checkout (or fork) this repository -* Adapt the dashboard to your needs (modify `src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java`) +* Adapt the dashboard to your needs (modify [`src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java`](src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java)) * [Install the AWS CDK](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html#getting_started_install) * Deploy your dashboard using ```shell @@ -43,6 +43,17 @@ You can use this as a starting point for your own dashboards. 
In contrast to cha ``` (Don't forget to replace `` with the value you configured during the CloudWatch-adapter setup) +## Injecting Metrics Values for Testing + +To test your dashboard and alarms you can manually inject metrics values by executing commands like these: + +```sh +aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=" --unit Seconds --value 30 --metric-name "BACKUP_DURATION" +aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=" --unit Count --value 1 --metric-name "BACKUP_START" +aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=" --unit Count --value 1 --metric-name "BACKUP_END" +aws cloudwatch put-metric-data --namespace "Exasol" --dimensions "Cluster Name=MAIN,Deployment=" --unit Count --value 1 --metric-name "BACKUP_ABORTED" +``` + ## Additional Information * [Changelog](doc/changes/changelog.md) diff --git a/src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java b/src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java index 36ebe10..6ef54e3 100644 --- a/src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java +++ b/src/main/java/com/exasol/cloudwatchexampledashboard/CloudwatchDashboardExamplesStack.java @@ -10,12 +10,13 @@ * This stack creates an CloudWatch dashboard with graphs for the metrics reported by the exasol cloudwatch-adapter. */ public class CloudwatchDashboardExamplesStack extends Stack { + private static final String NAMESPACE = "Exasol"; private final CfnParameter deploymentName; private final Map dimensions; /** * Create a new instance of {@link CloudwatchDashboardExamplesStack}. 
- * + * * @param scope parent scope * @param id stack id */ @@ -34,7 +35,7 @@ public CloudwatchDashboardExamplesStack(final Construct scope, final String id, super(scope, id, props); this.deploymentName = CfnParameter.Builder.create(this, "deploymentName").type("String") .description("Deployment name matching the one configured in the cloud watch adapter.").build(); - this.dimensions = Map.of("Cluster Name", "MASTER", "Deployment", this.deploymentName.getValueAsString()); + this.dimensions = Map.of("Cluster Name", "MAIN", "Deployment", this.deploymentName.getValueAsString()); final Dashboard dashboard = Dashboard.Builder.create(this, "Exasol Dashboard").build(); @@ -42,6 +43,11 @@ public CloudwatchDashboardExamplesStack(final Construct scope, final String id, .label("Parallel running queries (5 min MAX)").statistic("Maximum").build(); final Metric usersMetric = getExasolMetricBuilder().metricName("USERS").label("Users (5 min MAX)") .statistic("Maximum").build(); + + final Metric eventBackupStart = eventMetric("BACKUP_START", "Backup started"); + final Metric eventBackupEnd = eventMetric("BACKUP_END", "Backup finished successfully"); + final Metric eventBackupAborted = eventMetric("BACKUP_ABORTED", "Backup failed or aborted"); + dashboard.addWidgets(// cpuWidget(), // tempDbRamWidget(), // @@ -50,9 +56,12 @@ public CloudwatchDashboardExamplesStack(final Construct scope, final String id, usageWidget(queriesMetric, usersMetric), // recommendedDbRamSizeWidget(), // currentDbSizeWidget(), // - currentQueriesAndUsersWidget(queriesMetric, usersMetric)); + currentQueriesAndUsersWidget(queriesMetric, usersMetric), // + backupEvents(List.of(eventBackupStart, eventBackupEnd, eventBackupAborted))); addTempdbRamAlarm(); + addBackupDidNotSucceedAlarms(eventBackupEnd); + addBackupFailedAlarms(eventBackupAborted); } private void addTempdbRamAlarm() { @@ -108,7 +117,7 @@ private GraphWidget recommendedDbRamSizeWidget() { private SingleValueWidget currentQueriesAndUsersWidget(final 
Metric queriesMetric, final Metric usersMetric) { return SingleValueWidget.Builder.create().title("Current Queries and Users").setPeriodToTimeRange(false) - .metrics(List.of(queriesMetric, usersMetric)).width(6).build(); + .metrics(List.of(queriesMetric, usersMetric)).width(6).height(6).build(); } private SingleValueWidget currentDbSizeWidget() { @@ -131,10 +140,36 @@ private SingleValueWidget currentDbSizeWidget() { getExasolMetricBuilder().metricName("OBJECT_COUNT").label("Object count").period(Duration.hours(1)) .build(), // getExasolMetricBuilder().metricName("NODES").label("Node count").period(Duration.days(1)).build() // - )).setPeriodToTimeRange(false).width(12).height(6).build(); + )).setPeriodToTimeRange(false).width(12).height(8).build(); + } + + private GraphWidget backupEvents(final List events) { + return GraphWidget.Builder.create().title("Backup Events").left(events) + .right(List.of(getExasolMetricBuilder().metricName("BACKUP_DURATION").label("Backup duration") + .statistic("Avg").period(Duration.minutes(1)).build())) + .build(); + } + + private Metric eventMetric(final String metricName, final String label) { + return getExasolMetricBuilder().metricName(metricName).label(label).statistic("SampleCount") + .period(Duration.minutes(1)).build(); + } + + private void addBackupDidNotSucceedAlarms(final Metric eventBackupEnd) { + Alarm.Builder.create(this, "backup_not_suceeded").metric(eventBackupEnd).threshold(1) + .comparisonOperator(ComparisonOperator.LESS_THAN_THRESHOLD).evaluationPeriods(1).datapointsToAlarm(1) + .alarmName("Backup did not succeed").alarmDescription("Backup did not succeed for more than one minute") + .treatMissingData(TreatMissingData.BREACHING).actionsEnabled(false).build(); + } + + private void addBackupFailedAlarms(final Metric eventBackupAborted) { + Alarm.Builder.create(this, "backup_failed").metric(eventBackupAborted).threshold(0) + 
.comparisonOperator(ComparisonOperator.GREATER_THAN_THRESHOLD).evaluationPeriods(1).datapointsToAlarm(1) + .alarmName("Backup failed").alarmDescription("Backup failed within one minute") + .treatMissingData(TreatMissingData.NOT_BREACHING).actionsEnabled(false).build(); } private Metric.Builder getExasolMetricBuilder() { - return Metric.Builder.create().namespace("Exasol").dimensions(this.dimensions); + return Metric.Builder.create().namespace(NAMESPACE).dimensionsMap(this.dimensions); } }