Skip to content

Commit

Permalink
feat: Add 4xx alarms as an optional parameter to ec2 base alarm config
Browse files Browse the repository at this point in the history
Being alerted to high volumes of 4xx responses (404s, for example) is arguably a common pattern that is useful to extend to CDK. We've added this to another project, where it would have allowed us to catch a production issue, so we are contributing it upstream to gauge whether it is useful for others.

This change adds an optional http4xxAlarm to the `Alarms` type on the `GuEc2AppProps` interface. It is optional because 4xx responses are perhaps not as critical to consider as 5xx events, but the alarm is still hopefully a useful affordance.
  • Loading branch information
Robert Kenny authored and kenoir committed Oct 11, 2023
1 parent 8750dec commit f91c137
Show file tree
Hide file tree
Showing 6 changed files with 365 additions and 5 deletions.
216 changes: 216 additions & 0 deletions src/constructs/cloudwatch/__snapshots__/ec2-alarms.test.ts.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,221 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`The GuAlb4xxPercentageAlarm construct should create the correct alarm resource with minimal config 1`] = `
{
"Metadata": {
"gu:cdk:constructs": [
"GuStack",
"GuApplicationLoadBalancer",
"GuAlb4xxPercentageAlarm",
],
"gu:cdk:version": "TEST",
},
"Outputs": {
"ApplicationLoadBalancerTestingDnsName": {
"Description": "DNS entry for ApplicationLoadBalancerTesting",
"Value": {
"Fn::GetAtt": [
"ApplicationLoadBalancerTesting172A253B",
"DNSName",
],
},
},
},
"Resources": {
"ApplicationLoadBalancerTesting172A253B": {
"Properties": {
"LoadBalancerAttributes": [
{
"Key": "deletion_protection.enabled",
"Value": "true",
},
],
"Scheme": "internal",
"SecurityGroups": [
{
"Fn::GetAtt": [
"ApplicationLoadBalancerTestingSecurityGroup883A01A4",
"GroupId",
],
},
],
"Subnets": [
"",
],
"Tags": [
{
"Key": "App",
"Value": "testing",
},
{
"Key": "gu:cdk:version",
"Value": "TEST",
},
{
"Key": "gu:repo",
"Value": "guardian/cdk",
},
{
"Key": "Stack",
"Value": "test-stack",
},
{
"Key": "Stage",
"Value": "TEST",
},
],
"Type": "application",
},
"Type": "AWS::ElasticLoadBalancingV2::LoadBalancer",
},
"ApplicationLoadBalancerTestingSecurityGroup883A01A4": {
"Properties": {
"GroupDescription": "Automatically created Security Group for ELB TestApplicationLoadBalancerTesting8F9EA5A8",
"SecurityGroupEgress": [
{
"CidrIp": "255.255.255.255/32",
"Description": "Disallow all traffic",
"FromPort": 252,
"IpProtocol": "icmp",
"ToPort": 86,
},
],
"Tags": [
{
"Key": "App",
"Value": "testing",
},
{
"Key": "gu:cdk:version",
"Value": "TEST",
},
{
"Key": "gu:repo",
"Value": "guardian/cdk",
},
{
"Key": "Stack",
"Value": "test-stack",
},
{
"Key": "Stage",
"Value": "TEST",
},
],
"VpcId": "test",
},
"Type": "AWS::EC2::SecurityGroup",
},
"High4xxPercentageAlarmTestingE21E0AD7": {
"Properties": {
"ActionsEnabled": true,
"AlarmActions": [
{
"Fn::Join": [
"",
[
"arn:aws:sns:",
{
"Ref": "AWS::Region",
},
":",
{
"Ref": "AWS::AccountId",
},
":test-topic",
],
],
},
],
"AlarmDescription": "testing exceeded 1% error rate",
"AlarmName": "High 4XX error percentage from testing in TEST",
"ComparisonOperator": "GreaterThanThreshold",
"EvaluationPeriods": 1,
"Metrics": [
{
"Expression": "100*(m1+m2)/m3",
"Id": "expr_1",
"Label": "% of 4XX responses served for testing (load balancer and instances combined)",
},
{
"Id": "m1",
"MetricStat": {
"Metric": {
"Dimensions": [
{
"Name": "LoadBalancer",
"Value": {
"Fn::GetAtt": [
"ApplicationLoadBalancerTesting172A253B",
"LoadBalancerFullName",
],
},
},
],
"MetricName": "HTTPCode_ELB_4XX_Count",
"Namespace": "AWS/ApplicationELB",
},
"Period": 60,
"Stat": "Sum",
},
"ReturnData": false,
},
{
"Id": "m2",
"MetricStat": {
"Metric": {
"Dimensions": [
{
"Name": "LoadBalancer",
"Value": {
"Fn::GetAtt": [
"ApplicationLoadBalancerTesting172A253B",
"LoadBalancerFullName",
],
},
},
],
"MetricName": "HTTPCode_Target_4XX_Count",
"Namespace": "AWS/ApplicationELB",
},
"Period": 60,
"Stat": "Sum",
},
"ReturnData": false,
},
{
"Id": "m3",
"MetricStat": {
"Metric": {
"Dimensions": [
{
"Name": "LoadBalancer",
"Value": {
"Fn::GetAtt": [
"ApplicationLoadBalancerTesting172A253B",
"LoadBalancerFullName",
],
},
},
],
"MetricName": "RequestCount",
"Namespace": "AWS/ApplicationELB",
},
"Period": 60,
"Stat": "Sum",
},
"ReturnData": false,
},
],
"Threshold": 1,
"TreatMissingData": "notBreaching",
},
"Type": "AWS::CloudWatch::Alarm",
},
},
}
`;

exports[`The GuAlb5xxPercentageAlarm construct should create the correct alarm resource with minimal config 1`] = `
{
"Metadata": {
Expand Down
9 changes: 9 additions & 0 deletions src/constructs/cloudwatch/alarm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ export interface GuAlarmProps extends AlarmProps, AppIdentity {
okAction?: boolean;
}

/**
 * Settings for an alarm on the percentage of HTTP 4xx responses.
 *
 * Carries every `GuAlarmProps` field except the keys named in the `Omit` below, plus the two
 * 4xx-specific settings declared here.
 */
export interface Http4xxAlarmProps
extends Omit<
GuAlarmProps,
"snsTopicName" | "evaluationPeriods" | "metric" | "period" | "threshold" | "treatMissingData" | "app"
> {
/** Percentage of 4xx responses that is tolerated; `GuAlb4xxPercentageAlarm` uses it as the alarm threshold. */
tolerated4xxPercentage: number;
/**
 * Optional; `GuAlb4xxPercentageAlarm` passes it on as the alarm's number of evaluation periods
 * and uses 1 when it is not set.
 */
numberOfMinutesAboveThresholdBeforeAlarm?: number;
}

export interface Http5xxAlarmProps
extends Omit<
GuAlarmProps,
Expand Down
57 changes: 56 additions & 1 deletion src/constructs/cloudwatch/ec2-alarms.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { ApplicationListener, ApplicationProtocol } from "aws-cdk-lib/aws-elasti
import { simpleGuStackForTesting } from "../../utils/test";
import type { AppIdentity } from "../core";
import { GuApplicationLoadBalancer, GuApplicationTargetGroup } from "../loadbalancing";
import { GuAlb5xxPercentageAlarm, GuUnhealthyInstancesAlarm } from "./ec2-alarms";
import { GuAlb4xxPercentageAlarm, GuAlb5xxPercentageAlarm, GuUnhealthyInstancesAlarm } from "./ec2-alarms";

const vpc = Vpc.fromVpcAttributes(new Stack(), "VPC", {
vpcId: "test",
Expand Down Expand Up @@ -72,6 +72,61 @@ describe("The GuAlb5xxPercentageAlarm construct", () => {
});
});

describe("The GuAlb4xxPercentageAlarm construct", () => {
  // Derived from the constructor because the props interface is not exported from ./ec2-alarms.
  type Alb4xxAlarmProps = ConstructorParameters<typeof GuAlb4xxPercentageAlarm>[1];

  /**
   * Builds a fresh test stack containing a load balancer and a 4xx alarm, then returns the
   * synthesised template. Every test shares the same baseline (1% tolerance, "test-topic");
   * `overrides` supplies whatever the individual test wants to vary.
   */
  const templateWith4xxAlarm = (overrides: Partial<Alb4xxAlarmProps> = {}): Template => {
    const stack = simpleGuStackForTesting();
    const loadBalancer = new GuApplicationLoadBalancer(stack, "ApplicationLoadBalancer", { ...app, vpc });
    new GuAlb4xxPercentageAlarm(stack, {
      ...app,
      loadBalancer,
      tolerated4xxPercentage: 1,
      snsTopicName: "test-topic",
      ...overrides,
    });
    return Template.fromStack(stack);
  };

  it("should create the correct alarm resource with minimal config", () => {
    expect(templateWith4xxAlarm().toJSON()).toMatchSnapshot();
  });

  it("should use a custom description if one is provided", () => {
    const template = templateWith4xxAlarm({ alarmDescription: "test-custom-alarm-description" });
    template.hasResourceProperties("AWS::CloudWatch::Alarm", {
      AlarmDescription: "test-custom-alarm-description",
    });
  });

  it("should use a custom alarm name if one is provided", () => {
    const template = templateWith4xxAlarm({ alarmName: "test-custom-alarm-name" });
    template.hasResourceProperties("AWS::CloudWatch::Alarm", {
      AlarmName: "test-custom-alarm-name",
    });
  });

  it("should adjust the number of evaluation periods if a custom value is provided", () => {
    const template = templateWith4xxAlarm({ numberOfMinutesAboveThresholdBeforeAlarm: 3 });
    template.hasResourceProperties("AWS::CloudWatch::Alarm", {
      EvaluationPeriods: 3,
    });
  });
});

describe("The GuUnhealthyInstancesAlarm construct", () => {
it("should create the correct alarm resource with minimal config", () => {
const stack = simpleGuStackForTesting();
Expand Down
37 changes: 36 additions & 1 deletion src/constructs/cloudwatch/ec2-alarms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import type { GuStack } from "../core";
import { AppIdentity } from "../core";
import type { GuApplicationLoadBalancer, GuApplicationTargetGroup } from "../loadbalancing";
import { GuAlarm } from "./alarm";
import type { GuAlarmProps, Http5xxAlarmProps } from "./alarm";
import type { GuAlarmProps, Http4xxAlarmProps, Http5xxAlarmProps } from "./alarm";

/**
 * Constructor props for `GuAlb4xxPercentageAlarm`: the SNS topic name, the 4xx alarm settings,
 * the app identity, and the load balancer to monitor.
 */
interface GuAlb4xxPercentageAlarmProps extends Pick<GuAlarmProps, "snsTopicName">, Http4xxAlarmProps, AppIdentity {
/** Load balancer whose 4xx and request-count metrics feed the alarm. */
loadBalancer: GuApplicationLoadBalancer;
}
/**
 * Combines the SNS topic name, the `Http5xxAlarmProps` settings, the app identity, and a load balancer.
 * NOTE(review): presumably the constructor props of `GuAlb5xxPercentageAlarm` — confirm against that class.
 */
interface GuAlb5xxPercentageAlarmProps extends Pick<GuAlarmProps, "snsTopicName">, Http5xxAlarmProps, AppIdentity {
/** Load balancer the alarm relates to. */
loadBalancer: GuApplicationLoadBalancer;
}
Expand Down Expand Up @@ -47,6 +50,38 @@ export class GuAlb5xxPercentageAlarm extends GuAlarm {
}
}

/**
 * Alarm on the percentage of requests answered with a 4xx status code, counting responses generated
 * by the load balancer itself as well as those returned by its targets.
 *
 * The alarm fires when that percentage is greater than `tolerated4xxPercentage` for
 * `numberOfMinutesAboveThresholdBeforeAlarm` one-minute evaluation periods (1 when not set).
 * Missing data is treated as not breaching. `alarmName` and `alarmDescription` are used when
 * supplied; otherwise defaults built from the app name, stage and threshold are applied.
 */
export class GuAlb4xxPercentageAlarm extends GuAlarm {
  constructor(scope: GuStack, props: GuAlb4xxPercentageAlarmProps) {
    const { app, loadBalancer, tolerated4xxPercentage } = props;

    // Fallbacks, used only when the caller has not provided a name / description of its own.
    const fallbackName = `High 4XX error percentage from ${app} in ${scope.stage}`;
    const fallbackDescription = `${app} exceeded ${tolerated4xxPercentage}% error rate`;

    // (load-balancer 4xx + target 4xx) expressed as a percentage of all requests, per minute.
    const percentageOf4xx = new MathExpression({
      label: `% of 4XX responses served for ${app} (load balancer and instances combined)`,
      expression: "100*(m1+m2)/m3",
      period: Duration.minutes(1),
      usingMetrics: {
        m1: loadBalancer.metrics.httpCodeElb(HttpCodeElb.ELB_4XX_COUNT),
        m2: loadBalancer.metrics.httpCodeTarget(HttpCodeTarget.TARGET_4XX_COUNT),
        m3: loadBalancer.metrics.requestCount(),
      },
    });

    // Caller props are spread first so that the values computed here always take precedence.
    const alarmConfig = {
      ...props,
      alarmName: props.alarmName ?? fallbackName,
      alarmDescription: props.alarmDescription ?? fallbackDescription,
      metric: percentageOf4xx,
      threshold: tolerated4xxPercentage,
      comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
      evaluationPeriods: props.numberOfMinutesAboveThresholdBeforeAlarm ?? 1,
      treatMissingData: TreatMissingData.NOT_BREACHING,
    };

    super(scope, AppIdentity.suffixText(props, "High4xxPercentageAlarm"), alarmConfig);
  }
}

/**
* Creates an alarm which is triggered whenever there have been several healthcheck failures within a single hour.
*/
Expand Down
Loading

0 comments on commit f91c137

Please sign in to comment.