Skip to content

Commit

Permalink
feat(manifest): add queue_delay for autoscaling worker services (#2827)
Browse files Browse the repository at this point in the history

Resolves #2796 

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the Apache 2.0 License.
  • Loading branch information
efekarakus authored Sep 16, 2021
1 parent 70bd6e8 commit ba952f9
Show file tree
Hide file tree
Showing 19 changed files with 709 additions and 247 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ image:

cpu: 256 # Number of CPU units for the task.
memory: 512 # Amount of memory in MiB used by the task.
count: 1 # Number of tasks that should be running in your service.
count:
range:
min: 1
max: 10
spot_from: 6
queue_delay: # Should result in an acceptable backlog of 900 messages per task (15m / 1s).
acceptable_latency: 15m
msg_processing_time: 1s
exec: true # Enable running commands in your container.

subscribe:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,244 @@ Resources:
"logs:PutLogEvents"
]
Resource: "*"


DynamicDesiredCountAction:
Type: Custom::DynamicDesiredCountFunction
Properties:
ServiceToken: !GetAtt DynamicDesiredCountFunction.Arn
Cluster:
Fn::ImportValue: !Sub '${AppName}-${EnvName}-ClusterId'
App: !Ref AppName
Env: !Ref EnvName
Svc: !Ref WorkloadName
DefaultDesiredCount: !Ref TaskCount
# Force this Lambda function to be invoked on every deployment by passing it a random ID as input for all event types.
UpdateID: RandomGUID

DynamicDesiredCountFunction:
Type: AWS::Lambda::Function
Properties:
Code:
ZipFile: |
mockDynamicDesiredCountZipFile
Handler: "index.handler"
Timeout: 600
MemorySize: 512
Role: !GetAtt 'CustomResourceRole.Arn'
Runtime: nodejs12.x

AutoScalingRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Effect: Allow
Principal:
Service: ecs-tasks.amazonaws.com
Action: 'sts:AssumeRole'
ManagedPolicyArns:
- !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonEC2ContainerServiceAutoscaleRole'

AutoScalingTarget:
Metadata:
'aws:copilot:description': "An autoscaling target to scale your service's desired count"
Type: AWS::ApplicationAutoScaling::ScalableTarget
Properties:
MinCapacity: 1
MaxCapacity: 10
ResourceId:
Fn::Join:
- '/'
- - 'service'
- Fn::ImportValue: !Sub '${AppName}-${EnvName}-ClusterId'
- !GetAtt Service.Name
ScalableDimension: ecs:service:DesiredCount
ServiceNamespace: ecs
RoleARN: !GetAtt AutoScalingRole.Arn

BacklogPerTaskCalculatorLogGroup:
Type: AWS::Logs::LogGroup
Properties:
LogGroupName:
Fn::Join:
- '/'
- - '/aws'
- 'lambda'
- Fn::Sub: "${BacklogPerTaskCalculatorFunction}"
RetentionInDays: 3

BacklogPerTaskCalculatorFunction:
Metadata:
'aws:copilot:description': "A Lambda function to emit BacklogPerTask metrics to CloudWatch"
Type: AWS::Lambda::Function
Properties:
Code:
ZipFile: |
mockBacklogPerTaskLambda
Handler: "index.handler"
Timeout: 600
MemorySize: 512
Role: !GetAtt BacklogPerTaskCalculatorRole.Arn
Runtime: nodejs12.x
Environment:
Variables:
CLUSTER_NAME:
Fn::ImportValue: !Sub '${AppName}-${EnvName}-ClusterId'
SERVICE_NAME: !Ref Service
NAMESPACE: !Sub '${AppName}-${EnvName}-${WorkloadName}'
QUEUE_NAMES:
Fn::Join:
- ','
- - !GetAtt EventsQueue.QueueName
- !GetAtt dogsvcgiveshuskiesEventsQueue.QueueName

BacklogPerTaskCalculatorRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Version: 2012-10-17
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: "BacklogPerTaskCalculatorAccess"
PolicyDocument:
Version: '2012-10-17'
Statement:
- Sid: ECS
Effect: Allow
Action:
- ecs:DescribeServices
Resource: "*"
Condition:
ArnEquals:
'ecs:cluster':
Fn::Sub:
- arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}
- ClusterName:
Fn::ImportValue: !Sub '${AppName}-${EnvName}-ClusterId'
- Sid: SQS
Effect: Allow
Action:
- sqs:GetQueueAttributes
- sqs:GetQueueUrl
Resource:
- !GetAtt EventsQueue.Arn
- !GetAtt dogsvcgiveshuskiesEventsQueue.Arn
ManagedPolicyArns:
- !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole

BacklogPerTaskScheduledRule:
Metadata:
'aws:copilot:description': "A trigger to invoke the BacklogPerTaskCalculator Lambda function every minute"
DependsOn:
- BacklogPerTaskCalculatorLogGroup # Ensure log group is created before invoking.
Type: AWS::Events::Rule
Properties:
ScheduleExpression: "rate(1 minute)"
State: "ENABLED"
Targets:
- Arn: !GetAtt BacklogPerTaskCalculatorFunction.Arn
Id: "BacklogPerTaskCalculatorFunctionTrigger"

PermissionToInvokeBacklogPerTaskCalculatorLambda:
Type: AWS::Lambda::Permission
Properties:
FunctionName: !Ref BacklogPerTaskCalculatorFunction
Action: lambda:InvokeFunction
Principal: events.amazonaws.com
SourceArn: !GetAtt BacklogPerTaskScheduledRule.Arn

AutoScalingPolicyEventsQueue:
Metadata:
'aws:copilot:description': "An autoscaling policy to maintain 900 messages/task for EventsQueue"
Type: AWS::ApplicationAutoScaling::ScalingPolicy
Properties:
PolicyName: !Join ['-', [!Ref WorkloadName, BacklogPerTask, !GetAtt EventsQueue.QueueName]]
PolicyType: TargetTrackingScaling
ScalingTargetId: !Ref AutoScalingTarget
TargetTrackingScalingPolicyConfiguration:
ScaleInCooldown: 120
ScaleOutCooldown: 60
CustomizedMetricSpecification:
Namespace: !Sub '${AppName}-${EnvName}-${WorkloadName}'
MetricName: BacklogPerTask
Statistic: Average
Dimensions:
- Name: QueueName
Value: !GetAtt EventsQueue.QueueName
Unit: Count
TargetValue: 900

AutoScalingPolicydogsvcgiveshuskiesEventsQueue:
Metadata:
'aws:copilot:description': "An autoscaling policy to maintain 900 messages/task for dogsvcgiveshuskiesEventsQueue"
Type: AWS::ApplicationAutoScaling::ScalingPolicy
Properties:
PolicyName: !Join ['-', [!Ref WorkloadName, BacklogPerTask, !GetAtt dogsvcgiveshuskiesEventsQueue.QueueName]]
PolicyType: TargetTrackingScaling
ScalingTargetId: !Ref AutoScalingTarget
TargetTrackingScalingPolicyConfiguration:
ScaleInCooldown: 120
ScaleOutCooldown: 60
CustomizedMetricSpecification:
Namespace: !Sub '${AppName}-${EnvName}-${WorkloadName}'
MetricName: BacklogPerTask
Statistic: Average
Dimensions:
- Name: QueueName
Value: !GetAtt dogsvcgiveshuskiesEventsQueue.QueueName
Unit: Count
TargetValue: 900

CustomResourceRole:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Version: 2012-10-17
Statement:
- Effect: Allow
Principal:
Service:
- lambda.amazonaws.com
Action:
- sts:AssumeRole
Path: /
Policies:
- PolicyName: "DelegateDesiredCountAccess"
PolicyDocument:
Version: '2012-10-17'
Statement:
- Sid: ECS
Effect: Allow
Action:
- ecs:DescribeServices
Resource: "*"
Condition:
ArnEquals:
'ecs:cluster':
Fn::Sub:
- arn:${AWS::Partition}:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${ClusterName}
- ClusterName:
Fn::ImportValue: !Sub '${AppName}-${EnvName}-ClusterId'
- Sid: ResourceGroups
Effect: Allow
Action:
- resource-groups:GetResources
Resource: "*"
- Sid: Tags
Effect: Allow
Action:
- "tag:GetResources"
Resource: "*"
ManagedPolicyArns:
- !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole

Service:
DependsOn:
- EnvControllerAction
Expand All @@ -181,7 +418,7 @@ Resources:
Fn::ImportValue:
!Sub '${AppName}-${EnvName}-ClusterId'
TaskDefinition: !Ref TaskDefinition
DesiredCount: !Ref TaskCount
DesiredCount: !GetAtt DynamicDesiredCountAction.DesiredCount
DeploymentConfiguration:
DeploymentCircuitBreaker:
Enable: true
Expand All @@ -190,7 +427,12 @@ Resources:
MaximumPercent: 200
PropagateTags: SERVICE
EnableExecuteCommand: true
LaunchType: FARGATE
CapacityProviderStrategy:
- CapacityProvider: FARGATE_SPOT
Weight: 1
- CapacityProvider: FARGATE
Weight: 0
Base: 5
NetworkConfiguration:
AwsvpcConfiguration:
AssignPublicIp: ENABLED
Expand Down
9 changes: 9 additions & 0 deletions internal/pkg/deploy/cloudformation/stack/transformers.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,15 @@ func convertAutoscaling(a *manifest.AdvancedCount) (*template.AutoscalingOpts, e
responseTime := float64(*a.ResponseTime) / float64(time.Second)
autoscalingOpts.ResponseTime = aws.Float64(responseTime)
}
if !a.QueueScaling.IsEmpty() {
acceptableBacklog, err := a.QueueScaling.AcceptableBacklogPerTask()
if err != nil {
return nil, err
}
autoscalingOpts.QueueDelay = &template.AutoscalingQueueDelayOpts{
AcceptableBacklogPerTask: acceptableBacklog,
}
}
return &autoscalingOpts, nil
}

Expand Down
25 changes: 25 additions & 0 deletions internal/pkg/deploy/cloudformation/stack/transformers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ func Test_convertAutoscaling(t *testing.T) {
badRange := manifest.IntRangeBand("badRange")
mockRequests := 1000
mockResponseTime := 512 * time.Millisecond

testAcceptableLatency := 10 * time.Minute
testAvgProcessingTime := 250 * time.Millisecond
testCases := map[string]struct {
input *manifest.AdvancedCount

Expand Down Expand Up @@ -549,6 +552,28 @@ func Test_convertAutoscaling(t *testing.T) {
ResponseTime: aws.Float64(0.512),
},
},
"success with queue autoscaling": {
input: &manifest.AdvancedCount{
Range: manifest.Range{
RangeConfig: manifest.RangeConfig{
Min: aws.Int(5),
Max: aws.Int(10),
SpotFrom: aws.Int(5),
},
},
QueueScaling: manifest.QueueScaling{
AcceptableLatency: &testAcceptableLatency,
AvgProcessingTime: &testAvgProcessingTime,
},
},
wanted: &template.AutoscalingOpts{
MaxCapacity: aws.Int(10),
MinCapacity: aws.Int(5),
QueueDelay: &template.AutoscalingQueueDelayOpts{
AcceptableBacklogPerTask: 2400,
},
},
},
"returns nil if spot specified": {
input: &manifest.AdvancedCount{
Spot: aws.Int(5),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@ import (
)

const (
workerManifestPath = "worker-manifest.yml"
workerStackPath = "worker-test.stack.yml"
workerParamsPath = "worker-test.params.json"
workerManifestPath = "worker-manifest.yml"
workerStackPath = "worker-test.stack.yml"
workerParamsPath = "worker-test.params.json"
backlogPerTaskLambdaPath = "custom-resources/backlog-per-task-calculator.js"
)

func TestWorkerService_Template(t *testing.T) {
Expand All @@ -48,14 +49,19 @@ func TestWorkerService_Template(t *testing.T) {
tpl, err := serializer.Template()
require.NoError(t, err, "template should render")
regExpGUID := regexp.MustCompile(`([a-f\d]{8}-)([a-f\d]{4}-){3}([a-f\d]{12})`) // Matches random guids

parser := template.New()
envController, err := parser.Read(envControllerPath)
require.NoError(t, err)
envControllerZipFile := envController.String()

dynamicDesiredCount, err := parser.Read(dynamicDesiredCountPath)
require.NoError(t, err)
dynamicDesiredCountZipFile := dynamicDesiredCount.String()

backlogPerTaskLambda, err := parser.Read(backlogPerTaskLambdaPath)
require.NoError(t, err)

t.Run("CF Template should be equal", func(t *testing.T) {
actualBytes := []byte(tpl)
// Cut random GUID from template.
Expand All @@ -64,6 +70,7 @@ func TestWorkerService_Template(t *testing.T) {
// Cut out zip file for more readable output
actualString = strings.ReplaceAll(actualString, envControllerZipFile, "mockEnvControllerZipFile")
actualString = strings.ReplaceAll(actualString, dynamicDesiredCountZipFile, "mockDynamicDesiredCountZipFile")
actualString = strings.ReplaceAll(actualString, backlogPerTaskLambda.String(), "mockBacklogPerTaskLambda")
actualBytes = []byte(actualString)
mActual := make(map[interface{}]interface{})
require.NoError(t, yaml.Unmarshal(actualBytes, mActual))
Expand Down
Loading

0 comments on commit ba952f9

Please sign in to comment.