Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: set desired capacity defaults #143

Merged
merged 2 commits into from
Jul 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG/CHANGELOG-1.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@ See [code changes](https://github.com/aws/aws-k8s-tester/compare/v1.4.8...v1.5.0
### `ec2config`

- Set [ASG size defaults based on desired capacities](https://github.com/aws/aws-k8s-tester/pull/140).
- At least one of ["desired" or "minimum" must be >0](https://github.com/aws/aws-k8s-tester/pull/143).
- `desired 10, min 0, max 0 ==> desired 10, min 10, max 10`.
- `desired 0, min 1, max 10 ==> desired 0, min 1, max 10`.

### `eksconfig`

- Add [`AWS_K8S_TESTER_EKS_CONFIG`](https://github.com/aws/aws-k8s-tester/pull/138).
- Set [ASG size defaults based on desired capacities](https://github.com/aws/aws-k8s-tester/pull/140).
- At least one of ["desired" or "minimum" must be >0](https://github.com/aws/aws-k8s-tester/pull/143).
- `desired 10, min 0, max 0 ==> desired 10, min 10, max 10`.
- `desired 0, min 1, max 10 ==> desired 0, min 1, max 10`.

### `eks`

Expand Down
17 changes: 10 additions & 7 deletions ec2/asgs.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ Parameters:
ASGDesiredCapacity:
Type: Number
Description: Desired size auto scaling group
Default: 1
Default: 0
MinValue: 1
MaxValue: 1000

Expand Down Expand Up @@ -222,7 +222,7 @@ Resources:
Type: AWS::AutoScaling::AutoScalingGroup
UpdatePolicy:
AutoScalingRollingUpdate:
MinInstancesInService: !Ref ASGDesiredCapacity
MinInstancesInService: !Ref ASGMinSize
MaxBatchSize: 1
SuspendProcesses:
- HealthCheck
Expand All @@ -233,8 +233,8 @@ Resources:
Properties:
AutoScalingGroupName: !Ref ASGName
MinSize: !Ref ASGMinSize
MaxSize: !Ref ASGMaxSize
DesiredCapacity: !Ref ASGDesiredCapacity
MaxSize: !Ref ASGMaxSize{{ if ne .ASGDesiredCapacity 0 }}
DesiredCapacity: !Ref ASGDesiredCapacity{{ end }}
VPCZoneIdentifier: !Ref PublicSubnetIDs
MetricsCollection:
- Granularity: "1Minute"
Expand Down Expand Up @@ -367,8 +367,9 @@ const userDataAL2InstallSSM = ` UserData:
sudo docker info`

type templateASG struct {
Metadata string
UserData string
Metadata string
UserData string
ASGDesiredCapacity int64
}

func (ts *Tester) createASGs() (err error) {
Expand All @@ -391,7 +392,9 @@ func (ts *Tester) createASGs() (err error) {
// TODO: may not be necessary
// "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2"
// already includes SSM agent + AWS CLI
tg := templateASG{}
tg := templateASG{
ASGDesiredCapacity: cur.ASGDesiredCapacity,
}
switch cur.AMIType {
case ec2config.AMITypeBottleRocketCPU:
// "bottlerocket" comes with SSM agent
Expand Down
5 changes: 3 additions & 2 deletions ec2/asgs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ func TestTemplateASG(t *testing.T) {

buf.Reset()
if err := tpl.Execute(buf, templateASG{
Metadata: metadataAL2InstallSSM,
UserData: userDataAL2InstallSSM,
Metadata: metadataAL2InstallSSM,
UserData: userDataAL2InstallSSM,
ASGDesiredCapacity: 1,
}); err != nil {
t.Fatal(err)
}
Expand Down
14 changes: 9 additions & 5 deletions ec2config/validate-defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,13 +414,13 @@ func (cfg *Config) validateASGs() error {
return fmt.Errorf("unknown ASGs[%q].AMIType %q", k, cur.AMIType)
}

if cur.ASGDesiredCapacity == 0 {
return fmt.Errorf("ASGs[%q].ASGDesiredCapacity must be >0", k)
if cur.ASGMinSize == 0 && cur.ASGDesiredCapacity == 0 {
return fmt.Errorf("ASGs[%q].ASGMinSize/ASGDesiredCapacity must be >0", k)
}
if cur.ASGMinSize == 0 {
if cur.ASGDesiredCapacity > 0 && cur.ASGMinSize == 0 {
cur.ASGMinSize = cur.ASGDesiredCapacity
}
if cur.ASGMaxSize == 0 {
if cur.ASGDesiredCapacity > 0 && cur.ASGMaxSize == 0 {
cur.ASGMaxSize = cur.ASGDesiredCapacity
}

Expand Down Expand Up @@ -464,7 +464,11 @@ func (cfg *Config) validateASGs() error {
case false: // use existing one, or don't run any SSM
}

total += cur.ASGDesiredCapacity
expectedN := cur.ASGDesiredCapacity
if expectedN == 0 {
expectedN = cur.ASGMinSize
}
total += expectedN
processed[k] = cur
}

Expand Down
6 changes: 4 additions & 2 deletions eks/gpu/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,12 @@ func (ts *tester) InstallNvidiaDriver() (err error) {
}
ts.cfg.Logger.Info("nodes",
zap.Int64("current-ready-nodes", foundReady),
zap.Int64("min-ready-nodes", cur.ASGMinSize),
zap.Int64("desired-ready-nodes", cur.ASGDesiredCapacity),
)
time.Sleep(5 * time.Second)

if foundReady >= cur.ASGDesiredCapacity {
if foundReady >= cur.ASGMinSize {
readyNGs[ngName] = struct{}{}
break
}
Expand Down Expand Up @@ -338,11 +339,12 @@ func (ts *tester) InstallNvidiaDriver() (err error) {
}
ts.cfg.Logger.Info("nodes",
zap.Int("current-ready-nodes", foundReady),
zap.Int("min-ready-nodes", cur.ASGMinSize),
zap.Int("desired-ready-nodes", cur.ASGDesiredCapacity),
)
time.Sleep(5 * time.Second)

if foundReady >= cur.ASGDesiredCapacity {
if foundReady >= cur.ASGMinSize {
readyMNGs[mngName] = struct{}{}
break
}
Expand Down
15 changes: 9 additions & 6 deletions eks/mng/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Parameters:

ASGDesiredCapacity:
Type: Number
Default: 2
Default: 0
Description: Desired capacity of Node Group Auto Scaling Group.

InstanceTypes:
Expand Down Expand Up @@ -105,8 +105,8 @@ Resources:
Ec2SshKey: !Ref RemoteAccessKeyName
ScalingConfig:
MinSize: !Ref ASGMinSize
MaxSize: !Ref ASGMaxSize
DesiredSize: !Ref ASGDesiredCapacity
MaxSize: !Ref ASGMaxSize{{ if ne .ASGDesiredCapacity 0 }}
DesiredSize: !Ref ASGDesiredCapacity{{ end }}
Subnets: !Ref PublicSubnetIDs
Labels:
NodeType: regular
Expand All @@ -130,6 +130,7 @@ const parametersReleaseVersion = ` ReleaseVersion:
const propertyReleaseVersion = ` ReleaseVersion: !Ref ReleaseVersion`

// templateMNG carries the values substituted into the managed node group
// template text when it is rendered (presumably via text/template — confirm
// against the caller, which is not visible here).
type templateMNG struct {
// ASGDesiredCapacity gates the "DesiredSize" line of the scaling config:
// the template emits that line only when this value is not 0.
ASGDesiredCapacity int64
// ParameterReleaseVersion is presumably the optional "ReleaseVersion"
// parameter snippet (cf. parametersReleaseVersion) — TODO confirm.
ParameterReleaseVersion string
// PropertyReleaseVersion is presumably the optional "ReleaseVersion"
// property snippet (cf. propertyReleaseVersion) — TODO confirm.
PropertyReleaseVersion string
}
Expand Down Expand Up @@ -183,9 +184,8 @@ func (ts *tester) createASGs() (err error) {
Ec2SshKey: aws.String(ts.cfg.EKSConfig.RemoteAccessKeyName),
},
ScalingConfig: &aws_eks.NodegroupScalingConfig{
MinSize: aws.Int64(int64(cur.ASGMinSize)),
MaxSize: aws.Int64(int64(cur.ASGMaxSize)),
DesiredSize: aws.Int64(int64(cur.ASGDesiredCapacity)),
MinSize: aws.Int64(int64(cur.ASGMinSize)),
MaxSize: aws.Int64(int64(cur.ASGMaxSize)),
},
Subnets: aws.StringSlice(ts.cfg.EKSConfig.Parameters.PublicSubnetIDs),
Tags: map[string]*string{
Expand All @@ -200,6 +200,9 @@ func (ts *tester) createASGs() (err error) {
"NGName": aws.String(cur.Name),
},
}
if cur.ASGDesiredCapacity > 0 {
createInput.ScalingConfig.DesiredSize = aws.Int64(int64(cur.ASGDesiredCapacity))
}
for k, v := range cur.Tags {
createInput.Tags[k] = aws.String(v)
ts.cfg.Logger.Info("added EKS tag", zap.String("key", k), zap.String("value", v))
Expand Down
8 changes: 5 additions & 3 deletions eks/mng/version-upgrade/version-upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,10 +122,12 @@ func (ts *tester) Upgrade(mngName string) (err error) {
reqID = aws.StringValue(updateOut.Update.Id)
}

// takes TODO
initialWait := 5 * time.Minute
totalWait := 2*time.Hour + 30*time.Minute + 3*time.Minute*time.Duration(cur.ASGDesiredCapacity)

checkN := time.Duration(cur.ASGDesiredCapacity)
if checkN == 0 {
checkN = time.Duration(cur.ASGMinSize)
}
totalWait := 2*time.Hour + 30*time.Minute + 3*time.Minute*checkN
ts.cfg.Logger.Info("sent MNG upgrade request; polling",
zap.String("cluster-name", ts.cfg.EKSConfig.Name),
zap.String("mng-name", mngName),
Expand Down
9 changes: 7 additions & 2 deletions eks/mng/wait/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,11 @@ func (ts *tester) waitForNodes(mngName string, retriesLeft int) error {
}
}

waitDur := 3*time.Minute + time.Duration(5*cur.ASGDesiredCapacity)*time.Second
checkN := time.Duration(cur.ASGDesiredCapacity)
if checkN == 0 {
checkN = time.Duration(cur.ASGMinSize)
}
waitDur := 3*time.Minute + 5*time.Second*checkN
ts.cfg.Logger.Info(
"describing EC2 instances in ASG",
zap.String("asg-name", cur.ASGName),
Expand Down Expand Up @@ -315,10 +319,11 @@ func (ts *tester) waitForNodes(mngName string, retriesLeft int) error {
zap.String("command", ts.cfg.EKSConfig.KubectlCommand()+" get nodes"),
zap.String("mng-name", cur.Name),
zap.Int("current-ready-nodes", readies),
zap.Int("min-ready-nodes", cur.ASGMinSize),
zap.Int("desired-ready-nodes", cur.ASGDesiredCapacity),
zap.String("all-csrs", fmt.Sprintf("%+v", allCSRs)),
)
if readies >= cur.ASGDesiredCapacity {
if readies >= cur.ASGMinSize {
ready = true
break
}
Expand Down
33 changes: 20 additions & 13 deletions eks/ng/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Parameters:

ASGDesiredCapacity:
Type: Number
Default: 2
Default: 0
Description: Desired capacity of Node Group ASG.

Conditions:
Expand Down Expand Up @@ -216,7 +216,7 @@ Resources:
Type: AWS::AutoScaling::AutoScalingGroup
UpdatePolicy:
AutoScalingRollingUpdate:
MinInstancesInService: !Ref ASGDesiredCapacity
MinInstancesInService: !Ref ASGMinSize
MaxBatchSize: 1
SuspendProcesses:
- HealthCheck
Expand All @@ -227,8 +227,8 @@ Resources:
Properties:
AutoScalingGroupName: !Ref ASGName
MinSize: !Ref ASGMinSize
MaxSize: !Ref ASGMaxSize
DesiredCapacity: !Ref ASGDesiredCapacity
MaxSize: !Ref ASGMaxSize{{ if ne .ASGDesiredCapacity 0 }}
DesiredCapacity: !Ref ASGDesiredCapacity{{ end }}
VPCZoneIdentifier: !Ref PublicSubnetIDs
MetricsCollection:
- Granularity: "1Minute"
Expand All @@ -239,7 +239,7 @@ Resources:
- Key: !Sub kubernetes.io/cluster/${ClusterName}
Value: owned
PropagateAtLaunch: true
{{ if ne .AsgTagData "" }}{{.AsgTagData}}{{ end }}
{{ if ne .ASGTagData "" }}{{.ASGTagData}}{{ end }}
MixedInstancesPolicy:
InstancesDistribution:
OnDemandAllocationStrategy: "prioritized"
Expand Down Expand Up @@ -374,9 +374,10 @@ const asgTagDataNG = ` - Key: !Sub k8s.io/cluster-autoscaler/${ClusterName}
`

type templateASG struct {
Metadata string
UserData string
AsgTagData string
Metadata string
UserData string
ASGDesiredCapacity int64
ASGTagData string
}

func (ts *tester) createASGs() error {
Expand All @@ -398,7 +399,9 @@ func (ts *tester) createASGs() error {
// TODO: may not be necessary
// "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2"
// already includes SSM agent + AWS CLI
tg := templateASG{}
tg := templateASG{
ASGDesiredCapacity: cur.ASGDesiredCapacity,
}
switch cur.AMIType {
case ec2config.AMITypeBottleRocketCPU:
// "bottlerocket" comes with SSM agent
Expand Down Expand Up @@ -448,9 +451,9 @@ func (ts *tester) createASGs() error {
tg.UserData += "\n"
tg.UserData += ` /opt/aws/bin/cfn-signal --exit-code $? --stack ${AWS::StackName} --resource ASG --region ${AWS::Region}`
}
tg.AsgTagData = ""
tg.ASGTagData = ""
if cur.ClusterAutoscaler != nil && cur.ClusterAutoscaler.Enable {
tg.AsgTagData = asgTagDataNG
tg.ASGTagData = asgTagDataNG
}
tpl := template.Must(template.New("TemplateASG").Parse(TemplateASG))
buf := bytes.NewBuffer(nil)
Expand Down Expand Up @@ -586,11 +589,15 @@ func (ts *tester) createASGs() error {
return fmt.Errorf("ASG name %q not found after creation", asgName)
}

waitDur := 30*time.Minute + 5*time.Second*time.Duration(cur.ASGDesiredCapacity)
checkN := time.Duration(cur.ASGDesiredCapacity)
if checkN == 0 {
checkN = time.Duration(cur.ASGMinSize)
}
waitDur := 30*time.Minute + 5*time.Second*checkN
for _, it := range cur.InstanceTypes {
if strings.Contains(it, ".metal") { // "i3.metal" takes much longer
ts.cfg.Logger.Info("increasing wait time for metal instance", zap.String("instance-type", it))
waitDur = time.Hour + time.Minute*time.Duration(cur.ASGDesiredCapacity)
waitDur = time.Hour + time.Minute*checkN
}
}
timeStart := time.Now()
Expand Down
5 changes: 3 additions & 2 deletions eks/ng/nodes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ func TestTemplateASG(t *testing.T) {

buf.Reset()
if err := tpl.Execute(buf, templateASG{
Metadata: metadataAL2InstallSSM,
UserData: userDataAL2InstallSSM,
Metadata: metadataAL2InstallSSM,
UserData: userDataAL2InstallSSM,
ASGDesiredCapacity: 1,
}); err != nil {
t.Fatal(err)
}
Expand Down
9 changes: 7 additions & 2 deletions eks/ng/wait/wait.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@ func (ts *tester) waitForNodes(asgName string, retriesLeft int) error {
}
}

waitDur := 3*time.Minute + time.Duration(5*cur.ASGDesiredCapacity)*time.Second
checkN := time.Duration(cur.ASGDesiredCapacity)
if checkN == 0 {
checkN = time.Duration(cur.ASGMinSize)
}
waitDur := 3*time.Minute + 5*time.Second*checkN
ts.cfg.Logger.Info(
"waiting for EC2 instances in ASG",
zap.String("asg-name", cur.Name),
Expand Down Expand Up @@ -277,10 +281,11 @@ func (ts *tester) waitForNodes(asgName string, retriesLeft int) error {
zap.String("command", ts.cfg.EKSConfig.KubectlCommand()+" get nodes"),
zap.String("ng-name", cur.Name),
zap.Int("current-ready-nodes", readies),
zap.Int64("min-ready-nodes", cur.ASGMinSize),
zap.Int64("desired-ready-nodes", cur.ASGDesiredCapacity),
zap.String("all-csrs", fmt.Sprintf("%+v", allCSRs)),
)
if int64(readies) >= cur.ASGDesiredCapacity {
if int64(readies) >= cur.ASGMinSize {
ready = true
break
}
Expand Down
10 changes: 5 additions & 5 deletions eksconfig/add-on-managed-node-groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ type MNG struct {
// ref. https://docs.aws.amazon.com/eks/latest/userguide/create-managed-node-group.html
// ref. https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html
ASGMaxSize int `json:"asg-max-size,omitempty"`
// ASGDesiredCapacity is is the desired capacity of Node Group ASG.
// ASGDesiredCapacity is the desired capacity of Node Group ASG.
// ref. https://docs.aws.amazon.com/eks/latest/userguide/create-managed-node-group.html
// ref. https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-eks-nodegroup.html
ASGDesiredCapacity int `json:"asg-desired-capacity,omitempty"`
Expand Down Expand Up @@ -501,13 +501,13 @@ func (cfg *Config) validateAddOnManagedNodeGroups() error {
}
}

if cur.ASGDesiredCapacity == 0 {
return fmt.Errorf("AddOnManagedNodeGroups.MNGs[%q].ASGDesiredCapacity must be >0", k)
if cur.ASGMinSize == 0 && cur.ASGDesiredCapacity == 0 {
return fmt.Errorf("AddOnManagedNodeGroups.MNGs[%q].ASGMinSize/ASGDesiredCapacity must be >0", k)
}
if cur.ASGMinSize == 0 {
if cur.ASGDesiredCapacity > 0 && cur.ASGMinSize == 0 {
cur.ASGMinSize = cur.ASGDesiredCapacity
}
if cur.ASGMaxSize == 0 {
if cur.ASGDesiredCapacity > 0 && cur.ASGMaxSize == 0 {
cur.ASGMaxSize = cur.ASGDesiredCapacity
}

Expand Down
Loading