Skip to content

Commit

Permalink
Load AWS EC2 Instance Types dynamically
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan committed Oct 10, 2019
1 parent a258103 commit 9179390
Show file tree
Hide file tree
Showing 12 changed files with 333 additions and 115 deletions.
1 change: 1 addition & 0 deletions cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ The following startup parameters are supported for cluster autoscaler:
| `leader-elect-renew-deadline` | The interval between attempts by the acting master to renew a leadership slot before it stops leading.<br>This must be less than or equal to the lease duration.<br>This is only applicable if leader election is enabled | 10 seconds
| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.<br>This is only applicable if leader election is enabled | 2 seconds
| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.<br>Supported options are `endpoints` (default) and `configmaps` | "endpoints"
| `static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false

# Troubleshooting:

Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ spec:
- r5ad.2xlarge
```
## Use Static Instance List
Latest supported EC2 Instance Types will be fetched by CA in runtime. You can find all available instance types in CA logs.

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

The set of the latest supported EC2 instance types will be fetched by the CA at run time. You can find all the available instance types in the CA logs. 
If your network has restriction, You may like to specify option `--static-instance-list=true` to switch back to original way to use a static list.

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

If your network access is restricted such that fetching this set is infeasible, you can specify the command-line flag `--static-instance-list=true` to switch the CA back to its original use of a statically defined set. 

This comment has been minimized.

Copy link
@Jeffwan

Jeffwan Oct 10, 2019

Author Contributor

Thanks @seh I will address the feedbacks



### Example usage:

* Create a [Launch Template](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-autoscaling-autoscalinggroup-launchtemplate.html) (LT) with an instance type, for example, r5.2xlarge. Consider this the 'base' instance type. Do not define any spot purchase options here.
Expand Down
32 changes: 30 additions & 2 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,16 @@ var (
type awsCloudProvider struct {
awsManager *AwsManager
resourceLimiter *cloudprovider.ResourceLimiter
// InstanceTypes is a map of ec2 resources
instanceTypes map[string]*InstanceType
}

// BuildAwsCloudProvider builds CloudProvider implementation for AWS.
func BuildAwsCloudProvider(awsManager *AwsManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
func BuildAwsCloudProvider(awsManager *AwsManager, instanceTypes map[string]*InstanceType, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
aws := &awsCloudProvider{
awsManager: awsManager,
resourceLimiter: resourceLimiter,
instanceTypes: instanceTypes,
}
return aws, nil
}
Expand Down Expand Up @@ -343,12 +346,37 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
defer config.Close()
}

// Generate EC2 list
var instanceTypes map[string]*InstanceType
if opts.StaticInstanceList {
klog.Warning("Use static EC2 Instance Types, list could be outdated")
instanceTypes = GetStaticEC2InstanceTypes()
} else {
region, err := GetCurrentAwsRegion()
if err != nil {
klog.Fatalf("Failed to get AWS Region: %v", err)
}

instanceTypes, err = GenerateEC2InstanceTypes(region)
if err != nil {
klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err)

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

Delete blank line.

}

keys := make([]string, 0, len(instanceTypes))
for key := range instanceTypes {
keys = append(keys, key)
}

klog.Infof("Successfully load %d EC2 Instance Types %s", len(keys), strings.Join(keys, ","))

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

klog.Infof("Successfully loaded %d EC2 instance types: [%s]", len(keys), strings.Join(keys, ","))

Alternately, you can print the slice itself, which gives you the delimiting brackets and intervening spaces, but no commas.

"... instance types: %s", len(keys), keys)
}

manager, err := CreateAwsManager(config, do)
if err != nil {
klog.Fatalf("Failed to create AWS Manager: %v", err)
}

provider, err := BuildAwsCloudProvider(manager, rl)
provider, err := BuildAwsCloudProvider(manager, instanceTypes, rl)
if err != nil {
klog.Fatalf("Failed to create AWS cloud provider: %v", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func testProvider(t *testing.T, m *AwsManager) *awsCloudProvider {
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

provider, err := BuildAwsCloudProvider(m, resourceLimiter)
provider, err := BuildAwsCloudProvider(m, GetStaticEC2InstanceTypes(), resourceLimiter)
assert.NoError(t, err)
return provider.(*awsCloudProvider)
}
Expand All @@ -144,7 +144,7 @@ func TestBuildAwsCloudProvider(t *testing.T) {
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

_, err := BuildAwsCloudProvider(testAwsManager, resourceLimiter)
_, err := BuildAwsCloudProvider(testAwsManager, GetStaticEC2InstanceTypes(), resourceLimiter)
assert.NoError(t, err)
}

Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ type AwsManager struct {
}

type asgTemplate struct {
InstanceType *instanceType
InstanceType *InstanceType
Region string
Zone string
Tags []*autoscaling.TagDescription
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func TestGetRegion(t *testing.T) {

func TestBuildGenericLabels(t *testing.T) {
labels := buildGenericLabels(&asgTemplate{
InstanceType: &instanceType{
InstanceType: &InstanceType{
InstanceType: "c4.large",
VCPU: 2,
MemoryMb: 3840,
Expand Down
171 changes: 171 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aws

import (
"encoding/json"
"errors"
"fmt"
"github.com/aws/aws-sdk-go/aws/endpoints"
"io/ioutil"
"k8s.io/klog"
"net/http"
"os"
"regexp"
"strconv"
"strings"
)

var (
ec2MetaDataServiceUrl = "http://169.254.169.254/latest/dynamic/instance-identity/document"
)

type response struct {
Products map[string]product `json:"products"`
}

type product struct {
Attributes productAttributes `json:"attributes"`
}

type productAttributes struct {
InstanceType string `json:"instanceType"`
VCPU string `json:"vcpu"`
Memory string `json:"memory"`
GPU string `json:"gpu"`
}

// GenerateEC2InstanceTypes returns a map of ec2 resources
func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
instanceTypes := make(map[string]*InstanceType)

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

At present there are around 125 different instance types, so consider specifying a size for this map.

This comment has been minimized.

Copy link
@Jeffwan

Jeffwan Oct 10, 2019

Author Contributor

It will be different in different regions here.

This comment has been minimized.

Copy link
@seh

seh Oct 10, 2019

Sure. I happened to look at my favorite region: us-east-1. I just mean that we can give a better map hint here—something on the order of a couple of hundred.


resolver := endpoints.DefaultResolver()
partitions := resolver.(endpoints.EnumPartitions).Partitions()

for _, p := range partitions {
for _, r := range p.Regions() {
if region != "" && region != r.ID() {
continue
}

url := "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/" + r.ID() + "/index.json"
klog.V(1).Infof("fetching %s\n", url)
res, err := http.Get(url)
if err != nil {
klog.Warningf("Error fetching %s skipping...\n", url)
continue
}

defer res.Body.Close()

body, err := ioutil.ReadAll(res.Body)
if err != nil {
klog.Warningf("Error parsing %s skipping...\n", url)
continue
}

var unmarshalled = response{}
err = json.Unmarshal(body, &unmarshalled)
if err != nil {
klog.Warningf("Error unmarshalling %s, skip...\n", url)
continue
}

for _, product := range unmarshalled.Products {
attr := product.Attributes
if attr.InstanceType != "" {
instanceTypes[attr.InstanceType] = &InstanceType{
InstanceType: attr.InstanceType,
}
if attr.Memory != "" && attr.Memory != "NA" {
instanceTypes[attr.InstanceType].MemoryMb = parseMemory(attr.Memory)
}
if attr.VCPU != "" {
instanceTypes[attr.InstanceType].VCPU = parseCPU(attr.VCPU)
}
if attr.GPU != "" {
instanceTypes[attr.InstanceType].GPU = parseCPU(attr.GPU)
}
}
}
}
}

if len(instanceTypes) == 0 {
return nil, errors.New("unable to load EC2 Instance Type list")
}

return instanceTypes, nil
}

// GetStaticEC2InstanceTypes return pregenerated ec2 instance type list
func GetStaticEC2InstanceTypes() map[string]*InstanceType {
return InstanceTypes
}

func parseMemory(memory string) int64 {
reg, err := regexp.Compile("[^0-9\\.]+")
if err != nil {
klog.Fatal(err)
}

parsed := strings.TrimSpace(reg.ReplaceAllString(memory, ""))
mem, err := strconv.ParseFloat(parsed, 64)
if err != nil {
klog.Fatal(err)
}

return int64(mem * float64(1024))
}

func parseCPU(cpu string) int64 {
i, err := strconv.ParseInt(cpu, 10, 64)
if err != nil {
klog.Fatal(err)
}
return i
}

// GetCurrentAwsRegion return region of current cluster without building awsManager
func GetCurrentAwsRegion() (string, error) {
region, present := os.LookupEnv("AWS_REGION")

if !present {
klog.V(1).Infof("fetching %s\n", ec2MetaDataServiceUrl)
res, err := http.Get(ec2MetaDataServiceUrl)
if err != nil {
return "", fmt.Errorf("Error fetching %s", ec2MetaDataServiceUrl)
}

defer res.Body.Close()

body, err := ioutil.ReadAll(res.Body)
if err != nil {
return "", fmt.Errorf("Error parsing %s", ec2MetaDataServiceUrl)
}

var unmarshalled = map[string]string{}
err = json.Unmarshal(body, &unmarshalled)
if err != nil {
klog.Warningf("Error unmarshalling %s, skip...\n", ec2MetaDataServiceUrl)
}

region = unmarshalled["region"]
}

return region, nil
}
105 changes: 105 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aws

import (
"github.com/stretchr/testify/assert"
"net/http"
"net/http/httptest"
"os"
"strconv"
"testing"
)

func TestGetStaticEC2InstanceTypes(t *testing.T) {
result := GetStaticEC2InstanceTypes()
assert.True(t, len(result) != 0)
}

func TestParseMemory(t *testing.T) {
expectedResultInMiB := int64(3.75 * 1024)
tests := []struct {
input string
expect int64
}{
{
input: "3.75 GiB",
expect: expectedResultInMiB,
},
{
input: "3.75 Gib",
expect: expectedResultInMiB,
},
{
input: "3.75GiB",
expect: expectedResultInMiB,
},
{
input: "3.75",
expect: expectedResultInMiB,
},
}

for _, test := range tests {
got := parseMemory(test.input)
assert.Equal(t, test.expect, got)
}
}

func TestParseCPU(t *testing.T) {
tests := []struct {
input string
expect int64
}{
{
input: strconv.FormatInt(8, 10),
expect: int64(8),
},
}

for _, test := range tests {
got := parseCPU(test.input)
assert.Equal(t, test.expect, got)
}
}

func TestGetCurrentAwsRegion(t *testing.T) {
region := "us-west-2"
os.Unsetenv("AWS_REGION")

server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
rw.Write([]byte("{\"region\" : \"" + region + "\"}"))
}))
// Close the server when test finishes
defer server.Close()

ec2MetaDataServiceUrl = server.URL
result, err := GetCurrentAwsRegion()

assert.Nil(t, err)
assert.NotNil(t, result)
assert.Equal(t, region, result)
}

func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) {
region := "us-west-2"
os.Setenv("AWS_REGION", region)

result, err := GetCurrentAwsRegion()
assert.Nil(t, err)
assert.Equal(t, region, result)
}
Loading

0 comments on commit 9179390

Please sign in to comment.