Skip to content

Commit

Permalink
Support autodetection of GCE managed instance groups by name prefix
Browse files Browse the repository at this point in the history
This commit adds a new usage of the --node-group-auto-discovery flag intended
for use with the GCE cloud provider. GCE instance groups can be automatically
discovered based on a prefix of their group name. Example usage:

--node-group-auto-discovery=mig:prefix=k8s-mig,minNodes=0,maxNodes=10

Note that unlike the existing AWS ASG autodetection functionality we must
specify the min and max nodes in the flag. This is because MIGs store only
a target size in the GCE API - they do not have a min and max size we can
infer via the API.

In order to alleviate this limitation a little we allow multiple uses of the
autodiscovery flag. For example to discover two classes (big and small) of
instance groups with different size limits:

./cluster-autoscaler \
  --node-group-auto-discovery=mig:prefix=k8s-a-small,minNodes=1,maxNodes=10 \
  --node-group-auto-discovery=mig:prefix=k8s-a-big,minNodes=1,maxNodes=100

Zonal clusters (i.e. multizone = false in the cloud config) will detect all
managed instance groups within the cluster's zone. Regional clusters will
detect all matching (zonal) managed instance groups within any of that region's
zones.
  • Loading branch information
Nic Cope committed Nov 11, 2017
1 parent ebee079 commit 81b6010
Show file tree
Hide file tree
Showing 12 changed files with 445 additions and 47 deletions.
36 changes: 26 additions & 10 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ func BuildAwsCloudProvider(awsManager *AwsManager, discoveryOpts cloudprovider.N
return buildStaticallyDiscoveringProvider(awsManager, discoveryOpts.NodeGroupSpecs, resourceLimiter)
}
if discoveryOpts.AutoDiscoverySpecified() {
return buildAutoDiscoveringProvider(awsManager, discoveryOpts.NodeGroupAutoDiscoverySpec, resourceLimiter)
return buildAutoDiscoveringProvider(awsManager, discoveryOpts.NodeGroupAutoDiscoverySpecs, resourceLimiter)
}
return nil, fmt.Errorf("Failed to build an aws cloud provider: Either node group specs or node group auto discovery spec must be specified")
}

func buildAutoDiscoveringProvider(awsManager *AwsManager, spec string, resourceLimiter *cloudprovider.ResourceLimiter) (*awsCloudProvider, error) {
func parseAutoDiscoverySpec(spec string) ([]string, error) {
tokens := strings.Split(spec, ":")
if len(tokens) != 2 {
return nil, fmt.Errorf("Invalid node group auto discovery spec specified via --node-group-auto-discovery: %s", spec)
Expand All @@ -72,20 +72,36 @@ func buildAutoDiscoveringProvider(awsManager *AwsManager, spec string, resourceL
// Use the k8s cluster name tag to only discover asgs of the cluster denoted by clusterName
// See https://github.com/kubernetes/kubernetes/blob/9ef85a7/pkg/cloudprovider/providers/aws/tags.go#L30-L34
// for more information about the tag
tags := strings.Split(tag, ",")
asgs, err := awsManager.getAutoscalingGroupsByTags(tags)
if err != nil {
return nil, fmt.Errorf("Failed to get ASGs: %v", err)
}

return strings.Split(tag, ","), nil
}
func buildAutoDiscoveringProvider(awsManager *AwsManager, specs []string, resourceLimiter *cloudprovider.ResourceLimiter) (*awsCloudProvider, error) {
aws := &awsCloudProvider{
awsManager: awsManager,
asgs: make([]*Asg, 0),
resourceLimiter: resourceLimiter,
}
for _, asg := range asgs {
aws.addAsg(buildAsg(aws.awsManager, int(*asg.MinSize), int(*asg.MaxSize), *asg.AutoScalingGroupName))

seen := make(map[string]bool)
for _, spec := range specs {
tags, err := parseAutoDiscoverySpec(spec)
if err != nil {
return nil, err
}
asgs, err := awsManager.getAutoscalingGroupsByTags(tags)
if err != nil {
return nil, fmt.Errorf("Failed to get ASGs: %v", err)
}
for _, asg := range asgs {
// An ASG might match more than one provided spec, but we only ever
// want to add it once.
if seen[*asg.AutoScalingGroupARN] {
continue
}
seen[*asg.AutoScalingGroupARN] = true
aws.addAsg(buildAsg(aws.awsManager, int(*asg.MinSize), int(*asg.MaxSize), *asg.AutoScalingGroupName))
}
}

return aws, nil
}

Expand Down
19 changes: 19 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,25 @@ func TestBuildAwsCloudProvider(t *testing.T) {
assert.NoError(t, err)
}

func TestParseAutoDiscoverySpec(t *testing.T) {
want := []string{"coolTag", "anotherTag"}
got, err := parseAutoDiscoverySpec("asg:tag=coolTag,anotherTag")
assert.NoError(t, err)
assert.Equal(t, want, got)

badSpecs := []string{
"asg",
"tag=coolTag,anotherTag",
"mig:tag=coolTag,anotherTag",
"asg:notatag=coolTag,anotherTag",
}

for _, spec := range badSpecs {
_, err = parseAutoDiscoverySpec(spec)
assert.Error(t, err)
}
}

func TestAddNodeGroup(t *testing.T) {
provider := testProvider(t, testAwsManager)
err := provider.addNodeGroup("bad spec")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func (b CloudProviderBuilder) Build(discoveryOpts cloudprovider.NodeGroupDiscove
if gceError != nil {
glog.Fatalf("Failed to create GCE Manager: %v", gceError)
}
cloudProvider, err = gce.BuildGceCloudProvider(gceManager, nodeGroupsFlag, resourceLimiter)
cloudProvider, err = gce.BuildGceCloudProvider(gceManager, discoveryOpts, resourceLimiter)
if err != nil {
glog.Fatalf("Failed to create GCE cloud provider: %v", err)
}
Expand Down
100 changes: 98 additions & 2 deletions cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package gce

import (
"fmt"
"regexp"
"strconv"
"strings"
"time"

Expand All @@ -32,8 +34,19 @@ import (
const (
maxAutoprovisionedSize = 1000
minAutoprovisionedSize = 0

autoDiscovererTypeMIG = "mig"
autoDiscovererKeyPrefix = "prefix"
autoDiscovererKeyMinNodes = "min"
autoDiscovererKeyMaxNodes = "max"
)

var validAutoDiscovererKeys = strings.Join([]string{
autoDiscovererKeyPrefix,
autoDiscovererKeyMinNodes,
autoDiscovererKeyMaxNodes,
}, ", ")

// Big machines are temporarily commented out.
// TODO(mwielgus): get this list programatically
var autoprovisionedMachineTypes = []string{
Expand Down Expand Up @@ -66,11 +79,20 @@ type GceCloudProvider struct {
}

// BuildGceCloudProvider builds CloudProvider implementation for GCE.
func BuildGceCloudProvider(gceManager GceManager, specs []string, resourceLimiter *cloudprovider.ResourceLimiter) (*GceCloudProvider, error) {
if gceManager.getMode() == ModeGKE && len(specs) != 0 {
func BuildGceCloudProvider(gceManager GceManager, do cloudprovider.NodeGroupDiscoveryOptions, resourceLimiter *cloudprovider.ResourceLimiter) (*GceCloudProvider, error) {
if err := do.Validate(); err != nil {
return nil, fmt.Errorf("Failed to build a GCE cloud provider: %v", err)
}
if gceManager.getMode() == ModeGKE && !do.NoDiscoverySpecified() {
return nil, fmt.Errorf("GKE gets nodegroup specification via API, command line specs are not allowed")
}
if do.AutoDiscoverySpecified() {
return buildAutoDiscoveringProvider(gceManager, do.NodeGroupAutoDiscoverySpecs, resourceLimiter)
}
return buildStaticallyDiscoveringProvider(gceManager, do.NodeGroupSpecs, resourceLimiter)
}

func buildStaticallyDiscoveringProvider(gceManager GceManager, specs []string, resourceLimiter *cloudprovider.ResourceLimiter) (*GceCloudProvider, error) {
gce := &GceCloudProvider{
gceManager: gceManager,
resourceLimiterFromFlags: resourceLimiter,
Expand All @@ -83,6 +105,80 @@ func BuildGceCloudProvider(gceManager GceManager, specs []string, resourceLimite
return gce, nil
}

type autoDiscovererConfig struct {
migRe *regexp.Regexp
minNodes string
maxNodes string
}

func parseAutoDiscoverySpec(spec string) (autoDiscovererConfig, error) {
cfg := autoDiscovererConfig{}

tokens := strings.Split(spec, ":")
if len(tokens) != 2 {
return cfg, fmt.Errorf("spec \"%s\" should be discoverer:key=value,key=value", spec)
}
discoverer := tokens[0]
if discoverer != autoDiscovererTypeMIG {
return cfg, fmt.Errorf("unsupported discoverer specified: %s", discoverer)
}

for _, arg := range strings.Split(tokens[1], ",") {
kv := strings.Split(arg, "=")
k, v := kv[0], kv[1]

switch k {
case autoDiscovererKeyPrefix:
var err error
if cfg.migRe, err = regexp.Compile(fmt.Sprintf("^%s.+", v)); err != nil {
return cfg, fmt.Errorf("invalid instance group name prefix \"%s\" - \"^%s.+\" must be a valid RE2 regexp", v, v)
}
case autoDiscovererKeyMinNodes:
if _, err := strconv.Atoi(v); err != nil {
return cfg, fmt.Errorf("invalid minimum nodes: %s", v)
}
cfg.minNodes = v
case autoDiscovererKeyMaxNodes:
if _, err := strconv.Atoi(v); err != nil {
return cfg, fmt.Errorf("invalid maximum nodes: %s", v)
}
cfg.maxNodes = v
default:
return cfg, fmt.Errorf("unsupported key \"%s\" is specified for discoverer \"%s\". Supported keys are \"%s\"", k, discoverer, validAutoDiscovererKeys)
}
}
return cfg, nil
}

func buildAutoDiscoveringProvider(gceManager GceManager, specs []string, resourceLimiter *cloudprovider.ResourceLimiter) (*GceCloudProvider, error) {
gce := &GceCloudProvider{gceManager: gceManager, resourceLimiterFromFlags: resourceLimiter}

seen := make(map[string]bool)
for _, spec := range specs {
cfg, err := parseAutoDiscoverySpec(spec)
if err != nil {
return nil, fmt.Errorf("invalid node group auto discovery spec \"%s\": %v", spec, err)
}
links, err := gceManager.findMigsNamed(cfg.migRe)
if err != nil {
return nil, fmt.Errorf("cannot autodiscover managed instance groups: %s", err)
}
for _, link := range links {
// A MIG might match more than one provided spec, but we only ever
// want to add it once.
if seen[link] {
continue
}
seen[link] = true
spec := fmt.Sprintf("%s:%s:%s", cfg.minNodes, cfg.maxNodes, link)
if err := gce.addNodeGroup(spec); err != nil {
return nil, err
}
}
}
return gce, nil
}

// Cleanup cleans up all resources before the cloud provider is removed
func (gce *GceCloudProvider) Cleanup() error {
gce.gceManager.Cleanup()
Expand Down
116 changes: 104 additions & 12 deletions cluster-autoscaler/cloudprovider/gce/gce_cloud_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ limitations under the License.
package gce

import (
"errors"
"fmt"
"net/http"
"reflect"
"regexp"
"strings"
"testing"

Expand Down Expand Up @@ -122,7 +124,12 @@ func (m *gceManagerMock) GetResourceLimiter() (*cloudprovider.ResourceLimiter, e
return args.Get(0).(*cloudprovider.ResourceLimiter), args.Error(1)
}

func TestBuildGceCloudProvider(t *testing.T) {
func (m *gceManagerMock) findMigsNamed(name *regexp.Regexp) ([]string, error) {
args := m.Called()
return args.Get(0).([]string), args.Error(1)
}

func TestBuildStaticGceCloudProvider(t *testing.T) {
gceManagerMock := &gceManagerMock{}

ng1Name := "https://content.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroups/ng1"
Expand All @@ -132,37 +139,122 @@ func TestBuildGceCloudProvider(t *testing.T) {
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

// GCE mode.
// GCE mode with explicit node groups.
gceManagerMock.On("getMode").Return(ModeGCE).Once()
gceManagerMock.On("RegisterMig",
mock.MatchedBy(func(mig *Mig) bool {
return mig.Name == "ng1" || mig.Name == "ng2"
})).Return(true).Times(2)

provider, err := BuildGceCloudProvider(gceManagerMock,
[]string{"0:10:" + ng1Name, "0:5:https:" + ng2Name},
resourceLimiter)
do := cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupSpecs: []string{"0:10:" + ng1Name, "0:5:https:" + ng2Name},
}
provider, err := BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.NoError(t, err)
assert.NotNil(t, provider)
mock.AssertExpectationsForObjects(t, gceManagerMock)

// GKE mode.
// Error on GKE mode with specs.
gceManagerMock.On("getMode").Return(ModeGKE).Once()
_, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.Error(t, err)
assert.Equal(t, "GKE gets nodegroup specification via API, command line specs are not allowed", err.Error())
mock.AssertExpectationsForObjects(t, gceManagerMock)

provider, err = BuildGceCloudProvider(gceManagerMock, []string{}, resourceLimiter)
// Ensure GKE mode works with no specs.
gceManagerMock.On("getMode").Return(ModeGKE).Once()
do = cloudprovider.NodeGroupDiscoveryOptions{}
provider, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.NoError(t, err)
assert.NotNil(t, provider)
mock.AssertExpectationsForObjects(t, gceManagerMock)

// Error on GKE mode with specs.
gceManagerMock.On("getMode").Return(ModeGKE).Once()
// Error with both explicit and autodiscovery specs.
do = cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupSpecs: []string{"0:10:" + ng1Name, "0:5:https:" + ng2Name},
NodeGroupAutoDiscoverySpecs: []string{"mig:prefix=pfx,min=0,max=10"},
}
_, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.Error(t, err)
mock.AssertExpectationsForObjects(t, gceManagerMock)
}
func TestBuildAutodiscoveringGceCloudProvider(t *testing.T) {
gceManagerMock := &gceManagerMock{}

provider, err = BuildGceCloudProvider(gceManagerMock,
[]string{"0:10:" + ng1Name, "0:5:https:" + ng2Name},
resourceLimiter)
ng1Name := "https://content.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroups/ng1"
ng2Name := "https://content.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroups/ng2"

resourceLimiter := cloudprovider.NewResourceLimiter(
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

// GCE mode with autodiscovery.
gceManagerMock.On("getMode").Return(ModeGCE).Once()
gceManagerMock.On("findMigsNamed").Return([]string{ng1Name, ng2Name}, nil).Twice()
gceManagerMock.On("RegisterMig",
mock.MatchedBy(func(mig *Mig) bool {
return mig.Name == "ng1" || mig.Name == "ng2"
})).Return(true).Times(2)

do := cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupAutoDiscoverySpecs: []string{
"mig:prefix=ng,min=0,max=10",
"mig:prefix=n,min=1,max=2",
},
}
provider, err := BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.NoError(t, err)
assert.NotNil(t, provider)
mock.AssertExpectationsForObjects(t, gceManagerMock)

// Error finding instance groups
gceManagerMock.On("getMode").Return(ModeGCE).Once()
gceManagerMock.On("findMigsNamed").Return([]string{}, errors.New("nope")).Once()
_, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.Error(t, err)
assert.Equal(t, "cannot autodiscover managed instance groups: nope", err.Error())
mock.AssertExpectationsForObjects(t, gceManagerMock)

// Error on GKE mode with autodiscovery specs.
gceManagerMock.On("getMode").Return(ModeGKE).Once()
_, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.Error(t, err)
assert.Equal(t, "GKE gets nodegroup specification via API, command line specs are not allowed", err.Error())
mock.AssertExpectationsForObjects(t, gceManagerMock)

// Bad autodiscovery spec
do = cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupAutoDiscoverySpecs: []string{"mig"},
}
gceManagerMock.On("getMode").Return(ModeGCE).Once()
_, err = BuildGceCloudProvider(gceManagerMock, do, resourceLimiter)
assert.Error(t, err)
mock.AssertExpectationsForObjects(t, gceManagerMock)
}

func TestParseAutoDiscoverySpec(t *testing.T) {
want := autoDiscovererConfig{
migRe: regexp.MustCompile("^pfx.+"),
minNodes: "0",
maxNodes: "10",
}
got, err := parseAutoDiscoverySpec("mig:prefix=pfx,min=0,max=10")
assert.NoError(t, err)
assert.Equal(t, want, got)

badSpecs := []string{
"prefix=pfx,min=0,max=10",
"asg:prefix=pfx,min=0,max=10",
"mig:prefix=pfx,min=0,max=10,unknown=hi",
"mig:prefix=pfx,min=a,max=10",
"mig:prefix=pfx,min=10,max=donkey",
"mig:prefix=(a,min=1,max=10",
}

for _, spec := range badSpecs {
_, err = parseAutoDiscoverySpec(spec)
assert.Error(t, err)
}
}

func TestNodeGroups(t *testing.T) {
Expand Down
Loading

0 comments on commit 81b6010

Please sign in to comment.