Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use CreateInstances() API when scaling up in GCE cloud provider #4158

Merged
merged 1 commit into from
Jun 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"time"

"google.golang.org/api/googleapi"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
Expand All @@ -39,6 +40,7 @@ const (
defaultOperationWaitTimeout = 20 * time.Second
defaultOperationPollInterval = 100 * time.Millisecond
defaultOperationDeletionPollInterval = 1 * time.Second
instanceGroupNameSuffix = "-grp"
// ErrorCodeQuotaExceeded is an error code used in InstanceErrorInfo if quota exceeded error occurs.
ErrorCodeQuotaExceeded = "QUOTA_EXCEEDED"

Expand Down Expand Up @@ -75,6 +77,7 @@ type AutoscalingGceClient interface {
// modifying resources
ResizeMig(GceRef, int64) error
DeleteInstances(migRef GceRef, instances []GceRef) error
CreateInstances(GceRef, int64, []string) error
}

type autoscalingGceClientV1 struct {
Expand Down Expand Up @@ -195,6 +198,26 @@ func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
}

func (client *autoscalingGceClientV1) CreateInstances(migRef GceRef, delta int64, existingInstances []string) error {
registerRequest("instance_group_managers", "create_instances")
req := gce.InstanceGroupManagersCreateInstancesRequest{}
instanceNames := map[string]bool{}
for _, inst := range existingInstances {
instanceNames[inst] = true
}
req.Instances = make([]*gce.PerInstanceConfig, 0, delta)
for i := int64(0); i < delta; i++ {
newInstanceName := generateInstanceName(migRef, instanceNames)
instanceNames[newInstanceName] = true
req.Instances = append(req.Instances, &gce.PerInstanceConfig{Name: newInstanceName})
}
op, err := client.gceService.InstanceGroupManagers.CreateInstances(migRef.Project, migRef.Zone, migRef.Name, &req).Do()
if err != nil {
return err
}
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
}

func (client *autoscalingGceClientV1) waitForOp(operation *gce.Operation, project, zone string, isDeletion bool) error {
pollInterval := client.operationPollInterval
if isDeletion {
Expand Down Expand Up @@ -346,6 +369,18 @@ func isInstanceNotRunningYet(gceInstance *gce.ManagedInstance) bool {
return gceInstance.InstanceStatus == "" || gceInstance.InstanceStatus == "PROVISIONING" || gceInstance.InstanceStatus == "STAGING"
}

func generateInstanceName(migRef GceRef, existingNames map[string]bool) string {
for i := 0; i < 100; i++ {
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
if ok, _ := existingNames[name]; !ok {
return name
}
}
klog.Warning("Unable to create unique name for a new instance, duplicate name might occur")
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
return name
}

func (client *autoscalingGceClientV1) FetchZones(region string) ([]string, error) {
registerRequest("regions", "get")
r, err := client.gceService.Regions.Get(client.projectId, region).Do()
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ func (mig *gceMig) IncreaseSize(delta int) error {
if int(size)+delta > mig.MaxSize() {
return fmt.Errorf("size increase too large - desired:%d max:%d", int(size)+delta, mig.MaxSize())
}
return mig.gceManager.SetMigSize(mig, size+int64(delta))
return mig.gceManager.CreateInstances(mig, int64(delta))
}

// DecreaseTargetSize decreases the target size of the node group. This function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ func (m *gceManagerMock) GetMigTemplateNode(mig Mig) (*apiv1.Node, error) {
return args.Get(0).(*apiv1.Node), args.Error(1)
}

func (m *gceManagerMock) CreateInstances(mig Mig, delta int64) error {
args := m.Called(mig, delta)
return args.Error(0)
}

func (m *gceManagerMock) getCpuAndMemoryForMachineType(machineType string, zone string) (cpu int64, mem int64, err error) {
args := m.Called(machineType, zone)
return args.Get(0).(int64), args.Get(1).(int64), args.Error(2)
Expand Down Expand Up @@ -266,7 +271,7 @@ func TestMig(t *testing.T) {

// Test IncreaseSize.
gceManagerMock.On("GetMigSize", mock.AnythingOfType("*gce.gceMig")).Return(int64(2), nil).Once()
gceManagerMock.On("SetMigSize", mock.AnythingOfType("*gce.gceMig"), int64(3)).Return(nil).Once()
gceManagerMock.On("CreateInstances", mock.AnythingOfType("*gce.gceMig"), int64(1)).Return(nil).Once()
err = mig1.IncreaseSize(1)
assert.NoError(t, err)
mock.AssertExpectationsForObjects(t, gceManagerMock)
Expand Down
18 changes: 18 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ type GceManager interface {
SetMigSize(mig Mig, size int64) error
// DeleteInstances deletes the given instances. All instances must be controlled by the same MIG.
DeleteInstances(instances []GceRef) error
// CreateInstances creates delta new instances in a given mig.
CreateInstances(mig Mig, delta int64) error
}

type gceManagerImpl struct {
Expand Down Expand Up @@ -289,6 +291,22 @@ func (m *gceManagerImpl) Refresh() error {
return m.forceRefresh()
}

func (m *gceManagerImpl) CreateInstances(mig Mig, delta int64) error {
if delta == 0 {
return nil
}
instances, err := m.GetMigNodes(mig)
if err != nil {
return err
}
instancesNames := make([]string, 0, len(instances))
for _, ins := range instances {
instancesNames = append(instancesNames, ins.Id)
}
m.cache.InvalidateMigTargetSize(mig.GceRef())
return m.GceService.CreateInstances(mig.GceRef(), delta, instancesNames)
}

func (m *gceManagerImpl) forceRefresh() error {
m.clearMachinesCache()
if err := m.fetchAutoMigs(); err != nil {
Expand Down
46 changes: 46 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1530,3 +1530,49 @@ func TestParseMIGAutoDiscoverySpecs(t *testing.T) {
})
}
}

const createInstancesResponse = `{
"kind": "compute#operation",
"id": "2890052495600280364",
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
"operationType": "compute.instanceGroupManagers.createInstances",
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
"targetId": "7836594831806456968",
"status": "DONE",
"user": "[email protected]",
"progress": 100,
"insertTime": "2021-06-22T05:55:31.903-07:00",
"startTime": "2021-06-22T05:55:31.907-07:00",
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
}`

const createInstancesOperationResponse = `{
"kind": "compute#operation",
"id": "2890052495600280364",
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
"operationType": "compute.instanceGroupManagers.createInstances",
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
"targetId": "7836594831806456968",
"status": "DONE",
"user": "[email protected]",
"progress": 100,
"insertTime": "2021-06-22T05:55:31.903-07:00",
"startTime": "2021-06-22T05:55:31.907-07:00",
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
}`

func TestAppendInstances(t *testing.T) {
server := NewHttpServerMock()
defer server.Close()
g := newTestGceManager(t, server.URL, false)

defaultPoolMig := setupTestDefaultPool(g, true)
server.On("handle", "/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool/listManagedInstances").Return(buildFourRunningInstancesOnDefaultMigManagedInstancesResponse(zoneB)).Once()
server.On("handle", fmt.Sprintf("/project1/zones/us-central1-b/instanceGroupManagers/%v/createInstances", defaultPoolMig.gceRef.Name)).Return(createInstancesResponse).Once()
server.On("handle", "/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32").Return(createInstancesOperationResponse).Once()
err := g.CreateInstances(defaultPoolMig, 2)
assert.NoError(t, err)
mock.AssertExpectationsForObjects(t, server)
}