Merge branch 'master' into chart_rbac
MarcusNoble authored Jul 5, 2021
2 parents f5cf35b + 7858da6 commit 3341c86
Showing 20 changed files with 394 additions and 74 deletions.
4 changes: 2 additions & 2 deletions charts/cluster-autoscaler/Chart.yaml
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.20.0
appVersion: 1.21.0
description: Scales Kubernetes worker nodes within autoscaling groups.
engine: gotpl
home: https://github.com/kubernetes/autoscaler
@@ -17,4 +17,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.9.3
version: 9.10.0
2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/README.md
@@ -367,7 +367,7 @@ Though enough for the majority of installations, the default PodSecurityPolicy _
| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
| image.pullSecrets | list | `[]` | Image pull secrets |
| image.repository | string | `"k8s.gcr.io/autoscaling/cluster-autoscaler"` | Image repository |
| image.tag | string | `"v1.20.0"` | Image tag |
| image.tag | string | `"v1.21.0"` | Image tag |
| kubeTargetVersionOverride | string | `""` | Allow overriding the `.Capabilities.KubeVersion.GitVersion` check. Useful for `helm template` commands. |
| magnumCABundlePath | string | `"/etc/kubernetes/ca-bundle.crt"` | Path to the host's CA bundle, from `ca-file` in the cloud-config file. |
| magnumClusterName | string | `""` | Cluster name or ID in Magnum. Required if `cloudProvider=magnum` and not setting `autoDiscovery.clusterName`. |
2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/values.yaml
@@ -195,7 +195,7 @@ image:
# image.repository -- Image repository
repository: k8s.gcr.io/autoscaling/cluster-autoscaler
# image.tag -- Image tag
tag: v1.20.0
tag: v1.21.0
# image.pullPolicy -- Image pull policy
pullPolicy: IfNotPresent
## Optionally specify an array of imagePullSecrets.
38 changes: 38 additions & 0 deletions cluster-autoscaler/FAQ.md
@@ -32,6 +32,7 @@ this document:
* [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
* [How can I prevent Cluster Autoscaler from scaling down a particular node?](#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node)
* [How can I configure overprovisioning with Cluster Autoscaler?](#how-can-i-configure-overprovisioning-with-cluster-autoscaler)
* [How can I enable/disable eviction for a specific DaemonSet](#how-can-i-enabledisable-eviction-for-a-specific-daemonset)
* [Internals](#internals)
* [Are all of the mentioned heuristics and timings final?](#are-all-of-the-mentioned-heuristics-and-timings-final)
* [How does scale-up work?](#how-does-scale-up-work)
@@ -434,6 +435,30 @@ spec:
serviceAccountName: cluster-proportional-autoscaler-service-account
```

### How can I enable/disable eviction for a specific DaemonSet

Cluster Autoscaler will evict DaemonSets based on its configuration, which is
common for the entire cluster. It is possible, however, to specify the desired
behavior on a per-pod basis. All DaemonSet pods will be evicted when they have
the following annotation:

```
"cluster-autoscaler.kubernetes.io/enable-ds-eviction": "true"
```
It is also possible to disable DaemonSet pod eviction explicitly:
```
"cluster-autoscaler.kubernetes.io/enable-ds-eviction": "false"
```
Note that this annotation needs to be specified on DaemonSet pods, not the
DaemonSet object itself. In order to do that for all DaemonSet pods, it is
sufficient to modify the pod spec in the DaemonSet object.
This annotation has no effect on pods that are not a part of any DaemonSet.
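
As a minimal sketch (the DaemonSet name, labels, and image below are illustrative, not taken from this commit), the annotation goes on the pod template, for example:

```yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: logging-agent            # hypothetical DaemonSet
spec:
  selector:
    matchLabels:
      app: logging-agent
  template:
    metadata:
      labels:
        app: logging-agent
      annotations:
        # Opt this DaemonSet's pods in to eviction by Cluster Autoscaler.
        cluster-autoscaler.kubernetes.io/enable-ds-eviction: "true"
    spec:
      containers:
        - name: agent
          image: registry.example.com/logging-agent:1.0  # placeholder image
```
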
# Internals
@@ -512,6 +537,17 @@
What happens when a non-empty node is terminated? As mentioned above, all pods should be migrated
elsewhere. Cluster Autoscaler does this by evicting them and tainting the node, so they aren't
scheduled there again.
DaemonSet pods may also be evicted. This can be configured separately for empty
(i.e. containing only DaemonSet pods) and non-empty nodes with
`--daemonset-eviction-for-empty-nodes` and
`--daemonset-eviction-for-occupied-nodes` flags, respectively. Note that the
default behavior is different for each flag: by default, DaemonSet pod eviction
happens only on occupied nodes. Individual DaemonSet pods can also
explicitly choose to be evicted (or not). See [How can I enable/disable eviction
for a specific
DaemonSet](#how-can-i-enabledisable-eviction-for-a-specific-daemonset) for more
details.
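
As an illustration only (the surrounding Deployment spec is assumed, not part of this commit), the two flags could be passed to the Cluster Autoscaler container like this:

```yaml
containers:
  - name: cluster-autoscaler
    image: k8s.gcr.io/autoscaling/cluster-autoscaler:v1.21.0
    command:
      - ./cluster-autoscaler
      # Non-default: also evict DaemonSet pods from otherwise-empty nodes.
      - --daemonset-eviction-for-empty-nodes=true
      # Default: evict DaemonSet pods from occupied (non-empty) nodes.
      - --daemonset-eviction-for-occupied-nodes=true
```
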
Example scenario:
Nodes A, B, C, X, Y.
@@ -690,6 +726,8 @@ The following startup parameters are supported for cluster autoscaler:
| `skip-nodes-with-system-pods` | If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet or mirror pods) | true
| `skip-nodes-with-local-storage`| If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath | true
| `min-replica-count` | Minimum number of replicas that a replica set or replication controller should have to allow their pods deletion in scale down | 0
| `daemonset-eviction-for-empty-nodes` | Whether DaemonSet pods will be gracefully terminated from empty nodes | false
| `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true
# Troubleshooting:
@@ -1972,6 +1972,12 @@ var InstanceTypes = map[string]*InstanceType{
MemoryMb: 479232,
GPU: 0,
},
"Standard_HB120rs_v3": {
InstanceType: "Standard_HB120rs_v3",
VCPU: 120,
MemoryMb: 479232,
GPU: 0,
},
"Standard_HB60rs": {
InstanceType: "Standard_HB60rs",
VCPU: 60,
35 changes: 35 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go
@@ -27,6 +27,7 @@ import (
"time"

"google.golang.org/api/googleapi"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/klogx"
@@ -39,6 +40,7 @@ const (
defaultOperationWaitTimeout = 20 * time.Second
defaultOperationPollInterval = 100 * time.Millisecond
defaultOperationDeletionPollInterval = 1 * time.Second
instanceGroupNameSuffix = "-grp"
// ErrorCodeQuotaExceeded is an error code used in InstanceErrorInfo if quota exceeded error occurs.
ErrorCodeQuotaExceeded = "QUOTA_EXCEEDED"

@@ -75,6 +77,7 @@ type AutoscalingGceClient interface {
// modifying resources
ResizeMig(GceRef, int64) error
DeleteInstances(migRef GceRef, instances []GceRef) error
CreateInstances(GceRef, int64, []string) error
}

type autoscalingGceClientV1 struct {
@@ -195,6 +198,26 @@ func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
}

func (client *autoscalingGceClientV1) CreateInstances(migRef GceRef, delta int64, existingInstances []string) error {
registerRequest("instance_group_managers", "create_instances")
req := gce.InstanceGroupManagersCreateInstancesRequest{}
instanceNames := map[string]bool{}
for _, inst := range existingInstances {
instanceNames[inst] = true
}
req.Instances = make([]*gce.PerInstanceConfig, 0, delta)
for i := int64(0); i < delta; i++ {
newInstanceName := generateInstanceName(migRef, instanceNames)
instanceNames[newInstanceName] = true
req.Instances = append(req.Instances, &gce.PerInstanceConfig{Name: newInstanceName})
}
op, err := client.gceService.InstanceGroupManagers.CreateInstances(migRef.Project, migRef.Zone, migRef.Name, &req).Do()
if err != nil {
return err
}
return client.waitForOp(op, migRef.Project, migRef.Zone, false)
}

func (client *autoscalingGceClientV1) waitForOp(operation *gce.Operation, project, zone string, isDeletion bool) error {
pollInterval := client.operationPollInterval
if isDeletion {
@@ -346,6 +369,18 @@ func isInstanceNotRunningYet(gceInstance *gce.ManagedInstance) bool {
return gceInstance.InstanceStatus == "" || gceInstance.InstanceStatus == "PROVISIONING" || gceInstance.InstanceStatus == "STAGING"
}

func generateInstanceName(migRef GceRef, existingNames map[string]bool) string {
for i := 0; i < 100; i++ {
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
if ok, _ := existingNames[name]; !ok {
return name
}
}
klog.Warning("Unable to create unique name for a new instance, duplicate name might occur")
name := fmt.Sprintf("%v-%v", strings.TrimSuffix(migRef.Name, instanceGroupNameSuffix), rand.String(4))
return name
}

func (client *autoscalingGceClientV1) FetchZones(region string) ([]string, error) {
registerRequest("regions", "get")
r, err := client.gceService.Regions.Get(client.projectId, region).Do()
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@@ -220,7 +220,7 @@ func (mig *gceMig) IncreaseSize(delta int) error {
if int(size)+delta > mig.MaxSize() {
return fmt.Errorf("size increase too large - desired:%d max:%d", int(size)+delta, mig.MaxSize())
}
return mig.gceManager.SetMigSize(mig, size+int64(delta))
return mig.gceManager.CreateInstances(mig, int64(delta))
}

// DecreaseTargetSize decreases the target size of the node group. This function
@@ -92,6 +92,11 @@ func (m *gceManagerMock) GetMigTemplateNode(mig Mig) (*apiv1.Node, error) {
return args.Get(0).(*apiv1.Node), args.Error(1)
}

func (m *gceManagerMock) CreateInstances(mig Mig, delta int64) error {
args := m.Called(mig, delta)
return args.Error(0)
}

func (m *gceManagerMock) getCpuAndMemoryForMachineType(machineType string, zone string) (cpu int64, mem int64, err error) {
args := m.Called(machineType, zone)
return args.Get(0).(int64), args.Get(1).(int64), args.Error(2)
@@ -266,7 +271,7 @@ func TestMig(t *testing.T) {

// Test IncreaseSize.
gceManagerMock.On("GetMigSize", mock.AnythingOfType("*gce.gceMig")).Return(int64(2), nil).Once()
gceManagerMock.On("SetMigSize", mock.AnythingOfType("*gce.gceMig"), int64(3)).Return(nil).Once()
gceManagerMock.On("CreateInstances", mock.AnythingOfType("*gce.gceMig"), int64(1)).Return(nil).Once()
err = mig1.IncreaseSize(1)
assert.NoError(t, err)
mock.AssertExpectationsForObjects(t, gceManagerMock)
18 changes: 18 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_manager.go
@@ -95,6 +95,8 @@ type GceManager interface {
SetMigSize(mig Mig, size int64) error
// DeleteInstances deletes the given instances. All instances must be controlled by the same MIG.
DeleteInstances(instances []GceRef) error
// CreateInstances creates delta new instances in a given mig.
CreateInstances(mig Mig, delta int64) error
}

type gceManagerImpl struct {
@@ -289,6 +291,22 @@ func (m *gceManagerImpl) Refresh() error {
return m.forceRefresh()
}

func (m *gceManagerImpl) CreateInstances(mig Mig, delta int64) error {
if delta == 0 {
return nil
}
instances, err := m.GetMigNodes(mig)
if err != nil {
return err
}
instancesNames := make([]string, 0, len(instances))
for _, ins := range instances {
instancesNames = append(instancesNames, ins.Id)
}
m.cache.InvalidateMigTargetSize(mig.GceRef())
return m.GceService.CreateInstances(mig.GceRef(), delta, instancesNames)
}

func (m *gceManagerImpl) forceRefresh() error {
m.clearMachinesCache()
if err := m.fetchAutoMigs(); err != nil {
46 changes: 46 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_manager_test.go
@@ -1530,3 +1530,49 @@ func TestParseMIGAutoDiscoverySpecs(t *testing.T) {
})
}
}

const createInstancesResponse = `{
"kind": "compute#operation",
"id": "2890052495600280364",
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
"operationType": "compute.instanceGroupManagers.createInstances",
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
"targetId": "7836594831806456968",
"status": "DONE",
"user": "[email protected]",
"progress": 100,
"insertTime": "2021-06-22T05:55:31.903-07:00",
"startTime": "2021-06-22T05:55:31.907-07:00",
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
}`

const createInstancesOperationResponse = `{
"kind": "compute#operation",
"id": "2890052495600280364",
"name": "operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32",
"zone": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b",
"operationType": "compute.instanceGroupManagers.createInstances",
"targetLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool-e25725dc-grp",
"targetId": "7836594831806456968",
"status": "DONE",
"user": "[email protected]",
"progress": 100,
"insertTime": "2021-06-22T05:55:31.903-07:00",
"startTime": "2021-06-22T05:55:31.907-07:00",
"selfLink": "https://www.googleapis.com/compute/v1/projects/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32"
}`

func TestAppendInstances(t *testing.T) {
server := NewHttpServerMock()
defer server.Close()
g := newTestGceManager(t, server.URL, false)

defaultPoolMig := setupTestDefaultPool(g, true)
server.On("handle", "/project1/zones/us-central1-b/instanceGroupManagers/gke-cluster-1-default-pool/listManagedInstances").Return(buildFourRunningInstancesOnDefaultMigManagedInstancesResponse(zoneB)).Once()
server.On("handle", fmt.Sprintf("/project1/zones/us-central1-b/instanceGroupManagers/%v/createInstances", defaultPoolMig.gceRef.Name)).Return(createInstancesResponse).Once()
server.On("handle", "/project1/zones/us-central1-b/operations/operation-1624366531120-5c55a4e128c15-fc5daa90-e1ef6c32").Return(createInstancesOperationResponse).Once()
err := g.CreateInstances(defaultPoolMig, 2)
assert.NoError(t, err)
mock.AssertExpectationsForObjects(t, server)
}
8 changes: 7 additions & 1 deletion cluster-autoscaler/cloudprovider/hetzner/README.md
@@ -5,11 +5,17 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
# Configuration

`HCLOUD_TOKEN` Required Hetzner Cloud token.

`HCLOUD_CLOUD_INIT` Base64-encoded cloud-init YAML with commands to join the cluster. Sample: [examples/cloud-init.txt (for Kubernetes 1.20.1)](examples/cloud-init.txt)
`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images

`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.

`HCLOUD_NETWORK` Default empty. The name of the network that is used in the cluster, @see https://docs.hetzner.cloud/#networks

`HCLOUD_SSH_KEY` Default empty. This SSH key will have access to the freshly created server, @see https://docs.hetzner.cloud/#ssh-keys
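
A minimal sketch of supplying these variables to the autoscaler container (the Secret name and all values below are placeholders, not taken from this commit):

```yaml
env:
  - name: HCLOUD_TOKEN
    valueFrom:
      secretKeyRef:
        name: hcloud          # hypothetical Secret holding the API token
        key: token
  - name: HCLOUD_CLOUD_INIT
    value: "<base64 of cloud-init.yaml>"   # e.g. produced with `base64 -w 0`
  - name: HCLOUD_IMAGE
    value: "ubuntu-20.04"
  - name: HCLOUD_NETWORK
    value: "my-private-network"            # placeholder
  - name: HCLOUD_SSH_KEY
    value: "my-ssh-key"                    # placeholder
```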

Node groups must be defined with the `--nodes=<min-servers>:<max-servers>:<instance-type>:<region>:<name>` flag.

Multiple flags will create multiple node pools. For example:
```
--nodes=1:10:CPX51:FSN1:pool1
```
37 changes: 31 additions & 6 deletions cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
@@ -39,7 +39,7 @@ type hetznerManager struct {
nodeGroups map[string]*hetznerNodeGroup
apiCallContext context.Context
cloudInit string
image string
image *hcloud.Image
sshKey *hcloud.SSHKey
network *hcloud.Network
}
@@ -55,18 +55,43 @@ func newManager() (*hetznerManager, error) {
return nil, errors.New("`HCLOUD_CLOUD_INIT` is not specified")
}

image := os.Getenv("HCLOUD_IMAGE")
if image == "" {
image = "ubuntu-20.04"
}

client := hcloud.NewClient(hcloud.WithToken(token))
ctx := context.Background()
cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64)
if err != nil {
return nil, fmt.Errorf("failed to parse cloud init error: %s", err)
}

imageName := os.Getenv("HCLOUD_IMAGE")
if imageName == "" {
imageName = "ubuntu-20.04"
}

// Search for an image ID corresponding to the supplied HCLOUD_IMAGE env
// variable. This value can either be an image ID itself (an int), a name
// (e.g. "ubuntu-20.04"), or a label selector associated with an image
// snapshot. In the latter case it will use the most recent snapshot.
image, _, err := client.Image.Get(ctx, imageName)
if err != nil {
return nil, fmt.Errorf("unable to find image %s: %v", imageName, err)
}
if image == nil {
images, err := client.Image.AllWithOpts(ctx, hcloud.ImageListOpts{
Type: []hcloud.ImageType{hcloud.ImageTypeSnapshot},
Status: []hcloud.ImageStatus{hcloud.ImageStatusAvailable},
Sort: []string{"created:desc"},
ListOpts: hcloud.ListOpts{
LabelSelector: imageName,
},
})

if err != nil || len(images) == 0 {
return nil, fmt.Errorf("unable to find image %s: %v", imageName, err)
}

image = images[0]
}

var network *hcloud.Network
networkName := os.Getenv("HCLOUD_NETWORK")
