Skip to content

Commit

Permalink
Merge branch 'kubernetes:master' into configure-memory-target
Browse files Browse the repository at this point in the history
  • Loading branch information
emla9 authored Mar 18, 2024
2 parents 3458b79 + eb5d875 commit 478d3e0
Show file tree
Hide file tree
Showing 35 changed files with 879 additions and 155 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v2
with:
go-version: '1.21.6'
go-version: '1.22.1'

- uses: actions/checkout@v2
with:
Expand Down
4 changes: 2 additions & 2 deletions builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

FROM golang:1.21.8
FROM golang:1.22.1
LABEL maintainer="Marcin Wielgus <[email protected]>"

ENV GOPATH /gopath/
Expand All @@ -21,6 +21,6 @@ ENV GO111MODULE auto

RUN apt-get update && apt-get --yes install libseccomp-dev
RUN go version
RUN go get github.com/tools/godep
RUN go install github.com/tools/godep@latest
RUN godep version
CMD ["/bin/bash"]
2 changes: 1 addition & 1 deletion charts/cluster-autoscaler/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.35.0
version: 9.36.0
19 changes: 18 additions & 1 deletion charts/cluster-autoscaler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ To create a valid configuration, follow instructions for your cloud provider:
- [Azure](#azure)
- [OpenStack Magnum](#openstack-magnum)
- [Cluster API](#cluster-api)
- [Exoscale](#exoscale)
- [Hetzner Cloud](#hetzner-cloud)

### Templating the autoDiscovery.clusterName

Expand Down Expand Up @@ -222,6 +224,7 @@ $ helm install my-release autoscaler/cluster-autoscaler -f myvalues.yaml
`cloudProvider: clusterapi` must be set, and then one or more of

- `autoDiscovery.clusterName`
- or `autoDiscovery.namespace`
- or `autoDiscovery.labels`

See [here](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md#configuring-node-group-auto-discovery) for more details.
Expand Down Expand Up @@ -259,6 +262,18 @@ $ helm install my-release autoscaler/cluster-autoscaler \

Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm.

### Hetzner Cloud

The following parameters are required:

- `cloudProvider=hetzner`
- `extraEnv.HCLOUD_TOKEN=...`
- `autoscalingGroups=...`

Each autoscaling group requires an additional `instanceType` and `region` key to be set.

Read [cluster-autoscaler/cloudprovider/hetzner/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md) for further information on the setup without helm.

## Uninstalling the Chart

To uninstall `my-release`:
Expand Down Expand Up @@ -373,15 +388,17 @@ vpa:
| affinity | object | `{}` | Affinity for pod assignment |
| autoDiscovery.clusterName | string | `nil` | Enable autodiscovery for `cloudProvider=aws`, for groups matching `autoDiscovery.tags`. autoDiscovery.clusterName -- Enable autodiscovery for `cloudProvider=azure`, using tags defined in https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/azure/README.md#auto-discovery-setup. Enable autodiscovery for `cloudProvider=clusterapi`, for groups matching `autoDiscovery.labels`. Enable autodiscovery for `cloudProvider=gce`, but no MIG tagging required. Enable autodiscovery for `cloudProvider=magnum`, for groups matching `autoDiscovery.roles`. |
| autoDiscovery.labels | list | `[]` | Cluster-API labels to match https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md#configuring-node-group-auto-discovery |
| autoDiscovery.namespace | string | `nil` | Enable autodiscovery via cluster namespace for for `cloudProvider=clusterapi` |
| autoDiscovery.roles | list | `["worker"]` | Magnum node group roles to match. |
| autoDiscovery.tags | list | `["k8s.io/cluster-autoscaler/enabled","k8s.io/cluster-autoscaler/{{ .Values.autoDiscovery.clusterName }}"]` | ASG tags to match, run through `tpl`. |
| autoscalingGroups | list | `[]` | For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: asg1<br /> maxSize: 2<br /> minSize: 1 </pre> |
| autoscalingGroups | list | `[]` | For AWS, Azure AKS or Magnum. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: asg1<br /> maxSize: 2<br /> minSize: 1 </pre> For Hetzner Cloud, the `instanceType` and `region` keys are also required. <pre> - name: mypool<br /> maxSize: 2<br /> minSize: 1<br /> instanceType: CPX21<br /> region: FSN1 </pre> |
| autoscalingGroupsnamePrefix | list | `[]` | For GCE. At least one element is required if not using `autoDiscovery`. For example: <pre> - name: ig01<br /> maxSize: 10<br /> minSize: 0 </pre> |
| awsAccessKeyID | string | `""` | AWS access key ID ([if AWS user keys used](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#using-aws-credentials)) |
| awsRegion | string | `"us-east-1"` | AWS region (required if `cloudProvider=aws`) |
| awsSecretAccessKey | string | `""` | AWS access secret key ([if AWS user keys used](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md#using-aws-credentials)) |
| azureClientID | string | `""` | Service Principal ClientID with contributor permission to Cluster and Node ResourceGroup. Required if `cloudProvider=azure` |
| azureClientSecret | string | `""` | Service Principal ClientSecret with contributor permission to Cluster and Node ResourceGroup. Required if `cloudProvider=azure` |
| azureEnableForceDelete | bool | `false` | Whether to force delete VMs or VMSS instances when scaling down. |
| azureResourceGroup | string | `""` | Azure resource group that the cluster is located. Required if `cloudProvider=azure` |
| azureSubscriptionID | string | `""` | Azure subscription where the resources are located. Required if `cloudProvider=azure` |
| azureTenantID | string | `""` | Azure tenant where the resources are located. Required if `cloudProvider=azure` |
Expand Down
15 changes: 15 additions & 0 deletions charts/cluster-autoscaler/README.md.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ To create a valid configuration, follow instructions for your cloud provider:
- [Azure](#azure)
- [OpenStack Magnum](#openstack-magnum)
- [Cluster API](#cluster-api)
- [Exoscale](#exoscale)
- [Hetzner Cloud](#hetzner-cloud)

### Templating the autoDiscovery.clusterName

Expand Down Expand Up @@ -222,6 +224,7 @@ $ helm install my-release autoscaler/cluster-autoscaler -f myvalues.yaml
`cloudProvider: clusterapi` must be set, and then one or more of

- `autoDiscovery.clusterName`
- or `autoDiscovery.namespace`
- or `autoDiscovery.labels`

See [here](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md#configuring-node-group-auto-discovery) for more details.
Expand Down Expand Up @@ -259,6 +262,18 @@ $ helm install my-release autoscaler/cluster-autoscaler \

Read [cluster-autoscaler/cloudprovider/exoscale/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md) for further information on the setup without helm.

### Hetzner Cloud

The following parameters are required:

- `cloudProvider=hetzner`
- `extraEnv.HCLOUD_TOKEN=...`
- `autoscalingGroups=...`

Each autoscaling group requires an additional `instanceType` and `region` key to be set.

Read [cluster-autoscaler/cloudprovider/hetzner/README.md](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md) for further information on the setup without helm.

## Uninstalling the Chart

To uninstall `my-release`:
Expand Down
4 changes: 2 additions & 2 deletions charts/cluster-autoscaler/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if or .Values.autoDiscovery.clusterName .Values.autoscalingGroups -}}
{{- if or ( or .Values.autoDiscovery.clusterName .Values.autoDiscovery.namespace .Values.autoDiscovery.labels ) .Values.autoscalingGroups }}

To verify that cluster-autoscaler has started, run:

Expand All @@ -8,7 +8,7 @@ To verify that cluster-autoscaler has started, run:

##############################################################################
#### ERROR: You must specify values for either ####
#### autoDiscovery.clusterName or autoscalingGroups[] ####
#### autoDiscovery or autoscalingGroups[] ####
##############################################################################

The deployment and pod will not be created and the installation is not functional
Expand Down
48 changes: 38 additions & 10 deletions charts/cluster-autoscaler/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,11 @@ app.kubernetes.io/name: {{ include "cluster-autoscaler.name" . | quote }}


{{/*
Return labels, including instance, name and version.
Return labels, including instance and name.
*/}}
{{- define "cluster-autoscaler.labels" -}}
{{ include "cluster-autoscaler.instance-name" . }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
helm.sh/chart: {{ include "cluster-autoscaler.chart" . | quote }}
{{- if .Values.additionalLabels }}
{{ toYaml .Values.additionalLabels }}
Expand Down Expand Up @@ -112,21 +109,52 @@ Return true if the priority expander is enabled
{{- end -}}

{{/*
Return the autodiscoveryparameters for clusterapi.
autoDiscovery.clusterName for clusterapi.
*/}}
{{- define "cluster-autoscaler.capiAutodiscoveryConfig" -}}
{{- if .Values.autoDiscovery.clusterName -}}
{{- define "cluster-autoscaler.capiAutodiscovery.clusterName" -}}
{{- print "clusterName=" -}}{{ tpl (.Values.autoDiscovery.clusterName) . }}
{{- end -}}
{{- if and .Values.autoDiscovery.clusterName .Values.autoDiscovery.labels -}}
{{- print "," -}}

{{/*
autoDiscovery.namespace for clusterapi.
*/}}
{{- define "cluster-autoscaler.capiAutodiscovery.namespace" -}}
{{- print "namespace=" }}{{ .Values.autoDiscovery.namespace -}}
{{- end -}}
{{- if .Values.autoDiscovery.labels -}}

{{/*
autoDiscovery.labels for clusterapi.
*/}}
{{- define "cluster-autoscaler.capiAutodiscovery.labels" -}}
{{- range $i, $el := .Values.autoDiscovery.labels -}}
{{- if $i -}}{{- print "," -}}{{- end -}}
{{- range $key, $val := $el -}}
{{- $key -}}{{- print "=" -}}{{- $val -}}
{{- end -}}
{{- end -}}
{{- end -}}

{{/*
Return the autodiscoveryparameters for clusterapi.
*/}}
{{- define "cluster-autoscaler.capiAutodiscoveryConfig" -}}
{{- if .Values.autoDiscovery.clusterName -}}
{{ include "cluster-autoscaler.capiAutodiscovery.clusterName" . }}
{{- if .Values.autoDiscovery.namespace }}
{{- print "," -}}
{{ include "cluster-autoscaler.capiAutodiscovery.namespace" . }}
{{- end -}}
{{- if .Values.autoDiscovery.labels }}
{{- print "," -}}
{{ include "cluster-autoscaler.capiAutodiscovery.labels" . }}
{{- end -}}
{{- else if .Values.autoDiscovery.namespace -}}
{{ include "cluster-autoscaler.capiAutodiscovery.namespace" . }}
{{- if .Values.autoDiscovery.labels }}
{{- print "," -}}
{{ include "cluster-autoscaler.capiAutodiscovery.labels" . }}
{{- end -}}
{{- else if .Values.autoDiscovery.labels -}}
{{ include "cluster-autoscaler.capiAutodiscovery.labels" . }}
{{- end -}}
{{- end -}}
10 changes: 8 additions & 2 deletions charts/cluster-autoscaler/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if or ( or .Values.autoDiscovery.clusterName .Values.autoDiscovery.labels ) .Values.autoscalingGroups }}
{{- if or ( or .Values.autoDiscovery.clusterName .Values.autoDiscovery.namespace .Values.autoDiscovery.labels ) .Values.autoscalingGroups }}
{{/* one of the above is required */}}
apiVersion: {{ template "deployment.apiVersion" . }}
kind: Deployment
Expand Down Expand Up @@ -60,8 +60,12 @@ spec:
{{- end }}
{{- if .Values.autoscalingGroups }}
{{- range .Values.autoscalingGroups }}
{{- if eq $.Values.cloudProvider "hetzner" }}
- --nodes={{ .minSize }}:{{ .maxSize }}:{{ .instanceType }}:{{ .region }}:{{ .name }}
{{- else }}
- --nodes={{ .minSize }}:{{ .maxSize }}:{{ .name }}
{{- end }}
{{- end }}
{{- end }}
{{- if eq .Values.cloudProvider "rancher" }}
{{- if .Values.cloudConfigPath }}
Expand Down Expand Up @@ -95,7 +99,7 @@ spec:
- --cluster-name={{ tpl (.Values.magnumClusterName) . }}
{{- end }}
{{- else if eq .Values.cloudProvider "clusterapi" }}
{{- if or .Values.autoDiscovery.clusterName .Values.autoDiscovery.labels }}
{{- if or .Values.autoDiscovery.clusterName .Values.autoDiscovery.labels .Values.autoDiscovery.namepace }}
- --node-group-auto-discovery=clusterapi:{{ template "cluster-autoscaler.capiAutodiscoveryConfig" . }}
{{- end }}
{{- if eq .Values.clusterAPIMode "incluster-kubeconfig"}}
Expand Down Expand Up @@ -166,6 +170,8 @@ spec:
secretKeyRef:
key: VMType
name: {{ default (include "cluster-autoscaler.fullname" .) .Values.secretKeyRefNameOverride }}
- name: AZURE_ENABLE_FORCE_DELETE
value: "{{ .Values.azureEnableForceDelete }}"
{{- if .Values.azureUseWorkloadIdentityExtension }}
- name: ARM_USE_WORKLOAD_IDENTITY_EXTENSION
value: "true"
Expand Down
14 changes: 14 additions & 0 deletions charts/cluster-autoscaler/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ autoDiscovery:
# Enable autodiscovery for `cloudProvider=magnum`, for groups matching `autoDiscovery.roles`.
clusterName: # cluster.local

# autoDiscovery.namespace -- Enable autodiscovery via cluster namespace for for `cloudProvider=clusterapi`
namespace: # default

# autoDiscovery.tags -- ASG tags to match, run through `tpl`.
tags:
- k8s.io/cluster-autoscaler/enabled
Expand All @@ -36,6 +39,14 @@ autoDiscovery:
# maxSize: 2<br />
# minSize: 1
# </pre>
# For Hetzner Cloud, the `instanceType` and `region` keys are also required.
# <pre>
# - name: mypool<br />
# maxSize: 2<br />
# minSize: 1<br />
# instanceType: CPX21<br />
# region: FSN1
# </pre>
autoscalingGroups: []
# - name: asg1
# maxSize: 2
Expand Down Expand Up @@ -96,6 +107,9 @@ azureUseWorkloadIdentityExtension: false
# azureVMType -- Azure VM type.
azureVMType: "vmss"

# azureEnableForceDelete -- Whether to force delete VMs or VMSS instances when scaling down.
azureEnableForceDelete: false

# cloudConfigPath -- Configuration file for cloud provider.
cloudConfigPath: ""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ func (ali *aliCloudProvider) NodeGroups() []cloudprovider.NodeGroup {

// NodeGroupForNode returns the node group for the given node.
func (ali *aliCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
if len(node.Spec.ProviderID) == 0 {
klog.Warningf("Node %v has no providerId", node.Name)
return nil, nil
}
instanceId, err := ecsInstanceIdFromProviderId(node.Spec.ProviderID)
if err != nil {
klog.Errorf("failed to get instance Id from provider Id:%s,because of %s", node.Spec.ProviderID, err.Error())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func newTestAzureManager(t *testing.T) *AzureManager {
VMType: vmTypeVMSS,
MaxDeploymentsCount: 2,
Deployment: "deployment",
EnableForceDelete: true,
},
azClient: &azClient{
virtualMachineScaleSetsClient: mockVMSSClient,
Expand Down
10 changes: 10 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ type Config struct {
CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration,omitempty" yaml:"cloudProviderBackoffDuration,omitempty"`
CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter,omitempty" yaml:"cloudProviderBackoffJitter,omitempty"`

// EnableForceDelete defines whether to enable force deletion on the APIs
EnableForceDelete bool `json:"enableForceDelete,omitempty" yaml:"enableForceDelete,omitempty"`

// EnableDynamicInstanceList defines whether to enable dynamic instance workflow for instance information check
EnableDynamicInstanceList bool `json:"enableDynamicInstanceList,omitempty" yaml:"enableDynamicInstanceList,omitempty"`

Expand Down Expand Up @@ -303,6 +306,13 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
}
}

if enableForceDelete := os.Getenv("AZURE_ENABLE_FORCE_DELETE"); enableForceDelete != "" {
cfg.EnableForceDelete, err = strconv.ParseBool(enableForceDelete)
if err != nil {
return nil, fmt.Errorf("failed to parse AZURE_ENABLE_FORCE_DELETE: %q, %v", enableForceDelete, err)
}
}

err = initializeCloudProviderRateLimitConfig(&cfg.CloudProviderRateLimitConfig)
if err != nil {
return nil, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,7 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
minSize: minVal,
maxSize: maxVal,
manager: manager,
enableForceDelete: manager.config.EnableForceDelete,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
Expand Down
28 changes: 26 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ type ScaleSet struct {
minSize int
maxSize int

enableForceDelete bool

sizeMutex sync.Mutex
curSize int64

Expand Down Expand Up @@ -87,6 +89,7 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64) (
sizeRefreshPeriod: az.azureCache.refreshInterval,
enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
instancesRefreshJitter: az.config.VmssVmsCacheJitter,
enableForceDelete: az.config.EnableForceDelete,
}

if az.config.VmssVmsCacheTTL != 0 {
Expand Down Expand Up @@ -251,6 +254,16 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {
Sku: vmssInfo.Sku,
Location: vmssInfo.Location,
}

if vmssInfo.ExtendedLocation != nil {
op.ExtendedLocation = &compute.ExtendedLocation{
Name: vmssInfo.ExtendedLocation.Name,
Type: vmssInfo.ExtendedLocation.Type,
}

klog.V(3).Infof("Passing ExtendedLocation information if it is not nil, with Edge Zone name:(%s)", *op.ExtendedLocation.Name)
}

ctx, cancel := getContextWithTimeout(vmssContextTimeout)
defer cancel()
klog.V(3).Infof("Waiting for virtualMachineScaleSetsClient.CreateOrUpdateAsync(%s)", scaleSet.Name)
Expand Down Expand Up @@ -437,8 +450,15 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregistered
resourceGroup := scaleSet.manager.config.ResourceGroup

scaleSet.instanceMutex.Lock()
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v)", requiredIds.InstanceIds)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds, false)
klog.V(3).Infof("Calling virtualMachineScaleSetsClient.DeleteInstancesAsync(%v), force delete set to %v", requiredIds.InstanceIds, scaleSet.enableForceDelete)
future, rerr := scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup, commonAsg.Id(), *requiredIds, scaleSet.enableForceDelete)

if scaleSet.enableForceDelete && isOperationNotAllowed(rerr) {
klog.Infof("falling back to normal delete for instances %v for %s", requiredIds.InstanceIds, scaleSet.Name)
future, rerr = scaleSet.manager.azClient.virtualMachineScaleSetsClient.DeleteInstancesAsync(ctx, resourceGroup,
commonAsg.Id(), *requiredIds, false)
}

scaleSet.instanceMutex.Unlock()
if rerr != nil {
klog.Errorf("virtualMachineScaleSetsClient.DeleteInstancesAsync for instances %v failed: %v", requiredIds.InstanceIds, rerr)
Expand Down Expand Up @@ -746,3 +766,7 @@ func (scaleSet *ScaleSet) getOrchestrationMode() (compute.OrchestrationMode, err
}
return vmss.OrchestrationMode, nil
}

func isOperationNotAllowed(rerr *retry.Error) bool {
return rerr != nil && rerr.ServiceErrorCode() == retry.OperationNotAllowed
}
Loading

0 comments on commit 478d3e0

Please sign in to comment.