Skip to content

Commit

Permalink
Merge pull request kubernetes#3203 from detiber/configSplit2
Browse files Browse the repository at this point in the history
[cluster-autoscaler] Support using --cloud-config for clusterapi provider
  • Loading branch information
k8s-ci-robot authored and Ben Moss committed Sep 25, 2020
1 parent e1979a9 commit d939603
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 66 deletions.
76 changes: 68 additions & 8 deletions cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,6 @@ cluster.
The cluster-api provider requires Kubernetes v1.16 or greater to run the
v1alpha3 version of the API.

## Cluster API Prerequisites

Please be aware that currently the cluster autoscaler only supports CAPI
clusters that have joined their management and workload clusters into a single
cluster. For more information about this please see the
[Cluster API Concepts documentation](https://cluster-api.sigs.k8s.io/user/concepts.html)
and the [`clusterctl move` command documentation](https://cluster-api.sigs.k8s.io/user/concepts.html).

## Starting the Autoscaler

To enable the Cluster API provider, you must first specify it in the command
Expand All @@ -31,6 +23,74 @@ Please note, this example only shows the cloud provider options, you will
most likely need other command line flags. For more information you can invoke
`cluster-autoscaler --help` to see a full list of options.

## Configuring node group auto discovery

If you do not configure node group auto discovery, cluster autoscaler will attempt
to match nodes against any scalable resources found in any namespace and belonging
to any Cluster.

To limit cluster autoscaler to only match against resources in the blue namespace:

```
--node-group-auto-discovery=clusterapi:namespace=blue
```

To limit cluster autoscaler to only match against resources belonging to Cluster test1:

```
--node-group-auto-discovery=clusterapi:clusterName=test1
```

To limit cluster autoscaler to only match against resources matching the provided labels:

```
--node-group-auto-discovery=clusterapi:color=green,shape=square
```

These can be mixed and matched in any combination, for example to only match resources
in the staging namespace, belonging to the purple cluster, with the label owner=jim:

```
--node-group-auto-discovery=clusterapi:namespace=staging,clusterName=purple,owner=jim
```

## Connecting cluster-autoscaler to Cluster API management and workload Clusters

You will also need to provide the path to the kubeconfig(s) for the management
and workload clusters you wish cluster-autoscaler to run against. To specify the
kubeconfig path for the workload cluster to monitor, use the `--kubeconfig`
option and supply the path to the kubeconfig. If the `--kubeconfig` option is
not specified, cluster-autoscaler will attempt to use an in-cluster configuration.
To specify the kubeconfig path for the management cluster to monitor, use the
`--cloud-config` option and supply the path to the kubeconfig. If the
`--cloud-config` option is not specified, cluster-autoscaler will fall back to
using the kubeconfig that was provided with the `--kubeconfig` option. To disable
this fallback and use an in-cluster configuration for the management cluster
instead, also pass the `--clusterapi-cloud-config-authoritative` flag.

Use in-cluster config for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi
```

Use in-cluster config for workload cluster, specify kubeconfig for management cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --cloud-config=/mnt/kubeconfig
```

Use in-cluster config for management cluster, specify kubeconfig for workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/kubeconfig --clusterapi-cloud-config-authoritative
```

Use separate kubeconfigs for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig --cloud-config=/mnt/management.kubeconfig
```

Use a single provided kubeconfig for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig
```

## Enabling Autoscaling

To enable the automatic scaling of components in your cluster-api managed
Expand Down
34 changes: 28 additions & 6 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,28 +136,50 @@ func newProvider(

// BuildClusterAPI builds CloudProvider implementation for machine api.
func BuildClusterAPI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
externalConfig, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfigPath)
managementKubeconfig := opts.CloudConfig
if managementKubeconfig == "" && !opts.ClusterAPICloudConfigAuthoritative {
managementKubeconfig = opts.KubeConfigPath
}

managementConfig, err := clientcmd.BuildConfigFromFlags("", managementKubeconfig)
if err != nil {
klog.Fatalf("cannot build config: %v", err)
klog.Fatalf("cannot build management cluster config: %v", err)
}

workloadKubeconfig := opts.KubeConfigPath

workloadConfig, err := clientcmd.BuildConfigFromFlags("", workloadKubeconfig)
if err != nil {
klog.Fatalf("cannot build workload cluster config: %v", err)
}

// Grab a dynamic interface that we can create informers from
dc, err := dynamic.NewForConfig(externalConfig)
managementClient, err := dynamic.NewForConfig(managementConfig)
if err != nil {
klog.Fatalf("could not generate dynamic client for config")
}

kubeClient, err := kubernetes.NewForConfig(externalConfig)
workloadClient, err := kubernetes.NewForConfig(workloadConfig)
if err != nil {
klog.Fatalf("create kube clientset failed: %v", err)
}

discoveryClient, err := discovery.NewDiscoveryClientForConfig(externalConfig)
managementDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(managementConfig)
if err != nil {
klog.Fatalf("create discovery client failed: %v", err)
}

controller, err := newMachineController(dc, kubeClient, discoveryClient)
cachedDiscovery := memory.NewMemCacheClient(managementDiscoveryClient)
managementScaleClient, err := scale.NewForConfig(
managementConfig,
restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery),
dynamic.LegacyAPIPathResolverFunc,
scale.NewDiscoveryScaleKindResolver(managementDiscoveryClient))
if err != nil {
klog.Fatalf("create scale client failed: %v", err)
}

controller, err := newMachineController(managementClient, workloadClient, managementDiscoveryClient, managementScaleClient, do)
if err != nil {
klog.Fatal(err)
}
Expand Down
3 changes: 3 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,7 @@ type AutoscalingOptions struct {
AWSUseStaticInstanceList bool
// Path to kube configuration if available
KubeConfigPath string
// ClusterAPICloudConfigAuthoritative tells the Cluster API provider to treat the CloudConfig option as authoritative and
// not use KubeConfigPath as a fallback when it is not provided.
ClusterAPICloudConfigAuthoritative bool
}
106 changes: 54 additions & 52 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,11 @@ var (
regional = flag.Bool("regional", false, "Cluster is regional.")
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.")

ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
clusterAPICloudConfigAuthoritative = flag.Bool("clusterapi-cloud-config-authoritative", false, "Treat the cloud-config flag authoritatively (do not fallback to using kubeconfig flag). ClusterAPI only")
)

func createAutoscalingOptions() config.AutoscalingOptions {
Expand All @@ -193,54 +194,55 @@ func createAutoscalingOptions() config.AutoscalingOptions {
klog.Fatalf("Failed to parse flags: %v", err)
}
return config.AutoscalingOptions{
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
OkTotalUnreadyCount: *okTotalUnreadyCount,
ScaleUpFromZero: *scaleUpFromZero,
EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal,
MaxMemoryTotal: maxMemoryTotal,
MinMemoryTotal: minMemoryTotal,
GpuTotal: parsedGpuTotal,
NodeGroups: *nodeGroupsFlag,
ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace,
ClusterName: *clusterName,
NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
OkTotalUnreadyCount: *okTotalUnreadyCount,
ScaleUpFromZero: *scaleUpFromZero,
EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal,
MaxMemoryTotal: maxMemoryTotal,
MinMemoryTotal: minMemoryTotal,
GpuTotal: parsedGpuTotal,
NodeGroups: *nodeGroupsFlag,
ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace,
ClusterName: *clusterName,
NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
}
}

Expand Down

0 comments on commit d939603

Please sign in to comment.