Skip to content

Commit

Permalink
Implement external gRPC Cloud Provider
Browse files Browse the repository at this point in the history
  • Loading branch information
dbonfigli authored and vishalanarase committed Jun 1, 2022
1 parent 47f465d commit 1e165b1
Show file tree
Hide file tree
Showing 31 changed files with 6,869 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cluster-autoscaler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p
* [Brightbox](./cloudprovider/brightbox/README.md)
* [CloudStack](./cloudprovider/cloudstack/README.md)
* [HuaweiCloud](./cloudprovider/huaweicloud/README.md)
* [External gRPC](./cloudprovider/externalgrpc/README.md)
* [Hetzner](./cloudprovider/hetzner/README.md)
* [Equinix Metal](./cloudprovider/packet/README.md#notes)
* [IonosCloud](./cloudprovider/ionoscloud/README.md)
Expand Down Expand Up @@ -166,6 +167,7 @@ Supported cloud providers:
* CloudStack https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cloudstack/README.md
* Exoscale https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md
* Equinix Metal https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md
* External gRPC https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/externalgrpc/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
Expand Down
9 changes: 9 additions & 0 deletions cluster-autoscaler/cloudprovider/builder/builder_all.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet && !oci && !vultr && !tencentcloud && !civo && !externalgrpc
// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet,!oci,!vultr,!tencentcloud,!civo,!externalgrpc

/*
Copyright 2018 The Kubernetes Authors.
Expand Down Expand Up @@ -32,6 +37,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/clusterapi"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/digitalocean"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/exoscale"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/externalgrpc"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/huaweicloud"
Expand All @@ -57,6 +63,7 @@ var AvailableCloudProviders = []string{
cloudprovider.MagnumProviderName,
cloudprovider.DigitalOceanProviderName,
cloudprovider.ExoscaleProviderName,
cloudprovider.ExternalGrpcProviderName,
cloudprovider.HuaweicloudProviderName,
cloudprovider.HetznerProviderName,
cloudprovider.OracleCloudProviderName,
Expand Down Expand Up @@ -97,6 +104,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro
return digitalocean.BuildDigitalOcean(opts, do, rl)
case cloudprovider.ExoscaleProviderName:
return exoscale.BuildExoscale(opts, do, rl)
case cloudprovider.ExternalGrpcProviderName:
return externalgrpc.BuildExternalGrpc(opts, do, rl)
case cloudprovider.MagnumProviderName:
return magnum.BuildMagnum(opts, do, rl)
case cloudprovider.HuaweicloudProviderName:
Expand Down
43 changes: 43 additions & 0 deletions cluster-autoscaler/cloudprovider/builder/builder_externalgrpc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//go:build externalgrpc
// +build externalgrpc

/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package builder

import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/externalgrpc"
"k8s.io/autoscaler/cluster-autoscaler/config"
)

// AvailableCloudProviders lists the provider names this build can construct.
// In the externalgrpc-only build that is just the external gRPC provider.
var AvailableCloudProviders = []string{cloudprovider.ExternalGrpcProviderName}

// DefaultCloudProvider is the default provider name for the externalgrpc-only
// build: the external gRPC provider.
const DefaultCloudProvider = cloudprovider.ExternalGrpcProviderName

// buildCloudProvider constructs the external gRPC cloud provider when
// opts.CloudProviderName selects it. Any other provider name yields nil,
// because this build variant handles no other provider.
func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
	if opts.CloudProviderName != cloudprovider.ExternalGrpcProviderName {
		return nil
	}
	return externalgrpc.BuildExternalGrpc(opts, do, rl)
}
2 changes: 2 additions & 0 deletions cluster-autoscaler/cloudprovider/cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ const (
PacketProviderName = "packet"
// TencentcloudProviderName gets the provider name of tencentcloud
TencentcloudProviderName = "tencentcloud"
// ExternalGrpcProviderName gets the provider name of the external grpc provider
ExternalGrpcProviderName = "externalgrpc"
// CivoProviderName gets the provider name of civo
CivoProviderName = "civo"
)
Expand Down
4 changes: 4 additions & 0 deletions cluster-autoscaler/cloudprovider/externalgrpc/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
approvers:
#- dbonfigli
reviewers:
#- dbonfigli
80 changes: 80 additions & 0 deletions cluster-autoscaler/cloudprovider/externalgrpc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# External gRPC Cloud Provider

The External gRPC Cloud Provider provides a plugin system to support out-of-tree cloud provider implementations.

Cluster Autoscaler adds or removes nodes from the cluster by creating or deleting VMs. To separate the autoscaling logic (the same for all clouds) from the API calls required to execute it (different for each cloud), the latter are hidden behind an interface, `CloudProvider`. Each supported cloud has its own implementation in this repository and `--cloud-provider` flag determines which one will be used.

The gRPC Cloud Provider acts as a client for a cloud provider that implements its custom logic separately from the cluster autoscaler, and serves it as a `CloudProvider` gRPC service (similar to the `CloudProvider` interface) without the need to fork this project, follow its development lifecycle, adhere to its rules (e.g. do not use additional external dependencies) or implement the Cluster API.

## Configuration

For the cluster autoscaler parameters, use the `--cloud-provider=externalgrpc` flag and define the cloud configuration file with `--cloud-config=<file location>`. This is a YAML file with the following parameters:

| Key | Value | Mandatory | Default |
|-----|-------|-----------|---------|
| address | external gRPC cloud provider service address of the form "host:port", "host%zone:port", "[host]:port" or "[host%zone]:port" | yes | none |
| key | path to file containing the tls key, if using mTLS | no | none |
| cert | path to file containing the tls certificate, if using mTLS | no | none |
| cacert | path to file containing the CA certificate, if using mTLS | no | none |

The use of mTLS is recommended, since simple, non-authenticated calls to the external gRPC cloud provider service will result in the creation / deletion of nodes.

Log levels of interest for this provider are:
* 1 (flag: ```--v=1```): basic logging of errors;
* 5 (flag: ```--v=5```): detailed logging of every call;

For the deployment and configuration of an external gRPC cloud provider of choice, see its specific documentation.

## Examples

You can find an example of an external gRPC cloud provider service implementation in the [examples/external-grpc-cloud-provider-service](examples/external-grpc-cloud-provider-service) directory; it is a server that wraps all the in-tree cloud providers.

A complete example:
* deploy `cert-manager` and the manifests in [examples/certmanager-manifests](examples/certmanager-manifests) to generate certificates for gRPC client and server;
* build the image for the example external gRPC cloud provider service as defined in [examples/external-grpc-cloud-provider-service](examples/external-grpc-cloud-provider-service);
* deploy the example external gRPC cloud provider service using the manifests at [examples/external-grpc-cloud-provider-service-manifests](examples/external-grpc-cloud-provider-service-manifests), change the parameters as needed and test whichever cloud provider you want;
* deploy the cluster autoscaler selecting the External gRPC Cloud Provider using the manifests at [examples/cluster-autoscaler-manifests](examples/cluster-autoscaler-manifests).

## Development

### External gRPC Cloud Provider service Implementation

To build a cloud provider, create a gRPC server for the `CloudProvider` service defined in [protos/externalgrpc.proto](protos/externalgrpc.proto) that implements all its required RPCs.

### Caching

The `CloudProvider` interface was designed with the assumption that its implementation functions would be fast; this may no longer be true with the added overhead of gRPC. In the interest of performance, some gRPC API responses are cached by this cloud provider:
* `NodeGroupForNode()` caches the node group for a node until `Refresh()` is called;
* `NodeGroups()` caches the current node groups until `Refresh()` is called;
* `GPULabel()` and `GetAvailableGPUTypes()` are cached at first call and never wiped;
* A `NodeGroup` caches `MaxSize()`, `MinSize()` and `Debug()` return values during its creation, and `TemplateNodeInfo()` at its first call; these values are cached for the lifetime of the `NodeGroup` object.

### Code Generation

To regenerate the gRPC code:

1. install `protoc` (the Protocol Buffers compiler), then install the `protoc-gen-go` and `protoc-gen-go-grpc` plugins:

```bash
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26
go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1
```

2. generate gRPC client and server code:

```bash
protoc \
-I ./cluster-autoscaler \
-I ./cluster-autoscaler/vendor \
--go_out=. \
--go-grpc_out=. \
./cluster-autoscaler/cloudprovider/externalgrpc/protos/externalgrpc.proto
```

### General considerations

Abstractions used by Cluster Autoscaler assume nodes belong to "node groups". All nodes within a group must be of the same machine type (have the same amount of resources), have the same set of labels and taints, and be located in the same availability zone. This doesn't mean a cloud has to have a concept of such node groups, but it helps.

There must be a way to delete a specific node. If your cloud supports instance groups, and you are only able to provide a method to decrease the size of a given group, without guaranteeing which instance will be killed, it won't work well.

There must be a way to match a Kubernetes node to the instance it is running on. This is usually done by the kubelet setting the node's `spec.providerID` field to an instance ID which can be used in API calls to the cloud.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: ca-issuer
namespace: kube-system
spec:
ca:
secretName: ca-root-secret
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: selfsigned-ca
namespace: kube-system
spec:
isCA: true
commonName: selfsigned-ca
secretName: ca-root-secret
privateKey:
algorithm: ECDSA
size: 256
issuerRef:
name: selfsigned-issuer
kind: Issuer
group: cert-manager.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: cluster-autoscaler-grpc-client-cert
namespace: kube-system
spec:
secretName: cluster-autoscaler-grpc-client-cert
commonName: cluster-autoscaler-grpc-client
dnsNames:
- "cluster-autoscaler-grpc-client"
duration: 87600h
usages:
- client auth
issuerRef:
name: ca-issuer
kind: Issuer
group: cert-manager.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: cluster-autoscaler-grpc-server-cert
namespace: kube-system
spec:
secretName: cluster-autoscaler-grpc-server-cert
commonName: ca-external-grpc-cloud-provider-service
duration: 87600h
usages:
- server auth
dnsNames:
- "ca-external-grpc-cloud-provider-service"
issuerRef:
name: ca-issuer
kind: Issuer
group: cert-manager.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: selfsigned-issuer
namespace: kube-system
spec:
selfSigned: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
name: cluster-autoscaler-cloud-config
namespace: kube-system
data:
cloud-config: |-
address: "ca-external-grpc-cloud-provider-service:8086"
key: "/etc/ssl/client-cert/tls.key"
cert: "/etc/ssl/client-cert/tls.crt"
cacert: "/etc/ssl/client-cert/ca.crt"
Loading

0 comments on commit 1e165b1

Please sign in to comment.