Skip to content

Commit

Permalink
Merge pull request kubernetes#4452 from airbnb/es--grpc-expander-plugin
Browse files Browse the repository at this point in the history
Add gRPC expander plugin
  • Loading branch information
k8s-ci-robot authored and Evan Sheng committed Mar 24, 2022
1 parent 49598c6 commit bc9e09a
Show file tree
Hide file tree
Showing 18 changed files with 1,708 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ func (m *asgCache) createPlaceholdersForDesiredNonStartedInstances(groups []*aut
for i := realInstances; i < desired; i++ {
id := fmt.Sprintf("%s-%s-%d", placeholderInstanceNamePrefix, *g.AutoScalingGroupName, i)
klog.V(4).Infof("Instance group %s has only %d instances created while requested count is %d. "+
"Creating placeholder instance with ID %s.", *g.AutoScalingGroupName, real, desired, id)
"Creating placeholder instance with ID %s.", *g.AutoScalingGroupName, realInstances, desired, id)
g.Instances = append(g.Instances, &autoscaling.Instance{
InstanceId: &id,
AvailabilityZone: g.AvailabilityZones[0],
Expand Down
4 changes: 4 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ type AutoscalingOptions struct {
EstimatorName string
// ExpanderNames sets the chain of node group expanders to be used in scale up
ExpanderNames string
// GRPCExpanderCert is the location of the cert passed to the gRPC server for TLS when using the gRPC expander
GRPCExpanderCert string
// GRPCExpanderURL is the url of the gRPC server when using the gRPC expander
GRPCExpanderURL string
// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
IgnoreDaemonSetsUtilization bool
// IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
Expand Down
4 changes: 2 additions & 2 deletions cluster-autoscaler/core/autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ func initializeDefaultOptions(opts *AutoscalerOptions) error {
opts.CloudProvider = cloudBuilder.NewCloudProvider(opts.AutoscalingOptions)
}
if opts.ExpanderStrategy == nil {
expanderStrategy, err := factory.ExpanderStrategyFromStrings(strings.Split(opts.ExpanderNames, ","),
opts.CloudProvider, opts.AutoscalingKubeClients, opts.KubeClient, opts.ConfigNamespace)
expanderStrategy, err := factory.ExpanderStrategyFromStrings(strings.Split(opts.ExpanderNames, ","), opts.CloudProvider,
opts.AutoscalingKubeClients, opts.KubeClient, opts.ConfigNamespace, opts.GRPCExpanderCert, opts.GRPCExpanderURL)
if err != nil {
return err
}
Expand Down
4 changes: 3 additions & 1 deletion cluster-autoscaler/expander/expander.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (

var (
// AvailableExpanders is a list of available expander options
AvailableExpanders = []string{RandomExpanderName, MostPodsExpanderName, LeastWasteExpanderName, PriceBasedExpanderName, PriorityBasedExpanderName}
AvailableExpanders = []string{RandomExpanderName, MostPodsExpanderName, LeastWasteExpanderName, PriceBasedExpanderName, PriorityBasedExpanderName, GRPCExpanderName}
// RandomExpanderName selects a node group at random
RandomExpanderName = "random"
// MostPodsExpanderName selects a node group that fits the most pods
Expand All @@ -36,6 +36,8 @@ var (
PriceBasedExpanderName = "price"
// PriorityBasedExpanderName selects a node group based on a user-configured priorities assigned to group names
PriorityBasedExpanderName = "priority"
// GRPCExpanderName uses the gRPC client expander to call to an external gRPC server to select a node group for scale up
GRPCExpanderName = "grpc"
)

// Option describes an option to expand the cluster.
Expand Down
7 changes: 5 additions & 2 deletions cluster-autoscaler/expander/factory/expander_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,22 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/expander"
"k8s.io/autoscaler/cluster-autoscaler/expander/grpcplugin"
"k8s.io/autoscaler/cluster-autoscaler/expander/mostpods"
"k8s.io/autoscaler/cluster-autoscaler/expander/price"
"k8s.io/autoscaler/cluster-autoscaler/expander/priority"
"k8s.io/autoscaler/cluster-autoscaler/expander/random"
"k8s.io/autoscaler/cluster-autoscaler/expander/waste"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"

kube_client "k8s.io/client-go/kubernetes"
)

// ExpanderStrategyFromStrings creates an expander.Strategy according to the names of the expanders passed in
// take in whole opts and access stuff here
func ExpanderStrategyFromStrings(expanderFlags []string, cloudProvider cloudprovider.CloudProvider,
autoscalingKubeClients *context.AutoscalingKubeClients, kubeClient kube_client.Interface,
configNamespace string) (expander.Strategy, errors.AutoscalerError) {
configNamespace string, GRPCExpanderCert string, GRPCExpanderURL string) (expander.Strategy, errors.AutoscalerError) {
var filters []expander.Filter
seenExpanders := map[string]struct{}{}
strategySeen := false
Expand Down Expand Up @@ -67,6 +68,8 @@ func ExpanderStrategyFromStrings(expanderFlags []string, cloudProvider cloudprov
stopChannel := make(chan struct{})
lister := kubernetes.NewConfigMapListerForNamespace(kubeClient, stopChannel, configNamespace)
filters = append(filters, priority.NewFilter(lister.ConfigMaps(configNamespace), autoscalingKubeClients.Recorder))
case expander.GRPCExpanderName:
filters = append(filters, grpcplugin.NewFilter(GRPCExpanderCert, GRPCExpanderURL))
default:
return nil, errors.NewAutoscalerError(errors.InternalError, "Expander %s not supported", expanderFlag)
}
Expand Down
41 changes: 41 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# gRPC Expander for Cluster Autoscaler

## Introduction
This expander functions as a gRPC client, and will pass expansion options to an external gRPC server.
The external server will use this information to make a decision on which Node Group to expand, and return an option to expand.

## Motivation

This expander gives users very fine grained control over which option they'd like to expand.
The gRPC server must be implemented by the user, but the logic can be developed out of band with Cluster Autoscaler.
There are a wide variety of use cases here. Some examples are as follows:
* A tiered weighted random strategy can be implemented, instead of a static priority ladder offered by the priority expander.
* A strategy to encapsulate business logic specific to a user but not all users of Cluster Autoscaler
* A strategy to take into account the dynamic fluctuating prices of the spot instance market

## Configuration options
As using this expander requires communication with another service, users must specify a few options as CLI arguments.

```yaml
--grpcExpanderUrl
```
URL of the gRPC Expander server, for CA to communicate with.
```yaml
--grpcExpanderCert
```
Location of the volume-mounted certificate of the gRPC server, if it is configured to communicate over TLS.

## gRPC Expander Server Setup
The gRPC server can be set up in many ways, but a simple example is described below.
An example of a barebones gRPC Expander Server can be found in the `fake_grpc_server.go` file under the `example` directory. This is meant to be copied elsewhere and deployed as a separate
service. Note that the `protos/expander.pb.go` generated protobuf code will also need to be copied and used to serialize/deserialize the Options passed from CA.
Communication between Cluster Autoscaler and the gRPC Server will occur over native kube-proxy. To use this, note the Service and Namespace the gRPC server is deployed in.

Deploy the gRPC Expander Server as a separate app, listening on a specific port number.
Start Cluster Autoscaler with the `--grpcExpanderUrl=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpcExpanderCert` pointed at the location of the volume-mounted certificate of the gRPC server.

## Details

The gRPC client currently transforms nodeInfo objects passed into the expander to v1.Node objects to save rpc call throughput. As such, the gRPC server will not have access to daemonsets and static pods running on each node.


104 changes: 104 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/example/fake_grpc_server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package example

import (
"context"
"fmt"
"log"
"net"

"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"k8s.io/autoscaler/cluster-autoscaler/expander/grpcplugin/protos"
)

// This code is meant to be used as starter code, deployed as a separate app, not in Cluster Autoscaler.
// This serves as the gRPC Expander Server counterpart to the client which lives in this repo
// main.go of said application should simply pass in paths to (optional)cert, (optional)private key, and port, and call Serve to start listening
// copy the protos/expander.pb.go to your other application's repo, so it has access to the protobuf definitions

// Serve builds the gRPC Expander Server and blocks while serving requests on port.
// It is intended to be invoked from main() of the standalone Expander Server application.
//
// TLS is enabled only when both certPath and keyPath are non-empty; otherwise the
// server is created without transport credentials. A failure to load the
// credentials or an error returned from serving terminates the process via log.Fatal.
func Serve(certPath string, keyPath string, port uint) {
	var serverOpts []grpc.ServerOption

	// Credentials are optional: add them only when both the cert and the key are given.
	if certPath != "" && keyPath != "" {
		log.Printf("Using certFile: %v and keyFile: %v", certPath, keyPath)
		creds, err := credentials.NewServerTLSFromFile(certPath, keyPath)
		if err != nil {
			log.Fatal("cannot load TLS credentials: ", err)
		}
		serverOpts = append(serverOpts, grpc.Creds(creds))
	}
	srv := grpc.NewServer(serverOpts...)

	listener := getNetListener(port)

	// Expose the Expander service implementation on the gRPC server.
	protos.RegisterExpanderServer(srv, NewExpanderServerImpl())

	// Serve blocks; it only comes back with an error worth aborting on.
	log.Println("Starting server on port ", port)
	if err := srv.Serve(listener); err != nil {
		log.Fatalf("failed to serve: %s", err)
	}
}

// getNetListener opens a TCP listener bound to the given port (address ":<port>").
// If the listener cannot be created, the error is logged and the process exits,
// so callers always receive a usable listener.
func getNetListener(port uint) net.Listener {
	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
	if err != nil {
		// log.Fatalf logs and then calls os.Exit(1); it never returns, so no
		// additional panic/return is needed after it.
		log.Fatalf("failed to listen: %v", err)
	}

	return lis
}

// ExpanderServerImpl is a field-less implementation of the Expander server
// from the proto definition.
type ExpanderServerImpl struct{}

// NewExpanderServerImpl returns a pointer to a new ExpanderServerImpl.
func NewExpanderServerImpl() *ExpanderServerImpl {
	return new(ExpanderServerImpl)
}

// BestOptions filters the options passed from the gRPC client in CA down to the
// best one according to this server's strategy.
//
// The example strategy picks the Option with the longest NodeGroupId; on a tie
// the earliest such option in the request wins. It can be replaced with any
// arbitrary logic.
//
// The response holds exactly one option when the request contains at least one
// non-nil option, and no options otherwise. The returned error is always nil.
func (s *ExpanderServerImpl) BestOptions(ctx context.Context, req *protos.BestOptionsRequest) (*protos.BestOptionsResponse, error) {
	opts := req.GetOptions()
	log.Printf("Received BestOption Request with %v options", len(opts))

	// Start below any real length so that an option with an empty NodeGroupId
	// can still be selected when nothing longer is offered.
	longest := -1
	var choice *protos.Option
	for _, opt := range opts {
		if opt == nil {
			continue
		}
		log.Println(opt.NodeGroupId)
		if n := len(opt.NodeGroupId); n > longest {
			// Track the running maximum; without this the last option would
			// always win instead of the longest one.
			longest = n
			choice = opt
		}
	}

	// Nothing to choose from: answer with an empty response rather than
	// dereferencing a nil option.
	if choice == nil {
		log.Print("returned bestOptions with no option: request contained no options")
		return &protos.BestOptionsResponse{}, nil
	}

	log.Print("returned bestOptions with option: ", choice.NodeGroupId)

	// Return just one option for now
	return &protos.BestOptionsResponse{
		Options: []*protos.Option{choice},
	}, nil
}
30 changes: 30 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/example/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package example

import "flag"

// main parses the command-line flags for the example gRPC Expander Server and
// hands them to Serve. The cert and key paths default to empty strings and the
// port defaults to 7000.
func main() {
	var (
		certPath string
		keyPath  string
		port     uint
	)

	flag.StringVar(&certPath, "cert-path", "", "Path to cert file for gRPC Expander Server")
	flag.StringVar(&keyPath, "key-path", "", "Path to private key for gRPC Expander Server")
	flag.UintVar(&port, "port", 7000, "Port number for server to listen on")
	flag.Parse()

	Serve(certPath, keyPath, port)
}
Loading

0 comments on commit bc9e09a

Please sign in to comment.