Skip to content

Commit

Permalink
add starter code and readme for grpc expander usage
Browse files Browse the repository at this point in the history
  • Loading branch information
evansheng committed Feb 16, 2022
1 parent 4504f55 commit a2b24e0
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 524 deletions.
3 changes: 0 additions & 3 deletions cluster-autoscaler/expander/factory/expander_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/expander/waste"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/klog/v2"

kube_client "k8s.io/client-go/kubernetes"
)

Expand Down Expand Up @@ -71,7 +69,6 @@ func ExpanderStrategyFromStrings(expanderFlags []string, cloudProvider cloudprov
lister := kubernetes.NewConfigMapListerForNamespace(kubeClient, stopChannel, configNamespace)
filters = append(filters, priority.NewFilter(lister.ConfigMaps(configNamespace), autoscalingKubeClients.Recorder))
case expander.GRPCExpanderName:
klog.V(1).Info("GRPC expander chosen")
filters = append(filters, grpcplugin.NewFilter(GRPCExpanderCert, GRPCExpanderURL))
default:
return nil, errors.NewAutoscalerError(errors.InternalError, "Expander %s not supported", expanderFlag)
Expand Down
41 changes: 41 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# gRPC Expander for Cluster Autoscaler

## Introduction
This expander functions as a gRPC client, and will pass expansion options to an external gRPC server.
The external server will use this information to make a decision on which Node Group to expand, and return an option to expand.

## Motivation

This expander gives users fine-grained control over which option is chosen for expansion.
The gRPC server must be implemented by the user, but the logic can be developed out of band with Cluster Autoscaler.
There are a wide variety of use cases here. Some examples are as follows:
* A tiered weighted random strategy can be implemented, instead of a static priority ladder offered by the priority expander.
* A strategy to encapsulate business logic specific to a user but not all users of Cluster Autoscaler
* A strategy to take into account the dynamic fluctuating prices of the spot instance market

## Configuration options
As using this expander requires communication with another service, users must specify a few options as CLI arguments.

```yaml
--grpcExpanderUrl
```
URL of the gRPC Expander server, for CA to communicate with.
```yaml
--grpcExpanderCert
```
Location of the volume-mounted certificate of the gRPC server, if it is configured to communicate over TLS.

## gRPC Expander Server Setup
The gRPC server can be set up in many ways, but a simple example is described below.
An example of a barebones gRPC Expander Server can be found in the `example` directory, in the `fake_grpc_server.go` file. It is meant to be copied elsewhere and deployed as a separate
service. Note that the generated protobuf code in `protos/expander.pb.go` will also need to be copied, as it is used to serialize/deserialize the Options passed from CA.
Communication between Cluster Autoscaler and the gRPC Server will occur over native kube-proxy. To use this, note the Service and Namespace the gRPC server is deployed in.

Deploy the gRPC Expander Server as a separate app, listening on a specific port number.
Start Cluster Autoscaler with the `--grpcExpanderUrl=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpcExpanderCert` pointed at the location of the volume-mounted certificate of the gRPC server.

## Details

The gRPC client currently converts the nodeInfo objects passed into the expander to v1.Node objects, to reduce the amount of data sent per RPC call. As a result, the gRPC server will not have access to the daemonsets and static pods running on each node.


104 changes: 104 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/example/fake_grpc_server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package example

import (
"context"
"fmt"
"log"
"net"

"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"k8s.io/autoscaler/cluster-autoscaler/expander/grpcplugin/protos"
)

// This code is meant to be used as starter code, deployed as a separate app rather than inside Cluster Autoscaler.
// It serves as the gRPC Expander Server counterpart to the client which lives in this repo.
// The main.go of that application should simply pass the (optional) cert path, the (optional) private key path and the port to Serve to start listening.
// Copy protos/expander.pb.go to the other application's repo so it has access to the protobuf definitions.

// Serve should be called by the main() function in main.go of the Expander Server repo to start serving.
//
// It creates a gRPC server, registers the example Expander implementation on
// it and then blocks, serving requests on the given TCP port.
//
// TLS is enabled only when both certPath and keyPath are non-empty. If exactly
// one of them is set, the server still starts without TLS (as before), but a
// warning is logged so the misconfiguration does not go unnoticed.
//
// Any failure to load the credentials, open the listener or serve terminates
// the process through log.Fatal/log.Fatalf.
func Serve(certPath string, keyPath string, port uint) {

	var grpcServer *grpc.Server

	switch {
	case certPath != "" && keyPath != "":
		// If credentials are passed in, use them
		log.Printf("Using certFile: %v and keyFile: %v", certPath, keyPath)
		tlsCredentials, err := credentials.NewServerTLSFromFile(certPath, keyPath)
		if err != nil {
			log.Fatal("cannot load TLS credentials: ", err)
		}
		grpcServer = grpc.NewServer(grpc.Creds(tlsCredentials))
	case certPath != "" || keyPath != "":
		// Half-configured TLS: keep serving in plaintext, but say so loudly.
		log.Printf("WARNING: only one of certPath (%q) and keyPath (%q) was provided, both are required for TLS; serving without TLS", certPath, keyPath)
		grpcServer = grpc.NewServer()
	default:
		grpcServer = grpc.NewServer()
	}

	netListener := getNetListener(port)

	expanderServerImpl := NewExpanderServerImpl()

	protos.RegisterExpanderServer(grpcServer, expanderServerImpl)

	// start the server
	log.Printf("Starting server on port %d", port)
	if err := grpcServer.Serve(netListener); err != nil {
		log.Fatalf("failed to serve: %s", err)
	}
}

// getNetListener opens a TCP listener on the given port on all interfaces.
//
// If the listener cannot be created, the error is logged and the process
// exits: log.Fatalf calls os.Exit(1) after logging, so no code after it runs
// and no further panic is needed.
func getNetListener(port uint) net.Listener {
	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
	if err != nil {
		log.Fatalf("failed to listen: %v", err)
	}

	return lis
}

// ExpanderServerImpl is the example implementation of the Expander server
// declared in the proto definition. It carries no state.
type ExpanderServerImpl struct{}

// NewExpanderServerImpl returns a pointer to a fresh ExpanderServerImpl.
func NewExpanderServerImpl() *ExpanderServerImpl {
	impl := new(ExpanderServerImpl)
	return impl
}

// BestOptions method filters out the best options of all options passed from the gRPC Client in CA, according to the defined strategy.
//
// The example strategy returns the single Option with the longest NodeGroupId
// (the first one wins on ties); it can be replaced with any arbitrary logic.
//
// If the request holds no Option with a non-empty NodeGroupId, a response
// without Options is returned instead of dereferencing a nil choice.
// The returned error is always nil.
func (s *ExpanderServerImpl) BestOptions(ctx context.Context, req *protos.BestOptionsRequest) (*protos.BestOptionsResponse, error) {
	opts := req.GetOptions()
	log.Printf("Received BestOption Request with %v options", len(opts))

	longest := 0
	var choice *protos.Option
	for _, opt := range opts {
		if opt == nil {
			// Defensive: skip nil entries rather than panicking on field access.
			continue
		}
		log.Println(opt.NodeGroupId)
		if len(opt.NodeGroupId) > longest {
			// Track the running maximum so that a later, shorter ID cannot
			// replace an earlier, longer one.
			longest = len(opt.NodeGroupId)
			choice = opt
		}
	}

	if choice == nil {
		log.Print("no option with a non-empty NodeGroupId received, returning no options")
		return &protos.BestOptionsResponse{}, nil
	}

	log.Print("returned bestOptions with option: ", choice.NodeGroupId)

	// Return just one option for now
	return &protos.BestOptionsResponse{
		Options: []*protos.Option{choice},
	}, nil
}
30 changes: 30 additions & 0 deletions cluster-autoscaler/expander/grpcplugin/example/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package example

import "flag"

// main reads the server settings from the command line and hands them to
// Serve: an optional certificate path, an optional private key path, and the
// port to listen on (7000 unless overridden).
func main() {
	var (
		certFile string
		keyFile  string
		listenOn uint
	)

	flag.StringVar(&certFile, "cert-path", "", "Path to cert file for gRPC Expander Server")
	flag.StringVar(&keyFile, "key-path", "", "Path to private key for gRPC Expander Server")
	flag.UintVar(&listenOn, "port", 7000, "Port number for server to listen on")
	flag.Parse()

	Serve(certFile, keyFile, listenOn)
}
65 changes: 34 additions & 31 deletions cluster-autoscaler/expander/grpcplugin/grpc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"google.golang.org/grpc/credentials"
)

const gRPCTimeout = 5 * time.Second

type grpcclientstrategy struct {
grpcClient protos.ExpanderClient
}
Expand All @@ -47,89 +49,90 @@ func NewFilter(expanderCert string, expanderUrl string) expander.Filter {
func createGRPCClient(expanderCert string, expanderUrl string) protos.ExpanderClient {
var dialOpt grpc.DialOption

// if no Cert file specified, use insecure
if expanderCert == "" {
dialOpt = grpc.WithInsecure()
} else {
creds, err := credentials.NewClientTLSFromFile(expanderCert, "")
if err != nil {
log.Fatalf("Failed to create TLS credentials %v", err)
return nil
}
dialOpt = grpc.WithTransportCredentials(creds)
log.Fatalf("GRPC Expander Cert not specified, insecure connections not allowed")
return nil
}
creds, err := credentials.NewClientTLSFromFile(expanderCert, "")
if err != nil {
log.Fatalf("Failed to create TLS credentials %v", err)
return nil
}
klog.V(2).Info("Dialing ", expanderUrl, " dialopt: ", dialOpt)
dialOpt = grpc.WithTransportCredentials(creds)
klog.V(2).Infof("Dialing: %s with dialopt: %v", expanderUrl, dialOpt)
conn, err := grpc.Dial(expanderUrl, dialOpt)
if err != nil {
log.Fatalf("fail to dial server: %v", err)
log.Fatalf("Fail to dial server: %v", err)
return nil
}
return protos.NewExpanderClient(conn)
}

func (g *grpcclientstrategy) BestOptions(expansionOptions []expander.Option, nodeInfo map[string]*schedulerframework.NodeInfo) []expander.Option {
if g.grpcClient == nil {
log.Fatalf("Incorrect gRPC client config, filtering no options")
klog.Errorf("Incorrect gRPC client config, filtering no options")
return expansionOptions
}

// Transform inputs to gRPC inputs
nodeGroupIDOptionMap := make(map[string]expander.Option)
grpcOptionsSlice := []*protos.Option{}
populateOptionsForGRPC(expansionOptions, nodeGroupIDOptionMap, &grpcOptionsSlice)
grpcNodeInfoMap := make(map[string]*v1.Node)
populateNodeInfoForGRPC(nodeInfo, grpcNodeInfoMap)
grpcOptionsSlice, nodeGroupIDOptionMap := populateOptionsForGRPC(expansionOptions)
grpcNodeMap := populateNodeInfoForGRPC(nodeInfo)

// call gRPC server to get BestOption
klog.V(2).Info("GPRC call of best options to server with ", len(nodeGroupIDOptionMap), " options")
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
klog.V(2).Infof("GPRC call of best options to server with %v options", len(nodeGroupIDOptionMap))
ctx, cancel := context.WithTimeout(context.Background(), gRPCTimeout)
defer cancel()
bestOptionsResponse, err := g.grpcClient.BestOptions(ctx, &protos.BestOptionsRequest{Options: grpcOptionsSlice, NodeInfoMap: grpcNodeInfoMap})
bestOptionsResponse, err := g.grpcClient.BestOptions(ctx, &protos.BestOptionsRequest{Options: grpcOptionsSlice, NodeMap: grpcNodeMap})
if err != nil {
klog.V(2).Info("GRPC call timed out, no options filtered")
klog.V(4).Info("GRPC call timed out, no options filtered")
return expansionOptions
}

if bestOptionsResponse == nil || bestOptionsResponse.Options == nil {
klog.V(2).Info("GRPC returned nil bestOptions, no options filtered")
klog.V(4).Info("GRPC returned nil bestOptions, no options filtered")
return expansionOptions
}
// Transform back options slice
options := transformAndSanitizeOptionsFromGRPC(bestOptionsResponse.Options, nodeGroupIDOptionMap)
if options == nil {
klog.V(2).Info("Unable to sanitize GPRC returned bestOptions, no options filtered")
klog.V(4).Info("Unable to sanitize GPRC returned bestOptions, no options filtered")
return expansionOptions
}
return options
}

// populateOptionsForGRPC creates a map of nodegroup ID and options, as well as a slice of Options objects for the gRPC call
func populateOptionsForGRPC(expansionOptions []expander.Option, nodeGroupIDOptionMap map[string]expander.Option, grpcOptionsSlice *[]*protos.Option) {
func populateOptionsForGRPC(expansionOptions []expander.Option) ([]*protos.Option, map[string]expander.Option) {
grpcOptionsSlice := []*protos.Option{}
nodeGroupIDOptionMap := make(map[string]expander.Option)
for _, option := range expansionOptions {
nodeGroupIDOptionMap[option.NodeGroup.Id()] = option
*grpcOptionsSlice = append(*grpcOptionsSlice, newOptionMessage(option.NodeGroup.Id(), int32(option.NodeCount), option.Debug, option.Pods))
grpcOptionsSlice = append(grpcOptionsSlice, newOptionMessage(option.NodeGroup.Id(), int32(option.NodeCount), option.Debug, option.Pods))
}
return grpcOptionsSlice, nodeGroupIDOptionMap
}

// populateNodeInfoForGRPC modifies the nodeInfo object, and replaces it with the v1.Node to pass through grpc
func populateNodeInfoForGRPC(nodeInfos map[string]*schedulerframework.NodeInfo, grpcNodeInfoMap map[string]*v1.Node) {
// populateNodeInfoForGRPC looks at the corresponding v1.Node object per NodeInfo object, and populates the grpcNodeInfoMap with these to pass over grpc
func populateNodeInfoForGRPC(nodeInfos map[string]*schedulerframework.NodeInfo) map[string]*v1.Node {
grpcNodeInfoMap := make(map[string]*v1.Node)
for nodeId, nodeInfo := range nodeInfos {
grpcNodeInfoMap[nodeId] = nodeInfo.Node()
}
return grpcNodeInfoMap
}

func transformAndSanitizeOptionsFromGRPC(bestOptionsResponseOptions []*protos.Option, nodeGroupIDOptionMap map[string]expander.Option) []expander.Option {
var options []expander.Option
for _, option := range bestOptionsResponseOptions {
if option == nil {
klog.Errorf("gRPC server returned nil Option")
return nil
klog.Errorf("GRPC server returned nil Option")
continue
}
if _, ok := nodeGroupIDOptionMap[option.NodeGroupId]; ok {
options = append(options, nodeGroupIDOptionMap[option.NodeGroupId])
} else {
klog.Errorf("gRPC server returned invalid nodeGroup ID: ", option.NodeGroupId)
return nil
klog.Errorf("GRPC server returned invalid nodeGroup ID: ", option.NodeGroupId)
continue
}
}
return options
Expand Down
Loading

0 comments on commit a2b24e0

Please sign in to comment.