forked from kubernetes/autoscaler
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request kubernetes#4452 from airbnb/es--grpc-expander-plugin
Add gRPC expander plugin
- Loading branch information
1 parent
6e22b10
commit c98a480
Showing
351 changed files
with
108,818 additions
and
61,370 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# gRPC Expander for Cluster Autoscaler | ||
|
||
## Introduction | ||
This expander functions as a gRPC client, and will pass expansion options to an external gRPC server. | ||
The external server will use this information to make a decision on which Node Group to expand, and return an option to expand. | ||
|
||
## Motivation | ||
|
||
This expander gives users very fine grained control over which option they'd like to expand. | ||
The gRPC server must be implemented by the user, but the logic can be developed out of band with Cluster Autoscaler. | ||
There are a wide variety of use cases here. Some examples are as follows: | ||
* A tiered weighted random strategy can be implemented, instead of a static priority ladder offered by the priority expander. | ||
* A strategy to encapsulate business logic specific to a user but not all users of Cluster Autoscaler | ||
* A strategy to take into account the dynamic fluctuating prices of the spot instance market | ||
|
||
## Configuration options | ||
As using this expander requires communication with another service, users must specify a few options as CLI arguments. | ||
|
||
```yaml | ||
--grpcExpanderUrl | ||
``` | ||
URL of the gRPC Expander server, for CA to communicate with. | ||
```yaml | ||
--grpcExpanderCert | ||
``` | ||
Location of the volume mounted certificate of the gRPC server if it is configured to communicate over TLS | ||
|
||
## gRPC Expander Server Setup | ||
The gRPC server can be set up in many ways, but a simple example is described below. | ||
An example of a barebones gRPC Expander Server can be found in the `example` directory under `fake_grpc_server.go` file. This is meant to be copied elsewhere and deployed as a separate | ||
service. Note that the `protos/expander.pb.go` generated protobuf code will also need to be copied and used to serialize/deserialize the Options passed from CA. | ||
Communication between Cluster Autoscaler and the gRPC Server will occur over native kube-proxy. To use this, note the Service and Namespace the gRPC server is deployed in. | ||
|
||
Deploy the gRPC Expander Server as a separate app, listening on a specific port number. | ||
Start Cluster Autoscaler with the `--grpcExpanderUrl=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpcExpanderCert` pointed at the location of the volume mounted certificate of the gRPC server. | ||
|
||
## Details | ||
|
||
The gRPC client currently transforms nodeInfo objects passed into the expander to v1.Node objects to save rpc call throughput. As such, the gRPC server will not have access to daemonsets and static pods running on each node. | ||
|
||
|
104 changes: 104 additions & 0 deletions
104
cluster-autoscaler/expander/grpcplugin/example/fake_grpc_server.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
Copyright 2021 The Kubernetes Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package example | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"log" | ||
"net" | ||
|
||
"google.golang.org/grpc" | ||
"google.golang.org/grpc/credentials" | ||
"k8s.io/autoscaler/cluster-autoscaler/expander/grpcplugin/protos" | ||
) | ||
|
||
// This code is meant to be used as starter code, deployed as a separate app, not in Cluster Autoscaler. | ||
// This serves as the gRPC Expander Server counterpart to the client which lives in this repo | ||
// main.go of said application should simply pass in paths to (optional)cert, (optional)private key, and port, and call Serve to start listening | ||
// copy the protos/expander.pb.go to your other application's repo, so it has access to the protobuf definitions | ||
|
||
// Serve should be called by the main() function in main.go of the Expander Server repo to start serving | ||
func Serve(certPath string, keyPath string, port uint) { | ||
|
||
var grpcServer *grpc.Server | ||
|
||
// If credentials are passed in, use them | ||
if certPath != "" && keyPath != "" { | ||
log.Printf("Using certFile: %v and keyFile: %v", certPath, keyPath) | ||
tlsCredentials, err := credentials.NewServerTLSFromFile(certPath, keyPath) | ||
if err != nil { | ||
log.Fatal("cannot load TLS credentials: ", err) | ||
} | ||
grpcServer = grpc.NewServer(grpc.Creds(tlsCredentials)) | ||
} else { | ||
grpcServer = grpc.NewServer() | ||
} | ||
|
||
netListener := getNetListener(port) | ||
|
||
expanderServerImpl := NewExpanderServerImpl() | ||
|
||
protos.RegisterExpanderServer(grpcServer, expanderServerImpl) | ||
|
||
// start the server | ||
log.Println("Starting server on port ", port) | ||
if err := grpcServer.Serve(netListener); err != nil { | ||
log.Fatalf("failed to serve: %s", err) | ||
} | ||
} | ||
|
||
// getNetListener opens a TCP listener on the given port on all interfaces.
// If the port cannot be bound, the error is logged and the process exits via
// log.Fatalf, so a returned listener is always usable.
func getNetListener(port uint) net.Listener {
	lis, err := net.Listen("tcp", fmt.Sprintf(":%d", port))
	if err != nil {
		// log.Fatalf exits the process, so nothing after it would ever run.
		log.Fatalf("failed to listen: %v", err)
	}
	return lis
}
|
||
// ExpanderServerImpl is the example implementation of the Expander server
// described by the proto definition. It carries no state.
type ExpanderServerImpl struct{}

// NewExpanderServerImpl returns a ready-to-register ExpanderServerImpl.
func NewExpanderServerImpl() *ExpanderServerImpl {
	return new(ExpanderServerImpl)
}
|
||
// BestOptions method filters out the best options of all options passed from the gRPC Client in CA, according to the defined strategy. | ||
func (ServerImpl *ExpanderServerImpl) BestOptions(ctx context.Context, req *protos.BestOptionsRequest) (*protos.BestOptionsResponse, error) { | ||
opts := req.GetOptions() | ||
log.Printf("Received BestOption Request with %v options", len(opts)) | ||
|
||
// This strategy simply chooses the Option with the longest NodeGroupID name, but can be replaced with any arbitrary logic | ||
longest := 0 | ||
var choice *protos.Option | ||
for _, opt := range opts { | ||
log.Println(opt.NodeGroupId) | ||
if len(opt.NodeGroupId) > longest { | ||
choice = opt | ||
} | ||
} | ||
|
||
log.Print("returned bestOptions with option: ", choice.NodeGroupId) | ||
|
||
// Return just one option for now | ||
return &protos.BestOptionsResponse{ | ||
Options: []*protos.Option{choice}, | ||
}, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
/* | ||
Copyright 2021 The Kubernetes Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package example | ||
|
||
import "flag" | ||
|
||
func main() { | ||
|
||
certPath := flag.String("cert-path", "", "Path to cert file for gRPC Expander Server") | ||
keyPath := flag.String("key-path", "", "Path to private key for gRPC Expander Server") | ||
port := flag.Uint("port", 7000, "Port number for server to listen on") | ||
|
||
flag.Parse() | ||
|
||
Serve(*certPath, *keyPath, *port) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
/* | ||
Copyright 2021 The Kubernetes Authors. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package grpcplugin | ||
|
||
import ( | ||
"context" | ||
"log" | ||
"time" | ||
|
||
v1 "k8s.io/api/core/v1" | ||
"k8s.io/autoscaler/cluster-autoscaler/expander" | ||
"k8s.io/autoscaler/cluster-autoscaler/expander/grpcplugin/protos" | ||
"k8s.io/klog/v2" | ||
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" | ||
|
||
"google.golang.org/grpc" | ||
"google.golang.org/grpc/credentials" | ||
) | ||
|
||
// gRPCTimeout is the deadline applied to each BestOptions call made to the
// external expander server.
const gRPCTimeout time.Duration = 5 * time.Second
|
||
type grpcclientstrategy struct { | ||
grpcClient protos.ExpanderClient | ||
} | ||
|
||
// NewFilter returns an expansion filter that creates a gRPC client, and calls out to a gRPC server | ||
func NewFilter(expanderCert string, expanderUrl string) expander.Filter { | ||
client := createGRPCClient(expanderCert, expanderUrl) | ||
if client == nil { | ||
return &grpcclientstrategy{grpcClient: nil} | ||
} | ||
return &grpcclientstrategy{grpcClient: client} | ||
} | ||
|
||
func createGRPCClient(expanderCert string, expanderUrl string) protos.ExpanderClient { | ||
var dialOpt grpc.DialOption | ||
|
||
if expanderCert == "" { | ||
log.Fatalf("GRPC Expander Cert not specified, insecure connections not allowed") | ||
return nil | ||
} | ||
creds, err := credentials.NewClientTLSFromFile(expanderCert, "") | ||
if err != nil { | ||
log.Fatalf("Failed to create TLS credentials %v", err) | ||
return nil | ||
} | ||
dialOpt = grpc.WithTransportCredentials(creds) | ||
klog.V(2).Infof("Dialing: %s with dialopt: %v", expanderUrl, dialOpt) | ||
conn, err := grpc.Dial(expanderUrl, dialOpt) | ||
if err != nil { | ||
log.Fatalf("Fail to dial server: %v", err) | ||
return nil | ||
} | ||
return protos.NewExpanderClient(conn) | ||
} | ||
|
||
func (g *grpcclientstrategy) BestOptions(expansionOptions []expander.Option, nodeInfo map[string]*schedulerframework.NodeInfo) []expander.Option { | ||
if g.grpcClient == nil { | ||
klog.Errorf("Incorrect gRPC client config, filtering no options") | ||
return expansionOptions | ||
} | ||
|
||
// Transform inputs to gRPC inputs | ||
grpcOptionsSlice, nodeGroupIDOptionMap := populateOptionsForGRPC(expansionOptions) | ||
grpcNodeMap := populateNodeInfoForGRPC(nodeInfo) | ||
|
||
// call gRPC server to get BestOption | ||
klog.V(2).Infof("GPRC call of best options to server with %v options", len(nodeGroupIDOptionMap)) | ||
ctx, cancel := context.WithTimeout(context.Background(), gRPCTimeout) | ||
defer cancel() | ||
bestOptionsResponse, err := g.grpcClient.BestOptions(ctx, &protos.BestOptionsRequest{Options: grpcOptionsSlice, NodeMap: grpcNodeMap}) | ||
if err != nil { | ||
klog.V(4).Info("GRPC call timed out, no options filtered") | ||
return expansionOptions | ||
} | ||
|
||
if bestOptionsResponse == nil || bestOptionsResponse.Options == nil { | ||
klog.V(4).Info("GRPC returned nil bestOptions, no options filtered") | ||
return expansionOptions | ||
} | ||
// Transform back options slice | ||
options := transformAndSanitizeOptionsFromGRPC(bestOptionsResponse.Options, nodeGroupIDOptionMap) | ||
if options == nil { | ||
klog.V(4).Info("Unable to sanitize GPRC returned bestOptions, no options filtered") | ||
return expansionOptions | ||
} | ||
return options | ||
} | ||
|
||
// populateOptionsForGRPC creates a map of nodegroup ID and options, as well as a slice of Options objects for the gRPC call | ||
func populateOptionsForGRPC(expansionOptions []expander.Option) ([]*protos.Option, map[string]expander.Option) { | ||
grpcOptionsSlice := []*protos.Option{} | ||
nodeGroupIDOptionMap := make(map[string]expander.Option) | ||
for _, option := range expansionOptions { | ||
nodeGroupIDOptionMap[option.NodeGroup.Id()] = option | ||
grpcOptionsSlice = append(grpcOptionsSlice, newOptionMessage(option.NodeGroup.Id(), int32(option.NodeCount), option.Debug, option.Pods)) | ||
} | ||
return grpcOptionsSlice, nodeGroupIDOptionMap | ||
} | ||
|
||
// populateNodeInfoForGRPC looks at the corresponding v1.Node object per NodeInfo object, and populates the grpcNodeInfoMap with these to pass over grpc | ||
func populateNodeInfoForGRPC(nodeInfos map[string]*schedulerframework.NodeInfo) map[string]*v1.Node { | ||
grpcNodeInfoMap := make(map[string]*v1.Node) | ||
for nodeId, nodeInfo := range nodeInfos { | ||
grpcNodeInfoMap[nodeId] = nodeInfo.Node() | ||
} | ||
return grpcNodeInfoMap | ||
} | ||
|
||
func transformAndSanitizeOptionsFromGRPC(bestOptionsResponseOptions []*protos.Option, nodeGroupIDOptionMap map[string]expander.Option) []expander.Option { | ||
var options []expander.Option | ||
for _, option := range bestOptionsResponseOptions { | ||
if option == nil { | ||
klog.Errorf("GRPC server returned nil Option") | ||
continue | ||
} | ||
if _, ok := nodeGroupIDOptionMap[option.NodeGroupId]; ok { | ||
options = append(options, nodeGroupIDOptionMap[option.NodeGroupId]) | ||
} else { | ||
klog.Errorf("GRPC server returned invalid nodeGroup ID: ", option.NodeGroupId) | ||
continue | ||
} | ||
} | ||
return options | ||
} | ||
|
||
func newOptionMessage(nodeGroupId string, nodeCount int32, debug string, pods []*v1.Pod) *protos.Option { | ||
return &protos.Option{NodeGroupId: nodeGroupId, NodeCount: nodeCount, Debug: debug, Pod: pods} | ||
} |
Oops, something went wrong.