Cheap and cheerful autoscaler #229
Changes from 60 commits
@@ -0,0 +1,32 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

go_library(
    name = "go_default_library",
    srcs = ["main.go"],
    importpath = "github.com/google/elafros/cmd/ela-autoscaler",
    visibility = ["//visibility:private"],
    deps = [
        "//pkg/autoscaler:go_default_library",
        "//vendor/github.com/golang/glog:go_default_library",
        "//vendor/github.com/gorilla/websocket:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes:go_default_library",
        "//vendor/k8s.io/client-go/rest:go_default_library",
    ],
)

go_binary(
    name = "ela-autoscaler",
    embed = [":go_default_library"],
    importpath = "github.com/google/elafros/cmd/ela-autoscaler",
    pure = "on",
    visibility = ["//visibility:public"],
)

load("@io_bazel_rules_docker//go:image.bzl", "go_image")

go_image(
    name = "image",
    binary = ":ela-autoscaler",
    visibility = ["//visibility:public"],
)
@@ -0,0 +1,190 @@
/*
Copyright 2018 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
    "bytes"
    "encoding/gob"
    "net/http"
    "os"
    "time"

    ela_autoscaler "github.com/google/elafros/pkg/autoscaler"

    "github.com/golang/glog"
    "github.com/gorilla/websocket"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
)

const (
    // The desired number of concurrent requests for each pod. This
    // is the primary knob for the fast autoscaler, which will try to
    // achieve a 60-second average concurrency per pod of
    // targetConcurrency. Another process may tune targetConcurrency
    // to best handle the resource requirements of the revision.
    targetConcurrency = float64(1.0)

    // A big enough buffer to handle 1000 pods sending stats every
    // second while we do the autoscaling computation (a few hundred
    // milliseconds).
    statBufferSize = 1000

    // Enough buffer to store scale requests generated every 2
    // seconds while an http request is taking the full timeout of 5
    // seconds.
    scaleBufferSize = 10
)

var (
    upgrader          = websocket.Upgrader{}
    kubeClient        *kubernetes.Clientset
    statChan          = make(chan ela_autoscaler.Stat, statBufferSize)
    scaleChan         = make(chan int32, scaleBufferSize)
    elaNamespace      string
    elaDeployment     string
    elaAutoscalerPort string
)

func init() {
    elaNamespace = os.Getenv("ELA_NAMESPACE")
    if elaNamespace == "" {
        glog.Fatal("No ELA_NAMESPACE provided.")
    }
    glog.Infof("ELA_NAMESPACE=%v", elaNamespace)

    elaDeployment = os.Getenv("ELA_DEPLOYMENT")
    if elaDeployment == "" {
        glog.Fatal("No ELA_DEPLOYMENT provided.")
    }
    glog.Infof("ELA_DEPLOYMENT=%v", elaDeployment)

    elaAutoscalerPort = os.Getenv("ELA_AUTOSCALER_PORT")
    if elaAutoscalerPort == "" {
        glog.Fatal("No ELA_AUTOSCALER_PORT provided.")
    }
    glog.Infof("ELA_AUTOSCALER_PORT=%v", elaAutoscalerPort)
}

func autoscaler() {
    glog.Infof("Target concurrency: %0.2f.", targetConcurrency)

    a := ela_autoscaler.NewAutoscaler(targetConcurrency)
    ticker := time.NewTicker(2 * time.Second)

    for {
        select {
Review comment: It seems these two cases are independent. If both are ready, we should do both instead of choosing one.
Reply: Autoscaler is not safe for concurrent access, so we only do one of these at a time. Not a big deal since the Scale computation is pretty fast, even with 1000's of pods. And the stat channel is buffered.
        case <-ticker.C:
            scale, ok := a.Scale(time.Now())
            if ok {
                scaleChan <- scale
            }
        case s := <-statChan:
            a.Record(s)
        }
    }
}
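The thread above explains that only one select case runs at a time because the autoscaler is not safe for concurrent access. A minimal sketch of the alternative the reviewer raises, assuming only the Record and Scale methods used above plus a sync import; the lockedAutoscaler wrapper is hypothetical and not part of this PR:

// Hypothetical wrapper (not in this PR): serializes access with a mutex so
// stats and scale ticks could be handled from separate goroutines.
type recorderScaler interface {
    Record(ela_autoscaler.Stat)
    Scale(time.Time) (int32, bool)
}

type lockedAutoscaler struct {
    mu sync.Mutex
    a  recorderScaler
}

func (l *lockedAutoscaler) Record(s ela_autoscaler.Stat) {
    l.mu.Lock()
    defer l.mu.Unlock()
    l.a.Record(s)
}

func (l *lockedAutoscaler) Scale(now time.Time) (int32, bool) {
    l.mu.Lock()
    defer l.mu.Unlock()
    return l.a.Scale(now)
}

Whether that buys anything depends on how expensive Scale is; as the reply notes, it is fast even with thousands of pods, so the single-goroutine select is the simpler choice.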
func scaleSerializer() {
    for {
        select {
        case desiredPodCount := <-scaleChan:
        FastForward:
            // Fast forward to the most recent desired pod
            // count since the http timeout (5 sec) is more
            // than the autoscaling rate (2 sec) and there
            // could be multiple pending scale requests.
            for {
                select {
                case p := <-scaleChan:
                    glog.Warning("Scaling is not keeping up with autoscaling requests.")
                    desiredPodCount = p
                default:
                    break FastForward
                }
            }
            scaleTo(desiredPodCount)
        }
    }
}
func scaleTo(podCount int32) {
    glog.Infof("Target scale is %v", podCount)
    dc := kubeClient.ExtensionsV1beta1().Deployments(elaNamespace)
    deployment, err := dc.Get(elaDeployment, metav1.GetOptions{})
    if err != nil {
        glog.Errorf("Error getting Deployment %q: %s", elaDeployment, err)
        return
    }
    if *deployment.Spec.Replicas == podCount {
        glog.Info("Already at scale.")
        return
    }
    deployment.Spec.Replicas = &podCount
    _, err = dc.Update(deployment)
    if err != nil {
        glog.Errorf("Error updating Deployment %q: %s", elaDeployment, err)
        return
    }
    glog.Info("Successfully scaled.")
}
func handler(w http.ResponseWriter, r *http.Request) {
    conn, err := upgrader.Upgrade(w, r, nil)
    if err != nil {
        glog.Error(err)
        return
    }
    glog.Info("New metrics source online.")
    for {
        messageType, msg, err := conn.ReadMessage()
        if err != nil {
            glog.Info("Metrics source dropping off.")
            return
        }
        if messageType != websocket.BinaryMessage {
            glog.Error("Dropping non-binary message.")
            continue
        }
        dec := gob.NewDecoder(bytes.NewBuffer(msg))
        var stat ela_autoscaler.Stat
        err = dec.Decode(&stat)
        if err != nil {
            glog.Error(err)
            continue
        }
        statChan <- stat
    }
}
func main() {
    glog.Info("Autoscaler up")
    config, err := rest.InClusterConfig()
    if err != nil {
        glog.Fatal(err)
    }
    config.Timeout = 5 * time.Second
    kc, err := kubernetes.NewForConfig(config)
    if err != nil {
        glog.Fatal(err)
    }
    kubeClient = kc
    go autoscaler()
    go scaleSerializer()
    http.HandleFunc("/", handler)
    glog.Fatal(http.ListenAndServe(":"+elaAutoscalerPort, nil))
}
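The handler above expects each stat as a gob-encoded payload in a binary websocket frame. A minimal sketch of the matching sender, assuming an already-open gorilla/websocket connection; the ela-queue changes are not part of this hunk and the Stat fields are not shown here, so the function name is hypothetical:

// Hypothetical sender, mirroring what handler() decodes: gob-encode a Stat
// and push it to the autoscaler as a binary websocket frame.
func sendStat(conn *websocket.Conn, stat ela_autoscaler.Stat) error {
    var buf bytes.Buffer
    if err := gob.NewEncoder(&buf).Encode(stat); err != nil {
        return err
    }
    return conn.WriteMessage(websocket.BinaryMessage, buf.Bytes())
}

The connection itself could be opened with websocket.DefaultDialer.Dial against the autoscaler's service address, which is not defined in this diff.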
@@ -0,0 +1,33 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

go_library(
    name = "go_default_library",
    srcs = ["main.go"],
    importpath = "github.com/google/elafros/cmd/ela-queue",
    visibility = ["//visibility:private"],
    deps = [
        "//pkg/autoscaler:go_default_library",
        "//vendor/github.com/golang/glog:go_default_library",
        "//vendor/github.com/gorilla/websocket:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
        "//vendor/k8s.io/client-go/kubernetes:go_default_library",
        "//vendor/k8s.io/client-go/rest:go_default_library",
    ],
)

go_binary(
    name = "ela-queue",
Review comment: The purpose of the queue is changed to forwarding stats?
Reply: Um ... yes. It's a hack. The queue's original purpose was to support enforced request serialization, and it's still in the right place to do that. But I am using it to also count how many requests are in that queue. And not enforcing serialization.
    embed = [":go_default_library"],
    importpath = "github.com/google/elafros/cmd/ela-queue",
    pure = "on",
    visibility = ["//visibility:public"],
)

load("@io_bazel_rules_docker//go:image.bzl", "go_image")

go_image(
    name = "image",
    binary = ":ela-queue",
    visibility = ["//visibility:public"],
)
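The thread above says the queue now mostly counts how many requests are in flight rather than enforcing serialization. A minimal sketch of that idea, assuming a plain reverse proxy in front of the user container; the real ela-queue code is not shown in this diff, so every name below is hypothetical:

// Hypothetical sketch: count requests currently passing through a reverse
// proxy; the counter is the concurrency figure a stat report could carry.
package main

import (
    "net/http"
    "net/http/httputil"
    "net/url"
    "sync/atomic"
)

var inFlight int32 // requests currently being handled

func countingProxy(target *url.URL) http.Handler {
    proxy := httputil.NewSingleHostReverseProxy(target)
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        atomic.AddInt32(&inFlight, 1)
        defer atomic.AddInt32(&inFlight, -1)
        proxy.ServeHTTP(w, r)
    })
}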
Review comment: How deeply do we understand the capabilities the autoscaler needs right now? Can we just do this TODO?
Reply: In the medium term, we want to collect metrics from Prometheus, in which case we can do away with this queue->autoscaler websocket pipeline and associated permissions. In the short term, we should turn the client-server relationship around and have the autoscaler scrape the pods (@evankanderson's and @vaikas-google's suggestion), which would also do away with the pod permission requirement. This is just to play around with and I plan to get rid of it. Will update the comment accordingly.
Review comment: If the autoscaler collects metrics from Prometheus instead of pods directly, its reaction time is coupled to Prometheus' sampling interval. It would be more flexible to have the autoscaler scrape pods directly (using their Prometheus endpoints). Then it can decide its own sampling interval. Here are some examples of situations when the autoscaler might want to vary sampling frequency:
If the autoscaler does its own scraping, it still needs a ClusterRoleBinding with read permissions so it can enumerate the list of pods to target.
It's possible that Prometheus has an API the autoscaler can use to increase or decrease the sampling frequency of a particular tagged metric. That might be sufficient and we could avoid writing a bunch of scraping code.
Reply: Agree. Maybe we will stick with scraping the pods if we can't get the Envoy->Mixer->Prometheus pipeline latency low enough.
Yes, the autoscaler will still need a role to find the pods, and to modify the deployment. The queue is also using this role binding and that should go away.
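A minimal sketch of the scraping alternative discussed in this thread, assuming the revision's pods carry a known label selector and expose a Prometheus endpoint on a metrics port; the selector, the port, and the parsing step are assumptions, not something defined in this PR:

// Hypothetical sketch: the autoscaler lists the revision's pods and scrapes
// their metrics endpoints itself instead of receiving pushed stats. This
// still requires a role that allows listing pods, as noted above.
func scrapePods(kc *kubernetes.Clientset, namespace, selector string) {
    pods, err := kc.CoreV1().Pods(namespace).List(metav1.ListOptions{LabelSelector: selector})
    if err != nil {
        glog.Errorf("Error listing pods: %s", err)
        return
    }
    for _, p := range pods.Items {
        // Port 9090 is a placeholder for whatever metrics port the pods expose.
        resp, err := http.Get("http://" + p.Status.PodIP + ":9090/metrics")
        if err != nil {
            glog.Warningf("Failed to scrape %s: %s", p.Name, err)
            continue
        }
        // Parse the Prometheus exposition format here and feed the resulting
        // concurrency samples into statChan.
        resp.Body.Close()
    }
}

With the autoscaler pulling, the sampling interval becomes its own decision rather than being coupled to Prometheus' scrape schedule.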